1
#
2
#   Methods for class "mcmcRocPrc", generated by mcmcRocPrc()
3
#
4

5
#' @rdname mcmcRocPrc
6
#' 
7
#' @export
8
print.mcmcRocPrc <- function(x, ...) {
9
  
10 1
  auc_roc <- x$area_under_roc
11 1
  auc_prc <- x$area_under_prc
12
  
13 1
  has_curves <- !is.null(x$roc_dat)
14 1
  has_sims   <- length(auc_roc) > 1
15
  
16 1
  if (!has_sims) {
17 1
    roc_msg <- sprintf("%.3f", round(auc_roc, 3))
18 1
    prc_msg <- sprintf("%.3f", round(auc_prc, 3))
19
  } else {
20 1
    roc_msg <- sprintf("%.3f [80%%: %.3f - %.3f]",
21 1
                       round(mean(auc_roc), 3), 
22 1
                       round(quantile(auc_roc, 0.1), 3), 
23 1
                       round(quantile(auc_roc, 0.9), 3))
24 1
    prc_msg <- sprintf("%.3f [80%%: %.3f - %.3f]",
25 1
                       round(mean(auc_prc), 3), 
26 1
                       round(quantile(auc_prc, 0.1), 3), 
27 1
                       round(quantile(auc_prc, 0.9), 3))
28
  }
29
  
30 1
  cat("mcmcRocPrc object\n")
31 1
  cat(sprintf("curves: %s; fullsims: %s\n", has_curves, has_sims))
32 1
  cat(sprintf("AUC-ROC: %s\n", roc_msg))
33 1
  cat(sprintf("AUC-PR:  %s\n", prc_msg))
34
  
35 1
  invisible(x)
36
}
37

38
#' @rdname mcmcRocPrc
39
#' 
40
#' @param n plot method: if `fullsims = TRUE`, how many sample curves to draw?
41
#' @param alpha plot method: alpha value for plotting sampled curves; between 0 and 1
42
#' 
43
#' @export
44
plot.mcmcRocPrc <- function(x, n = 40, alpha = .5, ...) {
45
  
46 1
  stopifnot(
47 1
    "Use mcmcRocPrc(..., curves = TRUE) to generate data for plots" = (!is.null(x$roc_dat)),
48 1
    "alpha must be between 0 and 1" = (alpha >= 0 & alpha <= 1),
49 1
    "n must be > 0" = (n > 0)
50
  )
51
  
52 1
  obj<- x
53 1
  fullsims <- length(obj$roc_dat) > 1
54
  
55 1
  if (!fullsims) {
56
    
57 1
    graphics::par(mfrow = c(1, 2))
58 1
    plot(obj$roc_dat[[1]], type = "s", xlab = "FPR", ylab = "TPR")
59 1
    graphics::abline(a = 0, b = 1, lty = 3, col = "gray50")
60
    
61 1
    prc_dat <- obj$prc_dat[[1]]
62
    # use first non-NaN y-value for y[1]
63 1
    prc_dat$y[1] <- prc_dat$y[2]
64 1
    plot(prc_dat, type = "l", xlab = "TPR", ylab = "Precision",
65 1
         ylim = c(0, 1))
66 1
    graphics::abline(a = attr(x, "y_pos_rate"), b = 0, lty = 3, col = "gray50")
67
    
68
  } else {
69
    
70 1
    graphics::par(mfrow = c(1, 2))
71
    
72 1
    roc_dat <- obj$roc_dat
73
    
74 1
    x <- lapply(roc_dat, `[[`, 1)
75 1
    x <- do.call(cbind, x)
76 1
    colnames(x) <- paste0("sim", 1:ncol(x))
77
    
78 1
    y <- lapply(roc_dat, `[[`, 2)
79 1
    y <- do.call(cbind, y)
80 1
    colnames(y) <- paste0("sim", 1:ncol(y))
81
    
82 1
    xavg <- rowMeans(x)
83 1
    yavg <- rowMeans(y)
84
    
85 1
    plot(xavg, yavg, type = "n", xlab = "FPR", ylab = "TPR")
86 1
    samples <- sample(1:ncol(x), n)
87 1
    for (i in samples) {
88 1
      graphics::lines(
89 1
        x[, i], y[, i], type = "s",
90 1
        col = grDevices::rgb(127, 127, 127, alpha = alpha*255, maxColorValue = 255)
91
      )
92
    }
93 1
    graphics::lines(xavg, yavg, type = "s")
94
    
95
    # PRC
96
    # The elements of prc_dat have different lengths, unlike roc_dat, so we
97
    # have to do the central curve differently.
98 1
    prc_dat <- obj$prc_dat
99
    
100 1
    x <- lapply(prc_dat, `[[`, 1)
101 1
    y <- lapply(prc_dat, `[[`, 2)
102
    
103
    # Instead of combining the list of curve coordinates from each sample into
104
    # two x and y matrices, we can first make a point cloud with all curve 
105
    # points from all samples, and then average the y values at all distinct
106
    # x coordinates. The x-axis plots recall (TPR), which will only have as 
107
    # many distinct values as there are positives in the data, so this does 
108
    # not lose any information about the x coordinates. 
109 1
    point_cloud <- data.frame(
110 1
      x = unlist(x),
111 1
      y = unlist(y)
112
    )
113 1
    point_cloud <- stats::aggregate(point_cloud[, "y", drop = FALSE], 
114
                                    # factor implicitly encodes distinct values only,
115
                                    # since they will get the same labels
116 1
                                    by = list(x = as.factor(point_cloud$x)), 
117 1
                                    FUN = mean)
118 1
    point_cloud$x <- as.numeric(as.character(point_cloud$x))
119 1
    xavg <- point_cloud$x
120 1
    yavg <- point_cloud$y
121
    
122 1
    plot(xavg, yavg, type = "n", xlab = "TPR", ylab = "Precision", ylim = c(0, 1))
123 1
    samples <- sample(1:length(prc_dat), n)
124 1
    for (i in samples) {
125 1
      graphics::lines(
126 1
        x[[i]], y[[i]], 
127 1
        col = grDevices::rgb(127, 127, 127, alpha = alpha*255, maxColorValue = 255)
128
      )
129
    }
130 1
    graphics::lines(xavg, yavg)
131
    
132
  }
133
  
134 1
  invisible(x)
135
}
136

137
#' @rdname mcmcRocPrc
138
#' 
139
#' @param row.names see [base::as.data.frame()] 
140
#' @param optional see [base::as.data.frame()]
141
#' @param what which information to extract and convert to a data frame?
142
#' 
143
#' @export
144
as.data.frame.mcmcRocPrc <- function(x, row.names = NULL, optional = FALSE,
145
                                     what = c("auc", "roc", "prc"), ...) {
146 1
  what <- match.arg(what)
147 1
  if (what=="auc") {
148
    # all 4 output types have AUC, so this should work across the board
149 1
    return(as.data.frame(x[c("area_under_roc", "area_under_prc")]))
150
    
151 1
  } else if (what %in% c("roc", "prc")) {
152 1
    if (what=="roc") element <- "roc_dat" else element <- "prc_dat"
153
    
154
    # if curves was FALSE, there will be no curve data...
155 1
    if (is.null(x[[element]])) {
156 1
      stop("No curve data; use mcmcRocPrc(..., curves = TRUE)")
157
    }
158
    
159
    # Otherwise, there will be either one set of coordinates if mcmcmRegPrc()
160
    # was called with fullsims = FALSE, or else N_sims curve data sets.
161
    # If the latter, we can return a long data frame with an identifying 
162
    # "sim" column to delineate the sim sets. To ensure consistency in output,
163
    # also add this column when fullsims = FALSE.
164
    
165
    # averaged, single coordinate set
166 1
    if (length(x[[element]])==1L) {
167 1
      return(data.frame(sim = 1L, x[[element]][[1]]))
168
    }
169
    
170
    # full sims
171
    # add a unique ID to each coordinate set
172 1
    outlist <- x[[element]]
173 1
    outlist <- Map(cbind, sim = (1:length(outlist)), outlist)
174
    # combine into long data frame
175 1
    outdf <- do.call(rbind, outlist)
176 1
    return(outdf)
177
  } 
178
  stop("Developer error (I should not be here): please file an issue on GitHub")  # nocov
179
}
180

181

182

183

Read our documentation on viewing source code .

Loading