mcmcRocPrc.R is quite long
1 |
#
|
|
2 |
# Methods for class "mcmcRocPrc", generated by mcmcRocPrc()
|
|
3 |
#
|
|
4 |
|
|
5 |
#' @rdname mcmcRocPrc
|
|
6 |
#'
|
|
7 |
#' @export
|
|
8 |
print.mcmcRocPrc <- function(x, ...) { |
|
9 |
|
|
10 | 1 |
auc_roc <- x$area_under_roc |
11 | 1 |
auc_prc <- x$area_under_prc |
12 |
|
|
13 | 1 |
has_curves <- !is.null(x$roc_dat) |
14 | 1 |
has_sims <- length(auc_roc) > 1 |
15 |
|
|
16 | 1 |
if (!has_sims) { |
17 | 1 |
roc_msg <- sprintf("%.3f", round(auc_roc, 3)) |
18 | 1 |
prc_msg <- sprintf("%.3f", round(auc_prc, 3)) |
19 |
} else { |
|
20 | 1 |
roc_msg <- sprintf("%.3f [80%%: %.3f - %.3f]", |
21 | 1 |
round(mean(auc_roc), 3), |
22 | 1 |
round(quantile(auc_roc, 0.1), 3), |
23 | 1 |
round(quantile(auc_roc, 0.9), 3)) |
24 | 1 |
prc_msg <- sprintf("%.3f [80%%: %.3f - %.3f]", |
25 | 1 |
round(mean(auc_prc), 3), |
26 | 1 |
round(quantile(auc_prc, 0.1), 3), |
27 | 1 |
round(quantile(auc_prc, 0.9), 3)) |
28 |
}
|
|
29 |
|
|
30 | 1 |
cat("mcmcRocPrc object\n") |
31 | 1 |
cat(sprintf("curves: %s; fullsims: %s\n", has_curves, has_sims)) |
32 | 1 |
cat(sprintf("AUC-ROC: %s\n", roc_msg)) |
33 | 1 |
cat(sprintf("AUC-PR: %s\n", prc_msg)) |
34 |
|
|
35 | 1 |
invisible(x) |
36 |
}
|
|
37 |
|
|
38 |
#' @rdname mcmcRocPrc
|
|
39 |
#'
|
|
40 |
#' @param n plot method: if `fullsims = TRUE`, how many sample curves to draw?
|
|
41 |
#' @param alpha plot method: alpha value for plotting sampled curves; between 0 and 1
|
|
42 |
#'
|
|
43 |
#' @export
|
|
44 |
plot.mcmcRocPrc <- function(x, n = 40, alpha = .5, ...) { |
|
45 |
|
|
46 | 1 |
stopifnot( |
47 | 1 |
"Use mcmcRocPrc(..., curves = TRUE) to generate data for plots" = (!is.null(x$roc_dat)), |
48 | 1 |
"alpha must be between 0 and 1" = (alpha >= 0 & alpha <= 1), |
49 | 1 |
"n must be > 0" = (n > 0) |
50 |
)
|
|
51 |
|
|
52 | 1 |
obj<- x
|
53 | 1 |
fullsims <- length(obj$roc_dat) > 1 |
54 |
|
|
55 | 1 |
if (!fullsims) { |
56 |
|
|
57 | 1 |
graphics::par(mfrow = c(1, 2)) |
58 | 1 |
plot(obj$roc_dat[[1]], type = "s", xlab = "FPR", ylab = "TPR") |
59 | 1 |
graphics::abline(a = 0, b = 1, lty = 3, col = "gray50") |
60 |
|
|
61 | 1 |
prc_dat <- obj$prc_dat[[1]] |
62 |
# use first non-NaN y-value for y[1]
|
|
63 | 1 |
prc_dat$y[1] <- prc_dat$y[2] |
64 | 1 |
plot(prc_dat, type = "l", xlab = "TPR", ylab = "Precision", |
65 | 1 |
ylim = c(0, 1)) |
66 | 1 |
graphics::abline(a = attr(x, "y_pos_rate"), b = 0, lty = 3, col = "gray50") |
67 |
|
|
68 |
} else { |
|
69 |
|
|
70 | 1 |
graphics::par(mfrow = c(1, 2)) |
71 |
|
|
72 | 1 |
roc_dat <- obj$roc_dat |
73 |
|
|
74 | 1 |
x <- lapply(roc_dat, `[[`, 1) |
75 | 1 |
x <- do.call(cbind, x) |
76 | 1 |
colnames(x) <- paste0("sim", 1:ncol(x)) |
77 |
|
|
78 | 1 |
y <- lapply(roc_dat, `[[`, 2) |
79 | 1 |
y <- do.call(cbind, y) |
80 | 1 |
colnames(y) <- paste0("sim", 1:ncol(y)) |
81 |
|
|
82 | 1 |
xavg <- rowMeans(x) |
83 | 1 |
yavg <- rowMeans(y) |
84 |
|
|
85 | 1 |
plot(xavg, yavg, type = "n", xlab = "FPR", ylab = "TPR") |
86 | 1 |
samples <- sample(1:ncol(x), n) |
87 | 1 |
for (i in samples) { |
88 | 1 |
graphics::lines( |
89 | 1 |
x[, i], y[, i], type = "s", |
90 | 1 |
col = grDevices::rgb(127, 127, 127, alpha = alpha*255, maxColorValue = 255) |
91 |
)
|
|
92 |
}
|
|
93 | 1 |
graphics::lines(xavg, yavg, type = "s") |
94 |
|
|
95 |
# PRC
|
|
96 |
# The elements of prc_dat have different lengths, unlike roc_dat, so we
|
|
97 |
# have to do the central curve differently.
|
|
98 | 1 |
prc_dat <- obj$prc_dat |
99 |
|
|
100 | 1 |
x <- lapply(prc_dat, `[[`, 1) |
101 | 1 |
y <- lapply(prc_dat, `[[`, 2) |
102 |
|
|
103 |
# Instead of combining the list of curve coordinates from each sample into
|
|
104 |
# two x and y matrices, we can first make a point cloud with all curve
|
|
105 |
# points from all samples, and then average the y values at all distinct
|
|
106 |
# x coordinates. The x-axis plots recall (TPR), which will only have as
|
|
107 |
# many distinct values as there are positives in the data, so this does
|
|
108 |
# not lose any information about the x coordinates.
|
|
109 | 1 |
point_cloud <- data.frame( |
110 | 1 |
x = unlist(x), |
111 | 1 |
y = unlist(y) |
112 |
)
|
|
113 | 1 |
point_cloud <- stats::aggregate(point_cloud[, "y", drop = FALSE], |
114 |
# factor implicitly encodes distinct values only,
|
|
115 |
# since they will get the same labels
|
|
116 | 1 |
by = list(x = as.factor(point_cloud$x)), |
117 | 1 |
FUN = mean) |
118 | 1 |
point_cloud$x <- as.numeric(as.character(point_cloud$x)) |
119 | 1 |
xavg <- point_cloud$x |
120 | 1 |
yavg <- point_cloud$y |
121 |
|
|
122 | 1 |
plot(xavg, yavg, type = "n", xlab = "TPR", ylab = "Precision", ylim = c(0, 1)) |
123 | 1 |
samples <- sample(1:length(prc_dat), n) |
124 | 1 |
for (i in samples) { |
125 | 1 |
graphics::lines( |
126 | 1 |
x[[i]], y[[i]], |
127 | 1 |
col = grDevices::rgb(127, 127, 127, alpha = alpha*255, maxColorValue = 255) |
128 |
)
|
|
129 |
}
|
|
130 | 1 |
graphics::lines(xavg, yavg) |
131 |
|
|
132 |
}
|
|
133 |
|
|
134 | 1 |
invisible(x) |
135 |
}
|
|
136 |
|
|
137 |
#' @rdname mcmcRocPrc
|
|
138 |
#'
|
|
139 |
#' @param row.names see [base::as.data.frame()]
|
|
140 |
#' @param optional see [base::as.data.frame()]
|
|
141 |
#' @param what which information to extract and convert to a data frame?
|
|
142 |
#'
|
|
143 |
#' @export
|
|
144 |
as.data.frame.mcmcRocPrc <- function(x, row.names = NULL, optional = FALSE, |
|
145 |
what = c("auc", "roc", "prc"), ...) { |
|
146 | 1 |
what <- match.arg(what) |
147 | 1 |
if (what=="auc") { |
148 |
# all 4 output types have AUC, so this should work across the board
|
|
149 | 1 |
return(as.data.frame(x[c("area_under_roc", "area_under_prc")])) |
150 |
|
|
151 | 1 |
} else if (what %in% c("roc", "prc")) { |
152 | 1 |
if (what=="roc") element <- "roc_dat" else element <- "prc_dat" |
153 |
|
|
154 |
# if curves was FALSE, there will be no curve data...
|
|
155 | 1 |
if (is.null(x[[element]])) { |
156 | 1 |
stop("No curve data; use mcmcRocPrc(..., curves = TRUE)") |
157 |
}
|
|
158 |
|
|
159 |
# Otherwise, there will be either one set of coordinates if mcmcmRegPrc()
|
|
160 |
# was called with fullsims = FALSE, or else N_sims curve data sets.
|
|
161 |
# If the latter, we can return a long data frame with an identifying
|
|
162 |
# "sim" column to delineate the sim sets. To ensure consistency in output,
|
|
163 |
# also add this column when fullsims = FALSE.
|
|
164 |
|
|
165 |
# averaged, single coordinate set
|
|
166 | 1 |
if (length(x[[element]])==1L) { |
167 | 1 |
return(data.frame(sim = 1L, x[[element]][[1]])) |
168 |
}
|
|
169 |
|
|
170 |
# full sims
|
|
171 |
# add a unique ID to each coordinate set
|
|
172 | 1 |
outlist <- x[[element]] |
173 | 1 |
outlist <- Map(cbind, sim = (1:length(outlist)), outlist) |
174 |
# combine into long data frame
|
|
175 | 1 |
outdf <- do.call(rbind, outlist) |
176 | 1 |
return(outdf) |
177 |
}
|
|
178 |
stop("Developer error (I should not be here): please file an issue on GitHub") # nocov |
|
179 |
}
|
|
180 |
|
|
181 |
|
|
182 |
|
|
183 |
|
Read our documentation on viewing source code .