mcmcRocPrc.R is quite long
Showing 2 of 2 files from the diff.
R/mcmcRocPrc.R
changed.
Newly tracked file
R/mcmcRocPrc-methods.R
created.
@@ -1,3 +1,12 @@
Loading
1 | + | # |
|
2 | + | # This file contains the mcmcRocPrc() S3 generic, which constructs objects |
|
3 | + | # of class "mcmcRocPrc". For methods for this class, see mcmcRocPrc-methods.R |
|
4 | + | # S3 methods for the mcmcRocPrc() generic handle different types of input |
|
5 | + | # e.g. "rjags" input produced by R2jags. |
|
6 | + | # |
|
7 | + | ||
8 | + | ||
9 | + | ||
1 | 10 | #' ROC and Precision-Recall Curves using Bayesian MCMC estimates |
|
2 | 11 | #' |
|
3 | 12 | #' Generate ROC and Precision-Recall curves after fitting a Bayesian logit or |
@@ -286,7 +295,8 @@
Loading
286 | 295 | } |
|
287 | 296 | ||
288 | 297 | ||
289 | - | # Methods for different input types --------------------------------------- |
|
298 | + | ||
299 | + | # JAGS-like input (rjags, R2jags, runjags) -------------------------------- |
|
290 | 300 | ||
291 | 301 | #' @rdname mcmcRocPrc |
|
292 | 302 | #' |
@@ -363,6 +373,10 @@
Loading
363 | 373 | } |
|
364 | 374 | ||
365 | 375 | ||
376 | + | # STAN-like input (rstan, rstanarm, brms) --------------------------------- |
|
377 | + | ||
378 | + | ||
379 | + | ||
366 | 380 | #' @rdname mcmcRocPrc |
|
367 | 381 | #' |
|
368 | 382 | #' @param data the data that was used in the `stan(data = ?, ...)` call |
@@ -474,6 +488,10 @@
Loading
474 | 488 | fullsims = fullsims) |
|
475 | 489 | } |
|
476 | 490 | ||
491 | + | ||
492 | + | # Other input types (MCMCpack, ...) --------------------------------------- |
|
493 | + | ||
494 | + | ||
477 | 495 | #' #' @rdname mcmcRocPrc |
|
478 | 496 | #' #' |
|
479 | 497 | #' #' @export |
@@ -528,184 +546,4 @@
Loading
528 | 546 | } |
|
529 | 547 | ||
530 | 548 | ||
531 | - | # mcmcRocPrc methods for other generics ----------------------------------- |
|
532 | - | ||
533 | - | #' @rdname mcmcRocPrc |
|
534 | - | #' |
|
535 | - | #' @export |
|
536 | - | print.mcmcRocPrc <- function(x, ...) { |
|
537 | - | ||
538 | - | auc_roc <- x$area_under_roc |
|
539 | - | auc_prc <- x$area_under_prc |
|
540 | - | ||
541 | - | has_curves <- !is.null(x$roc_dat) |
|
542 | - | has_sims <- length(auc_roc) > 1 |
|
543 | - | ||
544 | - | if (!has_sims) { |
|
545 | - | roc_msg <- sprintf("%.3f", round(auc_roc, 3)) |
|
546 | - | prc_msg <- sprintf("%.3f", round(auc_prc, 3)) |
|
547 | - | } else { |
|
548 | - | roc_msg <- sprintf("%.3f [80%%: %.3f - %.3f]", |
|
549 | - | round(mean(auc_roc), 3), |
|
550 | - | round(quantile(auc_roc, 0.1), 3), |
|
551 | - | round(quantile(auc_roc, 0.9), 3)) |
|
552 | - | prc_msg <- sprintf("%.3f [80%%: %.3f - %.3f]", |
|
553 | - | round(mean(auc_prc), 3), |
|
554 | - | round(quantile(auc_prc, 0.1), 3), |
|
555 | - | round(quantile(auc_prc, 0.9), 3)) |
|
556 | - | } |
|
557 | - | ||
558 | - | cat("mcmcRocPrc object\n") |
|
559 | - | cat(sprintf("curves: %s; fullsims: %s\n", has_curves, has_sims)) |
|
560 | - | cat(sprintf("AUC-ROC: %s\n", roc_msg)) |
|
561 | - | cat(sprintf("AUC-PR: %s\n", prc_msg)) |
|
562 | - | ||
563 | - | invisible(x) |
|
564 | - | } |
|
565 | - | ||
566 | - | #' @rdname mcmcRocPrc |
|
567 | - | #' |
|
568 | - | #' @param n plot method: if `fullsims = TRUE`, how many sample curves to draw? |
|
569 | - | #' @param alpha plot method: alpha value for plotting sampled curves; between 0 and 1 |
|
570 | - | #' |
|
571 | - | #' @export |
|
572 | - | plot.mcmcRocPrc <- function(x, n = 40, alpha = .5, ...) { |
|
573 | - | ||
574 | - | stopifnot( |
|
575 | - | "Use mcmcRocPrc(..., curves = TRUE) to generate data for plots" = (!is.null(x$roc_dat)), |
|
576 | - | "alpha must be between 0 and 1" = (alpha >= 0 & alpha <= 1), |
|
577 | - | "n must be > 0" = (n > 0) |
|
578 | - | ) |
|
579 | - | ||
580 | - | obj<- x |
|
581 | - | fullsims <- length(obj$roc_dat) > 1 |
|
582 | - | ||
583 | - | if (!fullsims) { |
|
584 | - | ||
585 | - | graphics::par(mfrow = c(1, 2)) |
|
586 | - | plot(obj$roc_dat[[1]], type = "s", xlab = "FPR", ylab = "TPR") |
|
587 | - | graphics::abline(a = 0, b = 1, lty = 3, col = "gray50") |
|
588 | - | ||
589 | - | prc_dat <- obj$prc_dat[[1]] |
|
590 | - | # use first non-NaN y-value for y[1] |
|
591 | - | prc_dat$y[1] <- prc_dat$y[2] |
|
592 | - | plot(prc_dat, type = "l", xlab = "TPR", ylab = "Precision", |
|
593 | - | ylim = c(0, 1)) |
|
594 | - | graphics::abline(a = attr(x, "y_pos_rate"), b = 0, lty = 3, col = "gray50") |
|
595 | - | ||
596 | - | } else { |
|
597 | - | ||
598 | - | graphics::par(mfrow = c(1, 2)) |
|
599 | - | ||
600 | - | roc_dat <- obj$roc_dat |
|
601 | - | ||
602 | - | x <- lapply(roc_dat, `[[`, 1) |
|
603 | - | x <- do.call(cbind, x) |
|
604 | - | colnames(x) <- paste0("sim", 1:ncol(x)) |
|
605 | - | ||
606 | - | y <- lapply(roc_dat, `[[`, 2) |
|
607 | - | y <- do.call(cbind, y) |
|
608 | - | colnames(y) <- paste0("sim", 1:ncol(y)) |
|
609 | - | ||
610 | - | xavg <- rowMeans(x) |
|
611 | - | yavg <- rowMeans(y) |
|
612 | - | ||
613 | - | plot(xavg, yavg, type = "n", xlab = "FPR", ylab = "TPR") |
|
614 | - | samples <- sample(1:ncol(x), n) |
|
615 | - | for (i in samples) { |
|
616 | - | graphics::lines( |
|
617 | - | x[, i], y[, i], type = "s", |
|
618 | - | col = grDevices::rgb(127, 127, 127, alpha = alpha*255, maxColorValue = 255) |
|
619 | - | ) |
|
620 | - | } |
|
621 | - | graphics::lines(xavg, yavg, type = "s") |
|
622 | - | ||
623 | - | # PRC |
|
624 | - | # The elements of prc_dat have different lengths, unlike roc_dat, so we |
|
625 | - | # have to do the central curve differently. |
|
626 | - | prc_dat <- obj$prc_dat |
|
627 | - | ||
628 | - | x <- lapply(prc_dat, `[[`, 1) |
|
629 | - | y <- lapply(prc_dat, `[[`, 2) |
|
630 | - | ||
631 | - | # Instead of combining the list of curve coordinates from each sample into |
|
632 | - | # two x and y matrices, we can first make a point cloud with all curve |
|
633 | - | # points from all samples, and then average the y values at all distinct |
|
634 | - | # x coordinates. The x-axis plots recall (TPR), which will only have as |
|
635 | - | # many distinct values as there are positives in the data, so this does |
|
636 | - | # not lose any information about the x coordinates. |
|
637 | - | point_cloud <- data.frame( |
|
638 | - | x = unlist(x), |
|
639 | - | y = unlist(y) |
|
640 | - | ) |
|
641 | - | point_cloud <- stats::aggregate(point_cloud[, "y", drop = FALSE], |
|
642 | - | # factor implicitly encodes distinct values only, |
|
643 | - | # since they will get the same labels |
|
644 | - | by = list(x = as.factor(point_cloud$x)), |
|
645 | - | FUN = mean) |
|
646 | - | point_cloud$x <- as.numeric(as.character(point_cloud$x)) |
|
647 | - | xavg <- point_cloud$x |
|
648 | - | yavg <- point_cloud$y |
|
649 | - | ||
650 | - | plot(xavg, yavg, type = "n", xlab = "TPR", ylab = "Precision", ylim = c(0, 1)) |
|
651 | - | samples <- sample(1:length(prc_dat), n) |
|
652 | - | for (i in samples) { |
|
653 | - | graphics::lines( |
|
654 | - | x[[i]], y[[i]], |
|
655 | - | col = grDevices::rgb(127, 127, 127, alpha = alpha*255, maxColorValue = 255) |
|
656 | - | ) |
|
657 | - | } |
|
658 | - | graphics::lines(xavg, yavg) |
|
659 | - | ||
660 | - | } |
|
661 | - | ||
662 | - | invisible(x) |
|
663 | - | } |
|
664 | - | ||
665 | - | #' @rdname mcmcRocPrc |
|
666 | - | #' |
|
667 | - | #' @param row.names see [base::as.data.frame()] |
|
668 | - | #' @param optional see [base::as.data.frame()] |
|
669 | - | #' @param what which information to extract and convert to a data frame? |
|
670 | - | #' |
|
671 | - | #' @export |
|
672 | - | as.data.frame.mcmcRocPrc <- function(x, row.names = NULL, optional = FALSE, |
|
673 | - | what = c("auc", "roc", "prc"), ...) { |
|
674 | - | what <- match.arg(what) |
|
675 | - | if (what=="auc") { |
|
676 | - | # all 4 output types have AUC, so this should work across the board |
|
677 | - | return(as.data.frame(x[c("area_under_roc", "area_under_prc")])) |
|
678 | - | ||
679 | - | } else if (what %in% c("roc", "prc")) { |
|
680 | - | if (what=="roc") element <- "roc_dat" else element <- "prc_dat" |
|
681 | - | ||
682 | - | # if curves was FALSE, there will be no curve data... |
|
683 | - | if (is.null(x[[element]])) { |
|
684 | - | stop("No curve data; use mcmcRocPrc(..., curves = TRUE)") |
|
685 | - | } |
|
686 | - | ||
687 | - | # Otherwise, there will be either one set of coordinates if mcmcmRegPrc() |
|
688 | - | # was called with fullsims = FALSE, or else N_sims curve data sets. |
|
689 | - | # If the latter, we can return a long data frame with an identifying |
|
690 | - | # "sim" column to delineate the sim sets. To ensure consistency in output, |
|
691 | - | # also add this column when fullsims = FALSE. |
|
692 | - | ||
693 | - | # averaged, single coordinate set |
|
694 | - | if (length(x[[element]])==1L) { |
|
695 | - | return(data.frame(sim = 1L, x[[element]][[1]])) |
|
696 | - | } |
|
697 | - | ||
698 | - | # full sims |
|
699 | - | # add a unique ID to each coordinate set |
|
700 | - | outlist <- x[[element]] |
|
701 | - | outlist <- Map(cbind, sim = (1:length(outlist)), outlist) |
|
702 | - | # combine into long data frame |
|
703 | - | outdf <- do.call(rbind, outlist) |
|
704 | - | return(outdf) |
|
705 | - | } |
|
706 | - | stop("Developer error (I should not be here): please file an issue on GitHub") # nocov |
|
707 | - | } |
|
708 | - | ||
709 | - | ||
710 | - | ||
711 | 549 |
@@ -0,0 +1,183 @@
Loading
1 | + | # |
|
2 | + | # Methods for class "mcmcRocPrc", generated by mcmcRocPrc() |
|
3 | + | # |
|
4 | + | ||
5 | + | #' @rdname mcmcRocPrc |
|
6 | + | #' |
|
7 | + | #' @export |
|
8 | + | print.mcmcRocPrc <- function(x, ...) { |
|
9 | + | ||
10 | + | auc_roc <- x$area_under_roc |
|
11 | + | auc_prc <- x$area_under_prc |
|
12 | + | ||
13 | + | has_curves <- !is.null(x$roc_dat) |
|
14 | + | has_sims <- length(auc_roc) > 1 |
|
15 | + | ||
16 | + | if (!has_sims) { |
|
17 | + | roc_msg <- sprintf("%.3f", round(auc_roc, 3)) |
|
18 | + | prc_msg <- sprintf("%.3f", round(auc_prc, 3)) |
|
19 | + | } else { |
|
20 | + | roc_msg <- sprintf("%.3f [80%%: %.3f - %.3f]", |
|
21 | + | round(mean(auc_roc), 3), |
|
22 | + | round(quantile(auc_roc, 0.1), 3), |
|
23 | + | round(quantile(auc_roc, 0.9), 3)) |
|
24 | + | prc_msg <- sprintf("%.3f [80%%: %.3f - %.3f]", |
|
25 | + | round(mean(auc_prc), 3), |
|
26 | + | round(quantile(auc_prc, 0.1), 3), |
|
27 | + | round(quantile(auc_prc, 0.9), 3)) |
|
28 | + | } |
|
29 | + | ||
30 | + | cat("mcmcRocPrc object\n") |
|
31 | + | cat(sprintf("curves: %s; fullsims: %s\n", has_curves, has_sims)) |
|
32 | + | cat(sprintf("AUC-ROC: %s\n", roc_msg)) |
|
33 | + | cat(sprintf("AUC-PR: %s\n", prc_msg)) |
|
34 | + | ||
35 | + | invisible(x) |
|
36 | + | } |
|
37 | + | ||
38 | + | #' @rdname mcmcRocPrc |
|
39 | + | #' |
|
40 | + | #' @param n plot method: if `fullsims = TRUE`, how many sample curves to draw? |
|
41 | + | #' @param alpha plot method: alpha value for plotting sampled curves; between 0 and 1 |
|
42 | + | #' |
|
43 | + | #' @export |
|
44 | + | plot.mcmcRocPrc <- function(x, n = 40, alpha = .5, ...) { |
|
45 | + | ||
46 | + | stopifnot( |
|
47 | + | "Use mcmcRocPrc(..., curves = TRUE) to generate data for plots" = (!is.null(x$roc_dat)), |
|
48 | + | "alpha must be between 0 and 1" = (alpha >= 0 & alpha <= 1), |
|
49 | + | "n must be > 0" = (n > 0) |
|
50 | + | ) |
|
51 | + | ||
52 | + | obj<- x |
|
53 | + | fullsims <- length(obj$roc_dat) > 1 |
|
54 | + | ||
55 | + | if (!fullsims) { |
|
56 | + | ||
57 | + | graphics::par(mfrow = c(1, 2)) |
|
58 | + | plot(obj$roc_dat[[1]], type = "s", xlab = "FPR", ylab = "TPR") |
|
59 | + | graphics::abline(a = 0, b = 1, lty = 3, col = "gray50") |
|
60 | + | ||
61 | + | prc_dat <- obj$prc_dat[[1]] |
|
62 | + | # use first non-NaN y-value for y[1] |
|
63 | + | prc_dat$y[1] <- prc_dat$y[2] |
|
64 | + | plot(prc_dat, type = "l", xlab = "TPR", ylab = "Precision", |
|
65 | + | ylim = c(0, 1)) |
|
66 | + | graphics::abline(a = attr(x, "y_pos_rate"), b = 0, lty = 3, col = "gray50") |
|
67 | + | ||
68 | + | } else { |
|
69 | + | ||
70 | + | graphics::par(mfrow = c(1, 2)) |
|
71 | + | ||
72 | + | roc_dat <- obj$roc_dat |
|
73 | + | ||
74 | + | x <- lapply(roc_dat, `[[`, 1) |
|
75 | + | x <- do.call(cbind, x) |
|
76 | + | colnames(x) <- paste0("sim", 1:ncol(x)) |
|
77 | + | ||
78 | + | y <- lapply(roc_dat, `[[`, 2) |
|
79 | + | y <- do.call(cbind, y) |
|
80 | + | colnames(y) <- paste0("sim", 1:ncol(y)) |
|
81 | + | ||
82 | + | xavg <- rowMeans(x) |
|
83 | + | yavg <- rowMeans(y) |
|
84 | + | ||
85 | + | plot(xavg, yavg, type = "n", xlab = "FPR", ylab = "TPR") |
|
86 | + | samples <- sample(1:ncol(x), n) |
|
87 | + | for (i in samples) { |
|
88 | + | graphics::lines( |
|
89 | + | x[, i], y[, i], type = "s", |
|
90 | + | col = grDevices::rgb(127, 127, 127, alpha = alpha*255, maxColorValue = 255) |
|
91 | + | ) |
|
92 | + | } |
|
93 | + | graphics::lines(xavg, yavg, type = "s") |
|
94 | + | ||
95 | + | # PRC |
|
96 | + | # The elements of prc_dat have different lengths, unlike roc_dat, so we |
|
97 | + | # have to do the central curve differently. |
|
98 | + | prc_dat <- obj$prc_dat |
|
99 | + | ||
100 | + | x <- lapply(prc_dat, `[[`, 1) |
|
101 | + | y <- lapply(prc_dat, `[[`, 2) |
|
102 | + | ||
103 | + | # Instead of combining the list of curve coordinates from each sample into |
|
104 | + | # two x and y matrices, we can first make a point cloud with all curve |
|
105 | + | # points from all samples, and then average the y values at all distinct |
|
106 | + | # x coordinates. The x-axis plots recall (TPR), which will only have as |
|
107 | + | # many distinct values as there are positives in the data, so this does |
|
108 | + | # not lose any information about the x coordinates. |
|
109 | + | point_cloud <- data.frame( |
|
110 | + | x = unlist(x), |
|
111 | + | y = unlist(y) |
|
112 | + | ) |
|
113 | + | point_cloud <- stats::aggregate(point_cloud[, "y", drop = FALSE], |
|
114 | + | # factor implicitly encodes distinct values only, |
|
115 | + | # since they will get the same labels |
|
116 | + | by = list(x = as.factor(point_cloud$x)), |
|
117 | + | FUN = mean) |
|
118 | + | point_cloud$x <- as.numeric(as.character(point_cloud$x)) |
|
119 | + | xavg <- point_cloud$x |
|
120 | + | yavg <- point_cloud$y |
|
121 | + | ||
122 | + | plot(xavg, yavg, type = "n", xlab = "TPR", ylab = "Precision", ylim = c(0, 1)) |
|
123 | + | samples <- sample(1:length(prc_dat), n) |
|
124 | + | for (i in samples) { |
|
125 | + | graphics::lines( |
|
126 | + | x[[i]], y[[i]], |
|
127 | + | col = grDevices::rgb(127, 127, 127, alpha = alpha*255, maxColorValue = 255) |
|
128 | + | ) |
|
129 | + | } |
|
130 | + | graphics::lines(xavg, yavg) |
|
131 | + | ||
132 | + | } |
|
133 | + | ||
134 | + | invisible(x) |
|
135 | + | } |
|
136 | + | ||
137 | + | #' @rdname mcmcRocPrc |
|
138 | + | #' |
|
139 | + | #' @param row.names see [base::as.data.frame()] |
|
140 | + | #' @param optional see [base::as.data.frame()] |
|
141 | + | #' @param what which information to extract and convert to a data frame? |
|
142 | + | #' |
|
143 | + | #' @export |
|
144 | + | as.data.frame.mcmcRocPrc <- function(x, row.names = NULL, optional = FALSE, |
|
145 | + | what = c("auc", "roc", "prc"), ...) { |
|
146 | + | what <- match.arg(what) |
|
147 | + | if (what=="auc") { |
|
148 | + | # all 4 output types have AUC, so this should work across the board |
|
149 | + | return(as.data.frame(x[c("area_under_roc", "area_under_prc")])) |
|
150 | + | ||
151 | + | } else if (what %in% c("roc", "prc")) { |
|
152 | + | if (what=="roc") element <- "roc_dat" else element <- "prc_dat" |
|
153 | + | ||
154 | + | # if curves was FALSE, there will be no curve data... |
|
155 | + | if (is.null(x[[element]])) { |
|
156 | + | stop("No curve data; use mcmcRocPrc(..., curves = TRUE)") |
|
157 | + | } |
|
158 | + | ||
159 | + | # Otherwise, there will be either one set of coordinates if mcmcmRegPrc() |
|
160 | + | # was called with fullsims = FALSE, or else N_sims curve data sets. |
|
161 | + | # If the latter, we can return a long data frame with an identifying |
|
162 | + | # "sim" column to delineate the sim sets. To ensure consistency in output, |
|
163 | + | # also add this column when fullsims = FALSE. |
|
164 | + | ||
165 | + | # averaged, single coordinate set |
|
166 | + | if (length(x[[element]])==1L) { |
|
167 | + | return(data.frame(sim = 1L, x[[element]][[1]])) |
|
168 | + | } |
|
169 | + | ||
170 | + | # full sims |
|
171 | + | # add a unique ID to each coordinate set |
|
172 | + | outlist <- x[[element]] |
|
173 | + | outlist <- Map(cbind, sim = (1:length(outlist)), outlist) |
|
174 | + | # combine into long data frame |
|
175 | + | outdf <- do.call(rbind, outlist) |
|
176 | + | return(outdf) |
|
177 | + | } |
|
178 | + | stop("Developer error (I should not be here): please file an issue on GitHub") # nocov |
|
179 | + | } |
|
180 | + | ||
181 | + | ||
182 | + | ||
183 | + |
Files | Coverage |
---|---|
R | 83.11% |
Project Totals (11 files) | 83.11% |
Sunburst
The inner-most circle is the entire project, moving away from the center are folders then, finally, a single file.
The size and color of each slice is representing the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project. Proceeding with folders and finally individual files.
The size and color of each slice is representing the number of statements and the coverage, respectively.