Showing 2 of 2 files from the diff.

@@ -1,3 +1,12 @@
Loading
1 +
#
2 +
#   This file contains the mcmcRocPrc() S3 generic, which constructs objects
3 +
#   of class "mcmcRocPrc". For methods for this class, see mcmcRocPrc-methods.R
4 +
#   S3 methods for the mcmcRocPrc() generic handle different types of input
5 +
#   e.g. "rjags" input produced by R2jags.
6 +
#
7 +
8 +
9 +
1 10
#' ROC and Precision-Recall Curves using Bayesian MCMC estimates
2 11
#' 
3 12
#' Generate ROC and Precision-Recall curves after fitting a Bayesian logit or 
@@ -286,7 +295,8 @@
Loading
286 295
}
287 296
288 297
289 -
# Methods for different input types ---------------------------------------
298 +
299 +
# JAGS-like input (rjags, R2jags, runjags) --------------------------------
290 300
291 301
#' @rdname mcmcRocPrc
292 302
#' 
@@ -363,6 +373,10 @@
Loading
363 373
}
364 374
365 375
376 +
# STAN-like input (rstan, rstanarm, brms) ---------------------------------
377 +
378 +
379 +
366 380
#' @rdname mcmcRocPrc
367 381
#' 
368 382
#' @param data the data that was used in the `stan(data = ?, ...)` call
@@ -474,6 +488,10 @@
Loading
474 488
                 fullsims = fullsims)
475 489
}
476 490
491 +
492 +
# Other input types (MCMCpack, ...) ---------------------------------------
493 +
494 +
477 495
#' #' @rdname mcmcRocPrc
478 496
#' #'
479 497
#' #' @export
@@ -528,184 +546,4 @@
Loading
528 546
}
529 547
530 548
531 -
# mcmcRocPrc methods for other generics -----------------------------------
532 -
533 -
#' @rdname mcmcRocPrc
534 -
#' 
535 -
#' @export
536 -
print.mcmcRocPrc <- function(x, ...) {
537 -
  
538 -
  auc_roc <- x$area_under_roc
539 -
  auc_prc <- x$area_under_prc
540 -
  
541 -
  has_curves <- !is.null(x$roc_dat)
542 -
  has_sims   <- length(auc_roc) > 1
543 -
  
544 -
  if (!has_sims) {
545 -
    roc_msg <- sprintf("%.3f", round(auc_roc, 3))
546 -
    prc_msg <- sprintf("%.3f", round(auc_prc, 3))
547 -
  } else {
548 -
    roc_msg <- sprintf("%.3f [80%%: %.3f - %.3f]",
549 -
                       round(mean(auc_roc), 3), 
550 -
                       round(quantile(auc_roc, 0.1), 3), 
551 -
                       round(quantile(auc_roc, 0.9), 3))
552 -
    prc_msg <- sprintf("%.3f [80%%: %.3f - %.3f]",
553 -
                       round(mean(auc_prc), 3), 
554 -
                       round(quantile(auc_prc, 0.1), 3), 
555 -
                       round(quantile(auc_prc, 0.9), 3))
556 -
  }
557 -
  
558 -
  cat("mcmcRocPrc object\n")
559 -
  cat(sprintf("curves: %s; fullsims: %s\n", has_curves, has_sims))
560 -
  cat(sprintf("AUC-ROC: %s\n", roc_msg))
561 -
  cat(sprintf("AUC-PR:  %s\n", prc_msg))
562 -
  
563 -
  invisible(x)
564 -
}
565 -
566 -
#' @rdname mcmcRocPrc
567 -
#' 
568 -
#' @param n plot method: if `fullsims = TRUE`, how many sample curves to draw?
569 -
#' @param alpha plot method: alpha value for plotting sampled curves; between 0 and 1
570 -
#' 
571 -
#' @export
572 -
plot.mcmcRocPrc <- function(x, n = 40, alpha = .5, ...) {
573 -
574 -
  stopifnot(
575 -
    "Use mcmcRocPrc(..., curves = TRUE) to generate data for plots" = (!is.null(x$roc_dat)),
576 -
    "alpha must be between 0 and 1" = (alpha >= 0 & alpha <= 1),
577 -
    "n must be > 0" = (n > 0)
578 -
  )
579 -
  
580 -
  obj<- x
581 -
  fullsims <- length(obj$roc_dat) > 1
582 -
  
583 -
  if (!fullsims) {
584 -
    
585 -
    graphics::par(mfrow = c(1, 2))
586 -
    plot(obj$roc_dat[[1]], type = "s", xlab = "FPR", ylab = "TPR")
587 -
    graphics::abline(a = 0, b = 1, lty = 3, col = "gray50")
588 -
    
589 -
    prc_dat <- obj$prc_dat[[1]]
590 -
    # use first non-NaN y-value for y[1]
591 -
    prc_dat$y[1] <- prc_dat$y[2]
592 -
    plot(prc_dat, type = "l", xlab = "TPR", ylab = "Precision",
593 -
         ylim = c(0, 1))
594 -
    graphics::abline(a = attr(x, "y_pos_rate"), b = 0, lty = 3, col = "gray50")
595 -
    
596 -
  } else {
597 -
    
598 -
    graphics::par(mfrow = c(1, 2))
599 -
    
600 -
    roc_dat <- obj$roc_dat
601 -
    
602 -
    x <- lapply(roc_dat, `[[`, 1)
603 -
    x <- do.call(cbind, x)
604 -
    colnames(x) <- paste0("sim", 1:ncol(x))
605 -
    
606 -
    y <- lapply(roc_dat, `[[`, 2)
607 -
    y <- do.call(cbind, y)
608 -
    colnames(y) <- paste0("sim", 1:ncol(y))
609 -
    
610 -
    xavg <- rowMeans(x)
611 -
    yavg <- rowMeans(y)
612 -
    
613 -
    plot(xavg, yavg, type = "n", xlab = "FPR", ylab = "TPR")
614 -
    samples <- sample(1:ncol(x), n)
615 -
    for (i in samples) {
616 -
      graphics::lines(
617 -
        x[, i], y[, i], type = "s",
618 -
        col = grDevices::rgb(127, 127, 127, alpha = alpha*255, maxColorValue = 255)
619 -
      )
620 -
    }
621 -
    graphics::lines(xavg, yavg, type = "s")
622 -
    
623 -
    # PRC
624 -
    # The elements of prc_dat have different lengths, unlike roc_dat, so we
625 -
    # have to do the central curve differently.
626 -
    prc_dat <- obj$prc_dat
627 -
628 -
    x <- lapply(prc_dat, `[[`, 1)
629 -
    y <- lapply(prc_dat, `[[`, 2)
630 -
    
631 -
    # Instead of combining the list of curve coordinates from each sample into
632 -
    # two x and y matrices, we can first make a point cloud with all curve 
633 -
    # points from all samples, and then average the y values at all distinct
634 -
    # x coordinates. The x-axis plots recall (TPR), which will only have as 
635 -
    # many distinct values as there are positives in the data, so this does 
636 -
    # not lose any information about the x coordinates. 
637 -
    point_cloud <- data.frame(
638 -
      x = unlist(x),
639 -
      y = unlist(y)
640 -
    )
641 -
    point_cloud <- stats::aggregate(point_cloud[, "y", drop = FALSE], 
642 -
                                    # factor implicitly encodes distinct values only,
643 -
                                    # since they will get the same labels
644 -
                                    by = list(x = as.factor(point_cloud$x)), 
645 -
                                    FUN = mean)
646 -
    point_cloud$x <- as.numeric(as.character(point_cloud$x))
647 -
    xavg <- point_cloud$x
648 -
    yavg <- point_cloud$y
649 -
650 -
    plot(xavg, yavg, type = "n", xlab = "TPR", ylab = "Precision", ylim = c(0, 1))
651 -
    samples <- sample(1:length(prc_dat), n)
652 -
    for (i in samples) {
653 -
      graphics::lines(
654 -
        x[[i]], y[[i]], 
655 -
        col = grDevices::rgb(127, 127, 127, alpha = alpha*255, maxColorValue = 255)
656 -
      )
657 -
    }
658 -
    graphics::lines(xavg, yavg)
659 -
    
660 -
  }
661 -
  
662 -
  invisible(x)
663 -
}
664 -
665 -
#' @rdname mcmcRocPrc
666 -
#' 
667 -
#' @param row.names see [base::as.data.frame()] 
668 -
#' @param optional see [base::as.data.frame()]
669 -
#' @param what which information to extract and convert to a data frame?
670 -
#' 
671 -
#' @export
672 -
as.data.frame.mcmcRocPrc <- function(x, row.names = NULL, optional = FALSE,
673 -
                                     what = c("auc", "roc", "prc"), ...) {
674 -
  what <- match.arg(what)
675 -
  if (what=="auc") {
676 -
    # all 4 output types have AUC, so this should work across the board
677 -
    return(as.data.frame(x[c("area_under_roc", "area_under_prc")]))
678 -
    
679 -
  } else if (what %in% c("roc", "prc")) {
680 -
    if (what=="roc") element <- "roc_dat" else element <- "prc_dat"
681 -
    
682 -
    # if curves was FALSE, there will be no curve data...
683 -
    if (is.null(x[[element]])) {
684 -
      stop("No curve data; use mcmcRocPrc(..., curves = TRUE)")
685 -
    }
686 -
    
687 -
    # Otherwise, there will be either one set of coordinates if mcmcmRegPrc()
688 -
    # was called with fullsims = FALSE, or else N_sims curve data sets.
689 -
    # If the latter, we can return a long data frame with an identifying 
690 -
    # "sim" column to delineate the sim sets. To ensure consistency in output,
691 -
    # also add this column when fullsims = FALSE.
692 -
    
693 -
    # averaged, single coordinate set
694 -
    if (length(x[[element]])==1L) {
695 -
      return(data.frame(sim = 1L, x[[element]][[1]]))
696 -
    }
697 -
    
698 -
    # full sims
699 -
    # add a unique ID to each coordinate set
700 -
    outlist <- x[[element]]
701 -
    outlist <- Map(cbind, sim = (1:length(outlist)), outlist)
702 -
    # combine into long data frame
703 -
    outdf <- do.call(rbind, outlist)
704 -
    return(outdf)
705 -
  } 
706 -
  stop("Developer error (I should not be here): please file an issue on GitHub")  # nocov
707 -
}
708 -
709 -
710 -
711 549

@@ -0,0 +1,183 @@
Loading
1 +
#
2 +
#   Methods for class "mcmcRocPrc", generated by mcmcRocPrc()
3 +
#
4 +
5 +
#' @rdname mcmcRocPrc
6 +
#' 
7 +
#' @export
8 +
print.mcmcRocPrc <- function(x, ...) {
9 +
  
10 +
  auc_roc <- x$area_under_roc
11 +
  auc_prc <- x$area_under_prc
12 +
  
13 +
  has_curves <- !is.null(x$roc_dat)
14 +
  has_sims   <- length(auc_roc) > 1
15 +
  
16 +
  if (!has_sims) {
17 +
    roc_msg <- sprintf("%.3f", round(auc_roc, 3))
18 +
    prc_msg <- sprintf("%.3f", round(auc_prc, 3))
19 +
  } else {
20 +
    roc_msg <- sprintf("%.3f [80%%: %.3f - %.3f]",
21 +
                       round(mean(auc_roc), 3), 
22 +
                       round(quantile(auc_roc, 0.1), 3), 
23 +
                       round(quantile(auc_roc, 0.9), 3))
24 +
    prc_msg <- sprintf("%.3f [80%%: %.3f - %.3f]",
25 +
                       round(mean(auc_prc), 3), 
26 +
                       round(quantile(auc_prc, 0.1), 3), 
27 +
                       round(quantile(auc_prc, 0.9), 3))
28 +
  }
29 +
  
30 +
  cat("mcmcRocPrc object\n")
31 +
  cat(sprintf("curves: %s; fullsims: %s\n", has_curves, has_sims))
32 +
  cat(sprintf("AUC-ROC: %s\n", roc_msg))
33 +
  cat(sprintf("AUC-PR:  %s\n", prc_msg))
34 +
  
35 +
  invisible(x)
36 +
}
37 +
38 +
#' @rdname mcmcRocPrc
39 +
#' 
40 +
#' @param n plot method: if `fullsims = TRUE`, how many sample curves to draw?
41 +
#' @param alpha plot method: alpha value for plotting sampled curves; between 0 and 1
42 +
#' 
43 +
#' @export
44 +
plot.mcmcRocPrc <- function(x, n = 40, alpha = .5, ...) {
45 +
  
46 +
  stopifnot(
47 +
    "Use mcmcRocPrc(..., curves = TRUE) to generate data for plots" = (!is.null(x$roc_dat)),
48 +
    "alpha must be between 0 and 1" = (alpha >= 0 & alpha <= 1),
49 +
    "n must be > 0" = (n > 0)
50 +
  )
51 +
  
52 +
  obj<- x
53 +
  fullsims <- length(obj$roc_dat) > 1
54 +
  
55 +
  if (!fullsims) {
56 +
    
57 +
    graphics::par(mfrow = c(1, 2))
58 +
    plot(obj$roc_dat[[1]], type = "s", xlab = "FPR", ylab = "TPR")
59 +
    graphics::abline(a = 0, b = 1, lty = 3, col = "gray50")
60 +
    
61 +
    prc_dat <- obj$prc_dat[[1]]
62 +
    # use first non-NaN y-value for y[1]
63 +
    prc_dat$y[1] <- prc_dat$y[2]
64 +
    plot(prc_dat, type = "l", xlab = "TPR", ylab = "Precision",
65 +
         ylim = c(0, 1))
66 +
    graphics::abline(a = attr(x, "y_pos_rate"), b = 0, lty = 3, col = "gray50")
67 +
    
68 +
  } else {
69 +
    
70 +
    graphics::par(mfrow = c(1, 2))
71 +
    
72 +
    roc_dat <- obj$roc_dat
73 +
    
74 +
    x <- lapply(roc_dat, `[[`, 1)
75 +
    x <- do.call(cbind, x)
76 +
    colnames(x) <- paste0("sim", 1:ncol(x))
77 +
    
78 +
    y <- lapply(roc_dat, `[[`, 2)
79 +
    y <- do.call(cbind, y)
80 +
    colnames(y) <- paste0("sim", 1:ncol(y))
81 +
    
82 +
    xavg <- rowMeans(x)
83 +
    yavg <- rowMeans(y)
84 +
    
85 +
    plot(xavg, yavg, type = "n", xlab = "FPR", ylab = "TPR")
86 +
    samples <- sample(1:ncol(x), n)
87 +
    for (i in samples) {
88 +
      graphics::lines(
89 +
        x[, i], y[, i], type = "s",
90 +
        col = grDevices::rgb(127, 127, 127, alpha = alpha*255, maxColorValue = 255)
91 +
      )
92 +
    }
93 +
    graphics::lines(xavg, yavg, type = "s")
94 +
    
95 +
    # PRC
96 +
    # The elements of prc_dat have different lengths, unlike roc_dat, so we
97 +
    # have to do the central curve differently.
98 +
    prc_dat <- obj$prc_dat
99 +
    
100 +
    x <- lapply(prc_dat, `[[`, 1)
101 +
    y <- lapply(prc_dat, `[[`, 2)
102 +
    
103 +
    # Instead of combining the list of curve coordinates from each sample into
104 +
    # two x and y matrices, we can first make a point cloud with all curve 
105 +
    # points from all samples, and then average the y values at all distinct
106 +
    # x coordinates. The x-axis plots recall (TPR), which will only have as 
107 +
    # many distinct values as there are positives in the data, so this does 
108 +
    # not lose any information about the x coordinates. 
109 +
    point_cloud <- data.frame(
110 +
      x = unlist(x),
111 +
      y = unlist(y)
112 +
    )
113 +
    point_cloud <- stats::aggregate(point_cloud[, "y", drop = FALSE], 
114 +
                                    # factor implicitly encodes distinct values only,
115 +
                                    # since they will get the same labels
116 +
                                    by = list(x = as.factor(point_cloud$x)), 
117 +
                                    FUN = mean)
118 +
    point_cloud$x <- as.numeric(as.character(point_cloud$x))
119 +
    xavg <- point_cloud$x
120 +
    yavg <- point_cloud$y
121 +
    
122 +
    plot(xavg, yavg, type = "n", xlab = "TPR", ylab = "Precision", ylim = c(0, 1))
123 +
    samples <- sample(1:length(prc_dat), n)
124 +
    for (i in samples) {
125 +
      graphics::lines(
126 +
        x[[i]], y[[i]], 
127 +
        col = grDevices::rgb(127, 127, 127, alpha = alpha*255, maxColorValue = 255)
128 +
      )
129 +
    }
130 +
    graphics::lines(xavg, yavg)
131 +
    
132 +
  }
133 +
  
134 +
  invisible(x)
135 +
}
136 +
137 +
#' @rdname mcmcRocPrc
138 +
#' 
139 +
#' @param row.names see [base::as.data.frame()] 
140 +
#' @param optional see [base::as.data.frame()]
141 +
#' @param what which information to extract and convert to a data frame?
142 +
#' 
143 +
#' @export
144 +
as.data.frame.mcmcRocPrc <- function(x, row.names = NULL, optional = FALSE,
145 +
                                     what = c("auc", "roc", "prc"), ...) {
146 +
  what <- match.arg(what)
147 +
  if (what=="auc") {
148 +
    # all 4 output types have AUC, so this should work across the board
149 +
    return(as.data.frame(x[c("area_under_roc", "area_under_prc")]))
150 +
    
151 +
  } else if (what %in% c("roc", "prc")) {
152 +
    if (what=="roc") element <- "roc_dat" else element <- "prc_dat"
153 +
    
154 +
    # if curves was FALSE, there will be no curve data...
155 +
    if (is.null(x[[element]])) {
156 +
      stop("No curve data; use mcmcRocPrc(..., curves = TRUE)")
157 +
    }
158 +
    
159 +
    # Otherwise, there will be either one set of coordinates if mcmcmRegPrc()
160 +
    # was called with fullsims = FALSE, or else N_sims curve data sets.
161 +
    # If the latter, we can return a long data frame with an identifying 
162 +
    # "sim" column to delineate the sim sets. To ensure consistency in output,
163 +
    # also add this column when fullsims = FALSE.
164 +
    
165 +
    # averaged, single coordinate set
166 +
    if (length(x[[element]])==1L) {
167 +
      return(data.frame(sim = 1L, x[[element]][[1]]))
168 +
    }
169 +
    
170 +
    # full sims
171 +
    # add a unique ID to each coordinate set
172 +
    outlist <- x[[element]]
173 +
    outlist <- Map(cbind, sim = (1:length(outlist)), outlist)
174 +
    # combine into long data frame
175 +
    outdf <- do.call(rbind, outlist)
176 +
    return(outdf)
177 +
  } 
178 +
  stop("Developer error (I should not be here): please file an issue on GitHub")  # nocov
179 +
}
180 +
181 +
182 +
183 +
Files Coverage
R 83.11%
Project Totals (11 files) 83.11%
1
comment: false
2

3
coverage:
4
  status:
5
    project:
6
      default:
7
        target: auto
8
        threshold: 1%
9
    patch:
10
      default:
11
        target: auto
12
        threshold: 1%
Sunburst
The inner-most circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project. Proceeding with folders and finally individual files. The size and color of each slice is representing the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block is represented by the number of statements and the coverage, respectively.
Loading