mcmcRocPrc.R is quite long
1 |
#' Coefficient Plots for MCMC Output
|
|
2 |
#'
|
|
3 |
#' Coefficient plots for MCMC output using \code{ggplot2}
|
|
4 |
#'
|
|
5 |
#' @param mod Bayesian model object generated by R2jags, rjags, R2WinBUGS, R2OpenBUGS,
|
|
6 |
#' MCMCpack, rstan, rstanarm, and brms.
|
|
7 |
#' @param pars a scalar or vector of the parameters you wish to include in the table.
|
|
8 |
#' By default, \code{mcmcCoefPlot} includes all parameters saved in a model object. If a
|
|
9 |
#' model has lots of samples and lots of saved parameters, not explicitly specifying
|
|
10 |
#' a limited number of parameters to include via \code{pars} may take a long time
|
|
11 |
#' or produce an unreadable plot. \code{pars} can either be a vector with the
|
|
12 |
#' specific parameters to be included in the table e.g. \code{pars = c("beta[1]",
|
|
13 |
#' "beta[2]", "beta[3]")}, or they can be partial names that will be matched using
|
|
14 |
#' regular expressions e.g. \code{pars = "beta"} if \code{regex = TRUE}. Both of
|
|
15 |
#' these will include \code{beta[1]}, \code{beta[2]}, and \code{beta[3]} in the
|
|
16 |
#' plot. If \code{pars} is left blank, \code{mcmcCoefPlot} will exclude auxiliary
|
|
17 |
#' parameters such as \code{deviance} from JAGS or \code{lp__} from Stan.
|
|
18 |
#' @param pointest a character indicating whether to use the mean or median for
|
|
19 |
#' point estimates in the table.
|
|
20 |
#' @param ci a scalar indicating the confidence level of the uncertainty intervals.
|
|
21 |
#' @param hpdi a logical indicating whether to use highest posterior density intervals
|
|
22 |
#' or equal tailed credible intervals to capture uncertainty; default \code{FALSE}.
|
|
23 |
#' @param sort logical indicating whether to sort the point estimates to produce
|
|
24 |
#' a caterpillar or dot plot; default \code{FALSE}.
|
|
25 |
#' @param plot logical indicating whether to return a \code{ggplot} object or the
|
|
26 |
#' underlying tidy DataFrame; default \code{TRUE}.
|
|
27 |
#' @param regex use regular expression matching with \code{pars}?
|
|
28 |
#'
|
|
29 |
#' @return a \code{ggplot} object or a tidy DataFrame.
|
|
30 |
#'
|
|
31 |
#' @author Rob Williams, \email{jayrobwilliams@gmail.com}
|
|
32 |
#'
|
|
33 |
#' @examples
|
|
34 |
#' \dontshow{.old_wd <- setwd(tempdir())}
|
|
35 |
#' \donttest{
|
|
36 |
#' ## simulating data
|
|
37 |
#' set.seed(123456)
|
|
38 |
#' b0 <- 0.2 # true value for the intercept
|
|
39 |
#' b1 <- 0.5 # true value for first beta
|
|
40 |
#' b2 <- 0.7 # true value for second beta
|
|
41 |
#' n <- 500 # sample size
|
|
42 |
#' X1 <- runif(n, -1, 1)
|
|
43 |
#' X2 <- runif(n, -1, 1)
|
|
44 |
#' Z <- b0 + b1 * X1 + b2 * X2
|
|
45 |
#' pr <- 1 / (1 + exp(-Z)) # inv logit function
|
|
46 |
#' Y <- rbinom(n, 1, pr)
|
|
47 |
#' df <- data.frame(cbind(X1, X2, Y))
|
|
48 |
#'
|
|
49 |
#' ## formatting the data for jags
|
|
50 |
#' datjags <- as.list(df)
|
|
51 |
#' datjags$N <- length(datjags$Y)
|
|
52 |
#'
|
|
53 |
#' ## creating jags model
|
|
54 |
#' model <- function() {
|
|
55 |
#'
|
|
56 |
#' for(i in 1:N){
|
|
57 |
#' Y[i] ~ dbern(p[i]) ## Bernoulli distribution of y_i
|
|
58 |
#' logit(p[i]) <- mu[i] ## Logit link function
|
|
59 |
#' mu[i] <- b[1] +
|
|
60 |
#' b[2] * X1[i] +
|
|
61 |
#' b[3] * X2[i]
|
|
62 |
#' }
|
|
63 |
#'
|
|
64 |
#' for(j in 1:3){
|
|
65 |
#' b[j] ~ dnorm(0, 0.001) ## Use a coefficient vector for simplicity
|
|
66 |
#' }
|
|
67 |
#'
|
|
68 |
#' }
|
|
69 |
#'
|
|
70 |
#' params <- c("b")
|
|
71 |
#' inits1 <- list("b" = rep(0, 3))
|
|
72 |
#' inits2 <- list("b" = rep(0, 3))
|
|
73 |
#' inits <- list(inits1, inits2)
|
|
74 |
#'
|
|
75 |
#' ## fitting the model with R2jags
|
|
76 |
#' set.seed(123)
|
|
77 |
#' fit <- R2jags::jags(data = datjags, inits = inits,
|
|
78 |
#' parameters.to.save = params, n.chains = 2, n.iter = 2000,
|
|
79 |
#' n.burnin = 1000, model.file = model)
|
|
80 |
#'
|
|
81 |
#' ## generating coefficient plot with all non-auxiliary parameters
|
|
82 |
#' mcmcCoefPlot(fit)
|
|
83 |
#' }
|
|
84 |
#'
|
|
85 |
#' \dontshow{setwd(.old_wd)}
|
|
86 |
#' @export
|
|
87 |
mcmcCoefPlot <- function(mod, pars = NULL, |
|
88 |
pointest = 'mean', |
|
89 |
ci = .95, |
|
90 |
hpdi = FALSE, |
|
91 |
sort = FALSE, |
|
92 |
plot = TRUE, |
|
93 |
regex = FALSE) { |
|
94 |
|
|
95 |
## pull in unexported functions from other packages
|
|
96 |
## other options for future versions might include lifting this and adding authors as copr holders
|
|
97 | 1 |
runjags.as.mcmc.list.runjags = getFromNamespace("as.mcmc.list.runjags", "runjags") |
98 | 1 |
if (inherits(mod, what = c("jags", "rjags"))) { |
99 | 1 |
samps <- as.matrix(coda::as.mcmc(mod)) |
100 |
}
|
|
101 | 1 |
if (inherits(mod, what = "bugs")) { |
102 | 1 |
samps <- mod$sims.matrix |
103 |
}
|
|
104 | 1 |
if (inherits(mod, what = "runjags")) { |
105 | 1 |
samps <- as.matrix(runjags.as.mcmc.list.runjags(mod)) |
106 |
}
|
|
107 | 1 |
if (inherits(mod, what = c("mcmc", "mcmc.list", "stanfit", "stanreg", |
108 | 1 |
"brmsfit"))) { |
109 | 1 |
samps <- as.matrix(mod) |
110 |
}
|
|
111 |
|
|
112 | 1 |
if (is.null(pars)) { |
113 | 1 |
samps <- samps[, !grepl(pattern = 'deviance|lp__', x = colnames(samps))] |
114 | 1 |
} else if (regex) { |
115 | 1 |
samps <- samps[, grepl(pattern = paste(pars, collapse = '|'), x = colnames(samps))] |
116 |
} else { |
|
117 | 1 |
samps <- matrix(samps[, pars], nrow = nrow(samps), byrow = FALSE, |
118 | 1 |
dimnames = list(NULL, pars)) |
119 |
}
|
|
120 |
|
|
121 | 1 |
if (hpdi == FALSE) { |
122 | 1 |
samps_ci <- t(apply(samps, 2, quantile, probs = c(.5 - ci/2, .5 + ci/2))) |
123 | 1 |
} else if (hpdi == TRUE) { |
124 | 1 |
samps_ci <- coda::HPDinterval(coda::as.mcmc(samps), prob = ci) |
125 |
} else { |
|
126 | 1 |
stop("hpdi must be either true or false") |
127 |
}
|
|
128 |
|
|
129 | 1 |
if (pointest == 'mean') { |
130 | 1 |
samps_pe <- apply(samps, 2, mean) |
131 | 1 |
} else if (pointest == 'median') { |
132 | 1 |
samps_pe <- apply(samps, 2, median) |
133 |
} else { |
|
134 | 1 |
stop("pointest must be either 'mean' or 'median'") |
135 |
}
|
|
136 |
|
|
137 | 1 |
coefs <- data.frame(pe = samps_pe, samps_ci) |
138 | 1 |
if (sort) { |
139 | 1 |
coefs$variable <- factor(rownames(coefs), |
140 | 1 |
levels = rev(rownames(coefs)[order(coefs$pe, |
141 | 1 |
decreasing = TRUE)])) |
142 |
} else { |
|
143 | 1 |
coefs$variable <- factor(rownames(coefs), levels = rev(rownames(coefs))) |
144 |
}
|
|
145 | 1 |
colnames(coefs)[2:3] <- c('lo', 'hi') |
146 |
|
|
147 |
## return coefficient plot or underlying dataframe
|
|
148 | 1 |
if (!plot) { |
149 | 1 |
coefs |
150 |
} else { |
|
151 | 1 |
ggplot2::ggplot(coefs, ggplot2::aes(x = .data$variable, y = .data$pe, |
152 | 1 |
ymin = .data$lo, ymax = .data$hi)) + |
153 | 1 |
ggplot2::geom_hline(yintercept = 0, lty = 2) + |
154 | 1 |
ggplot2::geom_pointrange() + |
155 | 1 |
ggplot2::coord_flip() + |
156 | 1 |
ggplot2::labs(x = '', y = '') |
157 |
}
|
|
158 |
|
|
159 |
}
|
Read our documentation on viewing source code .