mcmcRocPrc.R is quite long
1 |
#'This function calculates predicted probabilities for "average" cases after a Bayesian
|
|
2 |
#'logit or probit model. For an explanation of predicted probabilities for "average" cases,
|
|
3 |
#'see e.g. King, Tomz & Wittenberg (2000, American Journal of Political Science 44(2): 347-361)
|
|
4 |
#'@title Predicted Probabilities using Bayesian MCMC estimates for the "Average" Case
|
|
5 |
#'@description This function calculates predicted probabilities for "average" cases after
|
|
6 |
#'a Bayesian logit or probit model. As "average" cases, this function calculates the median
|
|
7 |
#'value of each predictor. For an explanation of predicted probabilities for
|
|
8 |
#'"average" cases, see e.g. King, Tomz & Wittenberg (2000, American Journal of
|
|
9 |
#'Political Science 44(2): 347-361).
|
|
10 |
#'@param modelmatrix model matrix, including intercept (if the intercept is among the
|
|
11 |
#'parameters estimated in the model). Create with model.matrix(formula, data).
|
|
12 |
#'Note: the order of columns in the model matrix must correspond to the order of columns
|
|
13 |
#'in the matrix of posterior draws in the \code{mcmcout} argument. See the \code{mcmcout}
|
|
14 |
#'argument for more.
|
|
15 |
#'@param mcmcout posterior distributions of all logit coefficients,
|
|
16 |
#'in matrix form. This can be created from rstan, MCMCpack, R2jags, etc. and transformed
|
|
17 |
#'into a matrix using the function as.mcmc() from the coda package for \code{jags} class
|
|
18 |
#'objects, as.matrix() from base R for \code{mcmc}, \code{mcmc.list}, \code{stanreg}, and
|
|
19 |
#'\code{stanfit} class objects, and \code{object$sims.matrix} for \code{bugs} class objects.
|
|
20 |
#'Note: the order of columns in this matrix must correspond to the order of columns
|
|
21 |
#'in the model matrix. One can do this by examining the posterior distribution matrix and sorting the
|
|
22 |
#'variables in the order of this matrix when creating the model matrix. A useful function for sorting
|
|
23 |
#'column names containing both characters and numbers as
|
|
24 |
#'you create the matrix of posterior distributions is \code{mixedsort()} from the gtools package.
|
|
25 |
#'@param xcol column number of the posterior draws (\code{mcmcout}) and model matrices
|
|
26 |
#'that corresponds to the explanatory variable for which to calculate associated Pr(y = 1).
|
|
27 |
#'Note that the columns in these matrices must match.
|
|
28 |
#'@param xrange name of the vector with the range of relevant values of the
|
|
29 |
#'explanatory variable for which to calculate associated Pr(y = 1).
|
|
30 |
#'@param xinterest semi-optional argument. Name of the explanatory variable for which
|
|
31 |
#'to calculate associated Pr(y = 1). If \code{xcol} is supplied, this is not needed.
|
|
32 |
#'If both are supplied, the function defaults to \code{xcol} and this argument is ignored.
|
|
33 |
#'@param link type of generalized linear model; a character vector set to \code{"logit"}
|
|
34 |
#'(default) or \code{"probit"}.
|
|
35 |
#'@param ci the bounds of the credible interval. Default is \code{c(0.025, 0.975)} for the 95\%
|
|
36 |
#'credible interval.
|
|
37 |
#'@param fullsims logical indicator of whether full object (based on all MCMC draws
|
|
38 |
#'rather than their average) will be returned. Default is \code{FALSE}. Note: The longer
|
|
39 |
#'\code{xrange} is, the larger the full output will be if \code{TRUE} is selected.
|
|
40 |
#'@references King, Gary, Michael Tomz, and Jason Wittenberg. 2000. “Making the Most
|
|
41 |
#'of Statistical Analyses: Improving Interpretation and Presentation.” American Journal
|
|
42 |
#'of Political Science 44 (2): 347–61. http://www.jstor.org/stable/2669316
|
|
43 |
#'@return if \code{fullsims = FALSE} (default), a tibble with 4 columns:
|
|
44 |
#'\itemize{
|
|
45 |
#'\item x: value of variable of interest, drawn from \code{xrange}
|
|
46 |
#'\item median_pp: median predicted Pr(y = 1) when variable of interest is set to x,
|
|
47 |
#'holding all other predictors to average (median) values
|
|
48 |
#'\item lower_pp: lower bound of credible interval of predicted probability at given x
|
|
49 |
#'\item upper_pp: upper bound of credible interval of predicted probability at given x
|
|
50 |
#'}
|
|
51 |
#'if \code{fullsims = TRUE}, a tibble with 3 columns:
|
|
52 |
#'\itemize{
|
|
53 |
#'\item Iteration: number of the posterior draw
|
|
54 |
#'\item x: value of variable of interest, drawn from \code{xrange}
|
|
55 |
#'\item pp: average predicted Pr(y = 1) when variable of interest is set to x, holding all other predictors to average (median) values
|
|
56 |
#'}
|
|
57 |
#'@examples
|
|
58 |
#' \dontshow{.old_wd <- setwd(tempdir())}
|
|
59 |
#' \donttest{
|
|
60 |
#' ## simulating data
|
|
61 |
#' set.seed(123456)
|
|
62 |
#' b0 <- 0.2 # true value for the intercept
|
|
63 |
#' b1 <- 0.5 # true value for first beta
|
|
64 |
#' b2 <- 0.7 # true value for second beta
|
|
65 |
#' n <- 500 # sample size
|
|
66 |
#' X1 <- runif(n, -1, 1)
|
|
67 |
#' X2 <- runif(n, -1, 1)
|
|
68 |
#' Z <- b0 + b1 * X1 + b2 * X2
|
|
69 |
#' pr <- 1 / (1 + exp(-Z)) # inv logit function
|
|
70 |
#' Y <- rbinom(n, 1, pr)
|
|
71 |
#' df <- data.frame(cbind(X1, X2, Y))
|
|
72 |
#'
|
|
73 |
#' ## formatting the data for jags
|
|
74 |
#' datjags <- as.list(df)
|
|
75 |
#' datjags$N <- length(datjags$Y)
|
|
76 |
#'
|
|
77 |
#' ## creating jags model
|
|
78 |
#' model <- function() {
|
|
79 |
#'
|
|
80 |
#' for(i in 1:N){
|
|
81 |
#' Y[i] ~ dbern(p[i]) ## Bernoulli distribution of y_i
|
|
82 |
#' logit(p[i]) <- mu[i] ## Logit link function
|
|
83 |
#' mu[i] <- b[1] +
|
|
84 |
#' b[2] * X1[i] +
|
|
85 |
#' b[3] * X2[i]
|
|
86 |
#' }
|
|
87 |
#'
|
|
88 |
#' for(j in 1:3){
|
|
89 |
#' b[j] ~ dnorm(0, 0.001) ## Use a coefficient vector for simplicity
|
|
90 |
#' }
|
|
91 |
#'
|
|
92 |
#'}
|
|
93 |
#'
|
|
94 |
#' params <- c("b")
|
|
95 |
#' inits1 <- list("b" = rep(0, 3))
|
|
96 |
#' inits2 <- list("b" = rep(0, 3))
|
|
97 |
#' inits <- list(inits1, inits2)
|
|
98 |
#'
|
|
99 |
#' ## fitting the model with R2jags
|
|
100 |
#' library(R2jags)
|
|
101 |
#' set.seed(123)
|
|
102 |
#' fit <- jags(data = datjags, inits = inits,
|
|
103 |
#' parameters.to.save = params, n.chains = 2, n.iter = 2000,
|
|
104 |
#' n.burnin = 1000, model.file = model)
|
|
105 |
#'
|
|
106 |
#' ### average value approach
|
|
107 |
#' library(coda)
|
|
108 |
#' xmat <- model.matrix(Y ~ X1 + X2, data = df)
|
|
109 |
#' mcmc <- as.mcmc(fit)
|
|
110 |
#' mcmc_mat <- as.matrix(mcmc)[, 1:ncol(xmat)]
|
|
111 |
#' X1_sim <- seq(from = min(datjags$X1),
|
|
112 |
#' to = max(datjags$X1),
|
|
113 |
#' length.out = 10)
|
|
114 |
#' ave_prob <- mcmcAveProb(modelmatrix = xmat,
|
|
115 |
#' mcmcout = mcmc_mat,
|
|
116 |
#' xrange = X1_sim,
|
|
117 |
#' xcol = 2)
|
|
118 |
#' }
|
|
119 |
#'
|
|
120 |
#' \dontshow{setwd(.old_wd)}
|
|
121 |
#'@export
|
|
122 |
#'
|
|
123 |
mcmcAveProb <- function(modelmatrix, |
|
124 |
mcmcout,
|
|
125 |
xcol,
|
|
126 |
xrange,
|
|
127 |
xinterest,
|
|
128 |
link = "logit", |
|
129 |
ci = c(0.025, 0.975), |
|
130 |
fullsims = FALSE){ |
|
131 |
|
|
132 |
# checking arguments
|
|
133 | 1 |
if(missing(xcol) & missing(xinterest)) { |
134 |
stop("Please enter a column number or name of your variable of interest.)") |
|
135 |
}
|
|
136 | 1 |
if(!missing(xcol) & !missing(xinterest)) { |
137 |
message("Both xcol and xinterest were supplied by user. Function defaults to xcol") |
|
138 |
}
|
|
139 | 1 |
if(!missing(xinterest)) { |
140 |
if(!(xinterest %in% variable.names(modelmatrix))) |
|
141 |
stop("Variable name does not match any in the matrix. Please enter another.") |
|
142 |
}
|
|
143 |
|
|
144 | 1 |
if(missing(modelmatrix) | missing(mcmcout) | missing(xrange)) { |
145 |
stop("Please enter modelmatrix, mcmcout, and xrange arguments") |
|
146 |
}
|
|
147 |
|
|
148 | 1 |
X <- matrix(rep(apply(X = modelmatrix, |
149 | 1 |
MARGIN = 2, |
150 | 1 |
FUN = function(x) median(x)), |
151 | 1 |
times = length(xrange)), |
152 | 1 |
nrow = length(xrange), |
153 | 1 |
byrow = TRUE) |
154 | 1 |
colnames(X) <- variable.names(modelmatrix) |
155 | 1 |
if(!missing(xcol)) { |
156 | 1 |
X[, xcol] <- xrange |
157 |
} else { |
|
158 |
X[ , grepl( xinterest , variable.names( X ) ) ] <- xrange |
|
159 |
}
|
|
160 |
|
|
161 | 1 |
if(link == "logit"){ |
162 | 1 |
pp <- plogis(t(X %*% t(mcmcout))) |
163 |
}
|
|
164 |
|
|
165 | 1 |
if(link == "probit"){ |
166 | 1 |
pp <- pnorm(t(X %*% t(mcmcout))) |
167 |
}
|
|
168 |
|
|
169 | 1 |
colnames(pp) <- as.character(xrange) |
170 | 1 |
longFrame <- reshape2::melt(pp) |
171 |
|
|
172 | 1 |
pp_dat <- dplyr::summarize(dplyr::group_by(longFrame, .data$Var2), |
173 | 1 |
median_pp = quantile(.data$value, probs = 0.5), |
174 | 1 |
lower_pp = quantile(.data$value, probs = ci[1]), |
175 | 1 |
upper_pp = quantile(.data$value, probs = ci[2])) |
176 |
|
|
177 | 1 |
names(pp_dat) <- c("x", "median_pp", "lower_pp", "upper_pp") |
178 |
|
|
179 | 1 |
if(fullsims == FALSE){ |
180 | 1 |
return(pp_dat) # pp_dat was created by summarizing longFrame |
181 |
}
|
|
182 |
|
|
183 |
if(fullsims == TRUE){ |
|
184 |
names(longFrame) <- c("Iteration", "x", "pp") |
|
185 |
return(longFrame) |
|
186 |
}
|
|
187 |
|
|
188 |
}
|
Read our documentation on viewing source code .