1 #'This function calculates predicted probabilities for "average" cases after a Bayesian  2 #'logit or probit model. For an explanation of predicted probabilities for "average" cases,  3 #'see e.g. King, Tomz & Wittenberg (2000, American Journal of Political Science 44(2): 347-361)  4 #'@title Predicted Probabilities using Bayesian MCMC estimates for the "Average" Case  5 #'@description This function calculates predicted probabilities for "average" cases after  6 #'a Bayesian logit or probit model. As "average" cases, this function calculates the median  7 #'value of each predictor. For an explanation of predicted probabilities for  8 #'"average" cases, see e.g. King, Tomz & Wittenberg (2000, American Journal of  9 #'Political Science 44(2): 347-361).  10 #'@param modelmatrix model matrix, including intercept (if the intercept is among the  11 #'parameters estimated in the model). Create with model.matrix(formula, data).  12 #'Note: the order of columns in the model matrix must correspond to the order of columns  13 #'in the matrix of posterior draws in the \code{mcmcout} argument. See the \code{mcmcout}  14 #'argument for more.  15 #'@param mcmcout posterior distributions of all logit coefficients,  16 #'in matrix form. This can be created from rstan, MCMCpack, R2jags, etc. and transformed  17 #'into a matrix using the function as.mcmc() from the coda package for \code{jags} class  18 #'objects, as.matrix() from base R for \code{mcmc}, \code{mcmc.list}, \code{stanreg}, and  19 #'\code{stanfit} class objects, and \code{object$sims.matrix} for \code{bugs} class objects.  20 #'Note: the order of columns in this matrix must correspond to the order of columns  21 #'in the model matrix. One can do this by examining the posterior distribution matrix and sorting the  22 #'variables in the order of this matrix when creating the model matrix. A useful function for sorting  23 #'column names containing both characters and numbers as  24 #'you create the matrix of posterior distributions is \code{mixedsort()} from the gtools package.  25 #'@param xcol column number of the posterior draws (\code{mcmcout}) and model matrices  26 #'that corresponds to the explanatory variable for which to calculate associated Pr(y = 1).  27 #'Note that the columns in these matrices must match.  28 #'@param xrange name of the vector with the range of relevant values of the  29 #'explanatory variable for which to calculate associated Pr(y = 1).  30 #'@param xinterest semi-optional argument. Name of the explanatory variable for which  31 #'to calculate associated Pr(y = 1). If \code{xcol} is supplied, this is not needed.  32 #'If both are supplied, the function defaults to \code{xcol} and this argument is ignored.  33 #'@param link type of generalized linear model; a character vector set to \code{"logit"}  34 #'(default) or \code{"probit"}.  35 #'@param ci the bounds of the credible interval. Default is \code{c(0.025, 0.975)} for the 95\%  36 #'credible interval.  37 #'@param fullsims logical indicator of whether full object (based on all MCMC draws  38 #'rather than their average) will be returned. Default is \code{FALSE}. Note: The longer  39 #'\code{xrange} is, the larger the full output will be if \code{TRUE} is selected.  40 #'@references King, Gary, Michael Tomz, and Jason Wittenberg. 2000. “Making the Most  41 #'of Statistical Analyses: Improving Interpretation and Presentation.” American Journal  42 #'of Political Science 44 (2): 347–61. http://www.jstor.org/stable/2669316  43 #'@return if \code{fullsims = FALSE} (default), a tibble with 4 columns:  44 #'\itemize{  45 #'\item x: value of variable of interest, drawn from \code{xrange}  46 #'\item median_pp: median predicted Pr(y = 1) when variable of interest is set to x,  47 #'holding all other predictors to average (median) values  48 #'\item lower_pp: lower bound of credible interval of predicted probability at given x  49 #'\item upper_pp: upper bound of credible interval of predicted probability at given x  50 #'}  51 #'if \code{fullsims = TRUE}, a tibble with 3 columns:  52 #'\itemize{  53 #'\item Iteration: number of the posterior draw  54 #'\item x: value of variable of interest, drawn from \code{xrange}  55 #'\item pp: average predicted Pr(y = 1) when variable of interest is set to x, holding all other predictors to average (median) values  56 #'}  57 #'@examples  58 #' \dontshow{.old_wd <- setwd(tempdir())}  59 #' \donttest{  60 #' ## simulating data  61 #' set.seed(123456)  62 #' b0 <- 0.2 # true value for the intercept  63 #' b1 <- 0.5 # true value for first beta  64 #' b2 <- 0.7 # true value for second beta  65 #' n <- 500 # sample size  66 #' X1 <- runif(n, -1, 1)  67 #' X2 <- runif(n, -1, 1)  68 #' Z <- b0 + b1 * X1 + b2 * X2  69 #' pr <- 1 / (1 + exp(-Z)) # inv logit function  70 #' Y <- rbinom(n, 1, pr)  71 #' df <- data.frame(cbind(X1, X2, Y))  72 #'  73 #' ## formatting the data for jags  74 #' datjags <- as.list(df)  75 #' datjags$N <- length(datjags$Y)  76 #'  77 #' ## creating jags model  78 #' model <- function() {  79 #'  80 #' for(i in 1:N){  81 #' Y[i] ~ dbern(p[i]) ## Bernoulli distribution of y_i  82 #' logit(p[i]) <- mu[i] ## Logit link function  83 #' mu[i] <- b[1] +  84 #' b[2] * X1[i] +  85 #' b[3] * X2[i]  86 #' }  87 #'  88 #' for(j in 1:3){  89 #' b[j] ~ dnorm(0, 0.001) ## Use a coefficient vector for simplicity  90 #' }  91 #'  92 #'}  93 #'  94 #' params <- c("b")  95 #' inits1 <- list("b" = rep(0, 3))  96 #' inits2 <- list("b" = rep(0, 3))  97 #' inits <- list(inits1, inits2)  98 #'  99 #' ## fitting the model with R2jags  100 #' library(R2jags)  101 #' set.seed(123)  102 #' fit <- jags(data = datjags, inits = inits,  103 #' parameters.to.save = params, n.chains = 2, n.iter = 2000,  104 #' n.burnin = 1000, model.file = model)  105 #'  106 #' ### average value approach  107 #' library(coda)  108 #' xmat <- model.matrix(Y ~ X1 + X2, data = df)  109 #' mcmc <- as.mcmc(fit)  110 #' mcmc_mat <- as.matrix(mcmc)[, 1:ncol(xmat)]  111 #' X1_sim <- seq(from = min(datjags$X1),  112 #' to = max(datjags$X1),  113 #' length.out = 10)  114 #' ave_prob <- mcmcAveProb(modelmatrix = xmat,  115 #' mcmcout = mcmc_mat,  116 #' xrange = X1_sim,  117 #' xcol = 2)  118 #' }  119 #'  120 #' \dontshow{setwd(.old_wd)}  121 #'@export  122 #'  123 mcmcAveProb <- function(modelmatrix,  124  mcmcout,  125  xcol,  126  xrange,  127  xinterest,  128  link = "logit",  129  ci = c(0.025, 0.975),  130  fullsims = FALSE){  131   132  # checking arguments  133 1  if(missing(xcol) & missing(xinterest)) {  134 0  stop("Please enter a column number or name of your variable of interest.)")  135  }  136 1  if(!missing(xcol) & !missing(xinterest)) {  137 0  message("Both xcol and xinterest were supplied by user. Function defaults to xcol")  138  }  139 1  if(!missing(xinterest)) {  140 0  if(!(xinterest %in% variable.names(modelmatrix)))  141 0  stop("Variable name does not match any in the matrix. Please enter another.")  142  }  143   144 1  if(missing(modelmatrix) | missing(mcmcout) | missing(xrange)) {  145 0  stop("Please enter modelmatrix, mcmcout, and xrange arguments")  146  }  147   148 1  X <- matrix(rep(apply(X = modelmatrix,  149 1  MARGIN = 2,  150 1  FUN = function(x) median(x)),  151 1  times = length(xrange)),  152 1  nrow = length(xrange),  153 1  byrow = TRUE)  154 1  colnames(X) <- variable.names(modelmatrix)  155 1  if(!missing(xcol)) {  156 1  X[, xcol] <- xrange  157  } else {  158 0  X[ , grepl( xinterest , variable.names( X ) ) ] <- xrange  159  }  160   161 1  if(link == "logit"){  162 1  pp <- plogis(t(X %*% t(mcmcout)))  163  }  164   165 1  if(link == "probit"){  166 1  pp <- pnorm(t(X %*% t(mcmcout)))  167  }  168   169 1  colnames(pp) <- as.character(xrange)  170 1  longFrame <- reshape2::melt(pp)  171   172 1  pp_dat <- dplyr::summarize(dplyr::group_by(longFrame, .data$Var2),  173 1  median_pp = quantile(.data$value, probs = 0.5),  174 1  lower_pp = quantile(.data$value, probs = ci[1]),  175 1  upper_pp = quantile(.data\$value, probs = ci[2]))  176   177 1  names(pp_dat) <- c("x", "median_pp", "lower_pp", "upper_pp")  178   179 1  if(fullsims == FALSE){  180 1  return(pp_dat) # pp_dat was created by summarizing longFrame  181  }  182   183 0  if(fullsims == TRUE){  184 0  names(longFrame) <- c("Iteration", "x", "pp")  185 0  return(longFrame)  186  }  187   188 } 

Read our documentation on viewing source code .