1
|
|
#'@title First Differences of a Bayesian Logit or Probit model
|
2
|
|
#'@description R function to calculate first differences after a Bayesian logit or probit model.
|
3
|
|
#'First differences are a method to summarize effects across covariates. This quantity represents
|
4
|
|
#'the difference in predicted probabilities for each covariate for cases with low and high values
|
5
|
|
#'of the respective covariate. For each of these differences, all other variables are held constant
|
6
|
|
#'at their median. For more, see Long (1997, Sage Publications) and King, Tomz, and Wittenberg (2000,
|
7
|
|
#'American Journal of Political Science 44(2): 347-361).
|
8
|
|
#'@param modelmatrix model matrix, including intercept (if the intercept is among the
|
9
|
|
#'parameters estimated in the model). Create with model.matrix(formula, data).
|
10
|
|
#'Note: the order of columns in the model matrix must correspond to the order of columns
|
11
|
|
#'in the matrix of posterior draws in the \code{mcmcout} argument. See the \code{mcmcout}
|
12
|
|
#'argument for more.
|
13
|
|
#'@param mcmcout posterior distributions of all logit coefficients,
|
14
|
|
#'in matrix form. This can be created from rstan, MCMCpack, R2jags, etc. and transformed
|
15
|
|
#'into a matrix using the function as.mcmc() from the coda package for \code{jags} class
|
16
|
|
#'objects, as.matrix() from base R for \code{mcmc}, \code{mcmc.list}, \code{stanreg}, and
|
17
|
|
#'\code{stanfit} class objects, and \code{object$sims.matrix} for \code{bugs} class objects.
|
18
|
|
#'Note: the order of columns in this matrix must correspond to the order of columns
|
19
|
|
#'in the model matrix. One can do this by examining the posterior distribution matrix and sorting the
|
20
|
|
#'variables in the order of this matrix when creating the model matrix. A useful function for sorting
|
21
|
|
#'column names containing both characters and numbers as
|
22
|
|
#'you create the matrix of posterior distributions is \code{mixedsort()} from the gtools package.
|
23
|
|
#'@param link type of generalized linear model; a character vector set to \code{"logit"} (default)
|
24
|
|
#'or \code{"probit"}.
|
25
|
|
#'@param ci the bounds of the credible interval. Default is \code{c(0.025, 0.975)} for the 95\%
|
26
|
|
#'credible interval.
|
27
|
|
#'@param percentiles values of each predictor for which the difference in Pr(y = 1)
|
28
|
|
#'is to be calculated. Default is \code{c(0.25, 0.75)}, which will calculate the difference
|
29
|
|
#'between Pr(y = 1) for the 25th percentile and 75th percentile of the predictor. For binary
|
30
|
|
#'predictors, the function automatically calculates the difference between Pr(y = 1)
|
31
|
|
#'for x = 0 and x = 1.
|
32
|
|
#'@param fullsims logical indicator of whether full object (based on all MCMC draws
|
33
|
|
#'rather than their average) will be returned. Default is \code{FALSE}.
|
34
|
|
#'@references
|
35
|
|
#'\itemize{
|
36
|
|
#'\item King, Gary, Michael Tomz, and Jason Wittenberg. 2000. “Making the Most of Statistical
|
37
|
|
#'Analyses: Improving Interpretation and Presentation.” American Journal of Political Science
|
38
|
|
#'44 (2): 347–61. http://www.jstor.org/stable/2669316
|
39
|
|
#'\item Long, J. Scott. 1997. Regression Models for Categorical and Limited Dependent Variables.
|
40
|
|
#'Thousand Oaks: Sage Publications
|
41
|
|
#'}
|
42
|
|
#'@return An object of class \code{mcmcFD}. If \code{fullsims = FALSE} (default),
|
43
|
|
#'a data frame with five columns:
|
44
|
|
#'\itemize{
|
45
|
|
#'\item median_fd: median first difference
|
46
|
|
#'\item lower_fd: lower bound of credible interval of the first difference
|
47
|
|
#'\item upper_fd: upper bound of credible interval of the first difference
|
48
|
|
#'\item VarName: name of the variable as found in \code{modelmatrix}
|
49
|
|
#'\item VarID: identifier of the variable, based on the order of columns in
|
50
|
|
#'\code{modelmatrix} and \code{mcmcout}. Can be adjusted for plotting
|
51
|
|
#'}
|
52
|
|
#'If \code{fullsims = TRUE}, a matrix with as many columns as predictors in the model. Each row
|
53
|
|
#'is the first difference for that variable based on one set of posterior draws. Column names are taken
|
54
|
|
#'from the column names of \code{modelmatrix}.
|
55
|
|
#'
|
56
|
|
#'@examples
|
57
|
|
#' \dontshow{.old_wd <- setwd(tempdir())}
|
58
|
|
#' \donttest{
|
59
|
|
#' ## simulating data
|
60
|
|
#' set.seed(123456)
|
61
|
|
#' b0 <- 0.2 # true value for the intercept
|
62
|
|
#' b1 <- 0.5 # true value for first beta
|
63
|
|
#' b2 <- 0.7 # true value for second beta
|
64
|
|
#' n <- 500 # sample size
|
65
|
|
#' X1 <- runif(n, -1, 1)
|
66
|
|
#' X2 <- runif(n, -1, 1)
|
67
|
|
#' Z <- b0 + b1 * X1 + b2 * X2
|
68
|
|
#' pr <- 1 / (1 + exp(-Z)) # inv logit function
|
69
|
|
#' Y <- rbinom(n, 1, pr)
|
70
|
|
#' df <- data.frame(cbind(X1, X2, Y))
|
71
|
|
#'
|
72
|
|
#' ## formatting the data for jags
|
73
|
|
#' datjags <- as.list(df)
|
74
|
|
#' datjags$N <- length(datjags$Y)
|
75
|
|
#'
|
76
|
|
#' ## creating jags model
|
77
|
|
#' model <- function() {
|
78
|
|
#'
|
79
|
|
#' for(i in 1:N){
|
80
|
|
#' Y[i] ~ dbern(p[i]) ## Bernoulli distribution of y_i
|
81
|
|
#' logit(p[i]) <- mu[i] ## Logit link function
|
82
|
|
#' mu[i] <- b[1] +
|
83
|
|
#' b[2] * X1[i] +
|
84
|
|
#' b[3] * X2[i]
|
85
|
|
#' }
|
86
|
|
#'
|
87
|
|
#' for(j in 1:3){
|
88
|
|
#' b[j] ~ dnorm(0, 0.001) ## Use a coefficient vector for simplicity
|
89
|
|
#' }
|
90
|
|
#'
|
91
|
|
#' }
|
92
|
|
#'
|
93
|
|
#' params <- c("b")
|
94
|
|
#' inits1 <- list("b" = rep(0, 3))
|
95
|
|
#' inits2 <- list("b" = rep(0, 3))
|
96
|
|
#' inits <- list(inits1, inits2)
|
97
|
|
#'
|
98
|
|
#' ## fitting the model with R2jags
|
99
|
|
#' set.seed(123)
|
100
|
|
#' fit <- R2jags::jags(data = datjags, inits = inits,
|
101
|
|
#' parameters.to.save = params, n.chains = 2, n.iter = 2000,
|
102
|
|
#' n.burnin = 1000, model.file = model)
|
103
|
|
#'
|
104
|
|
#' ## running function with logit
|
105
|
|
#' xmat <- model.matrix(Y ~ X1 + X2, data = df)
|
106
|
|
#' mcmc <- coda::as.mcmc(fit)
|
107
|
|
#' mcmc_mat <- as.matrix(mcmc)[, 1:ncol(xmat)]
|
108
|
|
#' object <- mcmcFD(modelmatrix = xmat,
|
109
|
|
#' mcmcout = mcmc_mat)
|
110
|
|
#' object
|
111
|
|
#' }
|
112
|
|
#'
|
113
|
|
#' \dontshow{setwd(.old_wd)}
|
114
|
|
#'@export
|
115
|
|
#'
|
116
|
|
mcmcFD <- function(modelmatrix,
|
117
|
|
mcmcout,
|
118
|
|
link = "logit",
|
119
|
|
ci = c(0.025, 0.975),
|
120
|
|
percentiles = c(0.25, 0.75),
|
121
|
|
fullsims = FALSE) {
|
122
|
|
|
123
|
1
|
if(missing(modelmatrix) | missing(mcmcout)){
|
124
|
1
|
stop("Please enter required arguments.")
|
125
|
|
}
|
126
|
|
|
127
|
1
|
fdmat <- matrix(NA, ncol = 3, nrow = ncol(modelmatrix) - 1)
|
128
|
1
|
colnames(fdmat) <- c("median_fd", "lower_fd", "upper_fd")
|
129
|
1
|
rownames(fdmat) <- colnames(modelmatrix)[-1]
|
130
|
|
|
131
|
1
|
fdfull <- matrix(rep(NA),
|
132
|
1
|
ncol = ncol(modelmatrix) - 1,
|
133
|
1
|
nrow = nrow(mcmcout),
|
134
|
1
|
byrow = TRUE)
|
135
|
1
|
colnames(fdfull) <- colnames(modelmatrix)[-1]
|
136
|
|
|
137
|
1
|
for (i in 2:ncol(modelmatrix)){
|
138
|
|
|
139
|
1
|
X <- matrix(rep(apply(X = modelmatrix,
|
140
|
1
|
MARGIN = 2,
|
141
|
1
|
FUN = function(x) median(x)),
|
142
|
1
|
times = 2),
|
143
|
1
|
nrow = 2,
|
144
|
1
|
byrow = TRUE)
|
145
|
1
|
X[, i] <- ifelse(length(unique(modelmatrix[, i])) == 2 &
|
146
|
1
|
range(modelmatrix[, i]) == c(0, 1), c(0, 1),
|
147
|
1
|
quantile(modelmatrix[, i], probs = percentiles))
|
148
|
|
|
149
|
|
# X[, i] <- quantile(modelmatrix[, i], probs = percentiles)
|
150
|
|
|
151
|
1
|
if(link == "logit") {
|
152
|
1
|
pp <- plogis(t(X %*% t(mcmcout)))
|
153
|
1
|
} else if (link == "probit") {
|
154
|
1
|
pp <- pnorm(t(X %*% t(mcmcout)))
|
155
|
|
} else {
|
156
|
1
|
stop("Please enter valid link argument.")
|
157
|
|
}
|
158
|
|
|
159
|
|
|
160
|
1
|
fd <- pp[, 2] - pp[, 1]
|
161
|
|
|
162
|
1
|
fdmat[i-1, 1] <- quantile(fd, probs = c(0.5))
|
163
|
1
|
fdmat[i-1, 2] <- quantile(fd, probs = c(ci[1]))
|
164
|
1
|
fdmat[i-1, 3] <- quantile(fd, probs = c(ci[2]))
|
165
|
|
|
166
|
1
|
fdfull[, i-1] <- fd
|
167
|
|
|
168
|
|
}
|
169
|
|
|
170
|
1
|
fddat <- as.data.frame(fdmat)
|
171
|
1
|
fddat$VarName <- rownames(fdmat)
|
172
|
1
|
fddat$VarID <- row(fdmat)[, 1]
|
173
|
|
|
174
|
1
|
if (fullsims) {
|
175
|
1
|
return(structure(fdfull, fullsims = TRUE, class = c("mcmcFD", "matrix")))
|
176
|
|
} else {
|
177
|
1
|
return(structure(fddat, fullsims = FALSE, class = c("mcmcFD", "data.frame")))
|
178
|
|
}
|
179
|
|
|
180
|
|
}
|
181
|
|
|
182
|
|
#'@export
|
183
|
|
print.mcmcFD <- function(x, ...) {
|
184
|
1
|
if (attr(x, "fullsims")) {
|
185
|
1
|
print.table(x)
|
186
|
|
} else {
|
187
|
1
|
print.data.frame(x)
|
188
|
|
}
|
189
|
|
}
|
190
|
|
|
191
|
|
#'@title Plot Method for First Differences from MCMC output
|
192
|
|
#'@description The \code{plot} method for first differences generated from MCMC
|
193
|
|
#'output by \code{\link{mcmcFD}}. For more on this method, see Long
|
194
|
|
#'(1997, Sage Publications), and King, Tomz, and Wittenberg (2000, American
|
195
|
|
#'Journal of Political Science 44(2): 347-361). For a description of this type
|
196
|
|
#'of plot, see Figure 1 in Karreth (2018, International Interactions 44(3): 463-90).
|
197
|
|
#'@param x Output generated from \code{mcmcFD(..., full_sims = TRUE)}.
|
198
|
|
#'@param ROPE defaults to NULL. If not NULL, a numeric vector of length two,
|
199
|
|
#'defining the Region of Practical Equivalence around 0. See Kruschke (2013, Journal of
|
200
|
|
#'Experimental Psychology 143(2): 573-603) for more on the ROPE.
|
201
|
|
#'@param ... optional arguments to \code{\link[ggplot2]{theme}} from \code{\link[ggplot2:ggplot2-package]{ggplot2}}.
|
202
|
|
#'@references
|
203
|
|
#'\itemize{
|
204
|
|
#'\item Karreth, Johannes. 2018. “The Economic Leverage of International Organizations in Interstate Disputes.”
|
205
|
|
#'International Interactions 44 (3): 463-90. https://doi.org/10.1080/03050629.2018.1389728.
|
206
|
|
#'\item King, Gary, Michael Tomz, and Jason Wittenberg. 2000. “Making the Most of Statistical
|
207
|
|
#'Analyses: Improving Interpretation and Presentation.” American Journal of Political Science
|
208
|
|
#'44 (2): 347–61. http://www.jstor.org/stable/2669316.
|
209
|
|
#'\item Kruschke, John K. 2013. “Bayesian Estimation Supersedes the T-Test.” Journal of
|
210
|
|
#'Experimental Psychology: General 142 (2): 573–603. https://doi.org/10.1037/a0029146.
|
211
|
|
#'\item Long, J. Scott. 1997. Regression Models for Categorical and Limited Dependent Variables.
|
212
|
|
#'Thousand Oaks: Sage Publications.
|
213
|
|
#'}
|
214
|
|
#'@return a density plot of the differences in probabilities. The plot is made with ggplot2 and can be
|
215
|
|
#'passed on as an object to customize. Annotated numbers show the percent of posterior draws with
|
216
|
|
#'the same sign as the median estimate (if \code{ROPE = NULL}) or on the same side of the
|
217
|
|
#'ROPE as the median estimate (if \code{ROPE} is specified).
|
218
|
|
#'
|
219
|
|
#'@seealso \code{\link{mcmcFD}}
|
220
|
|
#'
|
221
|
|
#'@method plot mcmcFD
|
222
|
|
#'
|
223
|
|
#'@examples
|
224
|
|
#' \dontshow{.old_wd <- setwd(tempdir())}
|
225
|
|
#' \donttest{
|
226
|
|
#' ## simulating data
|
227
|
|
#' set.seed(123456)
|
228
|
|
#' b0 <- 0.2 # true value for the intercept
|
229
|
|
#' b1 <- 0.5 # true value for first beta
|
230
|
|
#' b2 <- 0.7 # true value for second beta
|
231
|
|
#' n <- 500 # sample size
|
232
|
|
#' X1 <- runif(n, -1, 1)
|
233
|
|
#' X2 <- runif(n, -1, 1)
|
234
|
|
#' Z <- b0 + b1 * X1 + b2 * X2
|
235
|
|
#' pr <- 1 / (1 + exp(-Z)) # inv logit function
|
236
|
|
#' Y <- rbinom(n, 1, pr)
|
237
|
|
#' df <- data.frame(cbind(X1, X2, Y))
|
238
|
|
#'
|
239
|
|
#' ## formatting the data for jags
|
240
|
|
#' datjags <- as.list(df)
|
241
|
|
#' datjags$N <- length(datjags$Y)
|
242
|
|
#'
|
243
|
|
#' ## creating jags model
|
244
|
|
#' model <- function() {
|
245
|
|
#'
|
246
|
|
#' for(i in 1:N){
|
247
|
|
#' Y[i] ~ dbern(p[i]) ## Bernoulli distribution of y_i
|
248
|
|
#' logit(p[i]) <- mu[i] ## Logit link function
|
249
|
|
#' mu[i] <- b[1] +
|
250
|
|
#' b[2] * X1[i] +
|
251
|
|
#' b[3] * X2[i]
|
252
|
|
#' }
|
253
|
|
#'
|
254
|
|
#' for(j in 1:3){
|
255
|
|
#' b[j] ~ dnorm(0, 0.001) ## Use a coefficient vector for simplicity
|
256
|
|
#' }
|
257
|
|
#'
|
258
|
|
#' }
|
259
|
|
#'
|
260
|
|
#' params <- c("b")
|
261
|
|
#' inits1 <- list("b" = rep(0, 3))
|
262
|
|
#' inits2 <- list("b" = rep(0, 3))
|
263
|
|
#' inits <- list(inits1, inits2)
|
264
|
|
#'
|
265
|
|
#' ## fitting the model with R2jags
|
266
|
|
#' set.seed(123)
|
267
|
|
#' fit <- R2jags::jags(data = datjags, inits = inits,
|
268
|
|
#' parameters.to.save = params, n.chains = 2, n.iter = 2000,
|
269
|
|
#' n.burnin = 1000, model.file = model)
|
270
|
|
#'
|
271
|
|
#' ## preparing data for mcmcFD()
|
272
|
|
#' xmat <- model.matrix(Y ~ X1 + X2, data = df)
|
273
|
|
#' mcmc <- coda::as.mcmc(fit)
|
274
|
|
#' mcmc_mat <- as.matrix(mcmc)[, 1:ncol(xmat)]
|
275
|
|
#'
|
276
|
|
#' ## plotting with mcmcFDplot()
|
277
|
|
#' full <- mcmcFD(modelmatrix = xmat,
|
278
|
|
#' mcmcout = mcmc_mat,
|
279
|
|
#' fullsims = TRUE)
|
280
|
|
#' plot(full)
|
281
|
|
#'
|
282
|
|
#' }
|
283
|
|
#'
|
284
|
|
#' \dontshow{setwd(.old_wd)}
|
285
|
|
#'
|
286
|
|
#' @export
|
287
|
|
#'
|
288
|
|
plot.mcmcFD <- function(x,
|
289
|
|
ROPE = NULL,
|
290
|
|
...) {
|
291
|
|
|
292
|
1
|
if (!attr(x, "fullsims")) {
|
293
|
1
|
stop("full simulations must be used to plot posterior distribution")
|
294
|
|
}
|
295
|
|
|
296
|
1
|
ROPE <- check_ROPE_argument(ROPE)
|
297
|
|
|
298
|
|
## multiply by 100 for percentage point change in output
|
299
|
1
|
x <- x * 100
|
300
|
|
|
301
|
|
# convert x to long data frame
|
302
|
1
|
fd_dat <- tidyr::gather(as.data.frame(x))
|
303
|
|
|
304
|
|
# create first plot
|
305
|
|
|
306
|
1
|
if(!is.null(ROPE)) {
|
307
|
1
|
fd_plot <- ggplot2::ggplot(data = fd_dat, aes(x = .data$value, y = .data$key)) +
|
308
|
1
|
ggplot2::geom_rect(xmin = ROPE[1], xmax = ROPE[2], ymin = 0, ymax = Inf, fill = "black") +
|
309
|
1
|
ggridges::stat_density_ridges(quantile_lines = TRUE,
|
310
|
1
|
quantiles = c(0.025, 0.5, 0.975),
|
311
|
1
|
vline_color = "white") +
|
312
|
1
|
ggplot2::scale_x_continuous(labels = function(x) x * 100) +
|
313
|
1
|
ggplot2::xlab("Percentage point change in Pr(y = 1)") +
|
314
|
1
|
ggplot2::ylab("")
|
315
|
|
|
316
|
|
# calculate area left/right of ROPE
|
317
|
1
|
fd_outROPE <- apply(x, 2,
|
318
|
1
|
function(x) ifelse(median(x) < 0,
|
319
|
1
|
sum(x < ROPE[1]) / length(x),
|
320
|
1
|
sum(x > ROPE[2]) / length(x)))
|
321
|
1
|
fd_annotate <- data.frame(xpos = apply(x, 2,
|
322
|
1
|
function(x) ifelse(median(x) < 0,
|
323
|
1
|
quantile(x, probs = 0.01) - 0.02,
|
324
|
1
|
quantile(x, probs = 0.99) + 0.02)),
|
325
|
1
|
ypos = as.factor(colnames(x)),
|
326
|
1
|
outROPE = paste(round(fd_outROPE * 100, digits = 1), "%", sep = ""))
|
327
|
|
|
328
|
|
# final plot
|
329
|
1
|
fd_plot <- fd_plot +
|
330
|
1
|
geom_text(data = fd_annotate, aes(x = .data$xpos, y = .data$ypos, label = .data$outROPE),
|
331
|
1
|
color = "black", nudge_y = 0.1, size = 4) +
|
332
|
1
|
ggplot2::theme(...)
|
333
|
|
} else {
|
334
|
1
|
fd_plot <- ggplot2::ggplot(data = fd_dat,
|
335
|
1
|
aes(x = .data$value, y = .data$key)) +
|
336
|
1
|
ggplot2::geom_vline(xintercept = 0) +
|
337
|
1
|
ggridges::stat_density_ridges(quantile_lines = TRUE,
|
338
|
1
|
quantiles = c(0.025, 0.5, 0.975),
|
339
|
1
|
vline_color = "white") +
|
340
|
1
|
ggplot2::xlab("Percentage point change in Pr(y = 1)") +
|
341
|
1
|
ggplot2::ylab("")
|
342
|
|
|
343
|
|
# calculate area left/right of 0
|
344
|
1
|
fd_out0 <- apply(x, 2,
|
345
|
1
|
function(x) ifelse(median(x) < 0,
|
346
|
1
|
sum(x < 0) / length(x),
|
347
|
1
|
sum(x > 0) / length(x)))
|
348
|
1
|
fd_annotate <- data.frame(xpos = apply(x, 2,
|
349
|
1
|
function(x) ifelse(median(x) < 0,
|
350
|
1
|
quantile(x, probs = 0.01) - 0.02,
|
351
|
1
|
quantile(x, probs = 0.99) + 0.02)),
|
352
|
1
|
ypos = as.factor(colnames(x)),
|
353
|
1
|
out0 = paste(round(fd_out0 * 100, digits = 1), "%", sep = ""))
|
354
|
|
|
355
|
|
# final plot
|
356
|
1
|
fd_plot <- fd_plot +
|
357
|
1
|
ggplot2::geom_text(data = fd_annotate,
|
358
|
1
|
aes(x = .data$xpos, y = .data$ypos, label = .data$out0),
|
359
|
1
|
color = "black", nudge_y = 0.1, size = 4) +
|
360
|
1
|
ggplot2::theme(...)
|
361
|
|
|
362
|
|
}
|
363
|
|
|
364
|
1
|
fd_plot
|
365
|
|
|
366
|
|
}
|
367
|
|
|
368
|
|
|
369
|
|
## mcmcFDplot - deprecated
|
370
|
|
|
371
|
|
#'@title Plot First Differences from MCMC output
|
372
|
|
#'@description R function to plot first differences generated from MCMC output.
|
373
|
|
#'For more on this method, see the documentation for \code{mcmcFD()}, Long (1997,
|
374
|
|
#'Sage Publications), and King, Tomz, and Wittenberg (2000, American Journal
|
375
|
|
#'of Political Science 44(2): 347-361). For a description of this type of plot,
|
376
|
|
#'see Figure 1 in Karreth (2018, International Interactions 44(3): 463-90).
|
377
|
|
#'@param fdfull Output generated from \code{mcmcFD(..., full_sims = TRUE)}.
|
378
|
|
#'@param ROPE defaults to NULL. If not NULL, a numeric vector of length two,
|
379
|
|
#'defining the Region of Practical Equivalence around 0. See Kruschke (2013, Journal of
|
380
|
|
#'Experimental Psychology 143(2): 573-603) for more on the ROPE.
|
381
|
|
#'@references
|
382
|
|
#'\itemize{
|
383
|
|
#'\item Karreth, Johannes. 2018. “The Economic Leverage of International Organizations in Interstate Disputes.”
|
384
|
|
#'International Interactions 44 (3): 463-90. https://doi.org/10.1080/03050629.2018.1389728.
|
385
|
|
#'\item King, Gary, Michael Tomz, and Jason Wittenberg. 2000. “Making the Most of Statistical
|
386
|
|
#'Analyses: Improving Interpretation and Presentation.” American Journal of Political Science
|
387
|
|
#'44 (2): 347–61. http://www.jstor.org/stable/2669316.
|
388
|
|
#'\item Kruschke, John K. 2013. “Bayesian Estimation Supersedes the T-Test.” Journal of
|
389
|
|
#'Experimental Psychology: General 142 (2): 573–603. https://doi.org/10.1037/a0029146.
|
390
|
|
#'\item Long, J. Scott. 1997. Regression Models for Categorical and Limited Dependent Variables.
|
391
|
|
#'Thousand Oaks: Sage Publications.
|
392
|
|
#'}
|
393
|
|
#'@return a density plot of the differences in probabilities. The plot is made with ggplot2 and can be
|
394
|
|
#'passed on as an object to customize. Annotated numbers show the percent of posterior draws with
|
395
|
|
#'the same sign as the median estimate (if \code{ROPE = NULL}) or on the same side of the
|
396
|
|
#'ROPE as the median estimate (if \code{ROPE} is specified).
|
397
|
|
#'
|
398
|
|
#'@examples
|
399
|
|
#' \dontshow{.old_wd <- setwd(tempdir())}
|
400
|
|
#' \donttest{
|
401
|
|
#' ## simulating data
|
402
|
|
#' set.seed(123456)
|
403
|
|
#' b0 <- 0.2 # true value for the intercept
|
404
|
|
#' b1 <- 0.5 # true value for first beta
|
405
|
|
#' b2 <- 0.7 # true value for second beta
|
406
|
|
#' n <- 500 # sample size
|
407
|
|
#' X1 <- runif(n, -1, 1)
|
408
|
|
#' X2 <- runif(n, -1, 1)
|
409
|
|
#' Z <- b0 + b1 * X1 + b2 * X2
|
410
|
|
#' pr <- 1 / (1 + exp(-Z)) # inv logit function
|
411
|
|
#' Y <- rbinom(n, 1, pr)
|
412
|
|
#' df <- data.frame(cbind(X1, X2, Y))
|
413
|
|
#'
|
414
|
|
#' ## formatting the data for jags
|
415
|
|
#' datjags <- as.list(df)
|
416
|
|
#' datjags$N <- length(datjags$Y)
|
417
|
|
#'
|
418
|
|
#' ## creating jags model
|
419
|
|
#' model <- function() {
|
420
|
|
#'
|
421
|
|
#' for(i in 1:N){
|
422
|
|
#' Y[i] ~ dbern(p[i]) ## Bernoulli distribution of y_i
|
423
|
|
#' logit(p[i]) <- mu[i] ## Logit link function
|
424
|
|
#' mu[i] <- b[1] +
|
425
|
|
#' b[2] * X1[i] +
|
426
|
|
#' b[3] * X2[i]
|
427
|
|
#' }
|
428
|
|
#'
|
429
|
|
#' for(j in 1:3){
|
430
|
|
#' b[j] ~ dnorm(0, 0.001) ## Use a coefficient vector for simplicity
|
431
|
|
#' }
|
432
|
|
#'
|
433
|
|
#' }
|
434
|
|
#'
|
435
|
|
#' params <- c("b")
|
436
|
|
#' inits1 <- list("b" = rep(0, 3))
|
437
|
|
#' inits2 <- list("b" = rep(0, 3))
|
438
|
|
#' inits <- list(inits1, inits2)
|
439
|
|
#'
|
440
|
|
#' ## fitting the model with R2jags
|
441
|
|
#' set.seed(123)
|
442
|
|
#' fit <- R2jags::jags(data = datjags, inits = inits,
|
443
|
|
#' parameters.to.save = params, n.chains = 2, n.iter = 2000,
|
444
|
|
#' n.burnin = 1000, model.file = model)
|
445
|
|
#'
|
446
|
|
#' ## preparing data for mcmcFD()
|
447
|
|
#' xmat <- model.matrix(Y ~ X1 + X2, data = df)
|
448
|
|
#' mcmc <- coda::as.mcmc(fit)
|
449
|
|
#' mcmc_mat <- as.matrix(mcmc)[, 1:ncol(xmat)]
|
450
|
|
#'
|
451
|
|
#' ## plotting with mcmcFDplot()
|
452
|
|
#' full <- mcmcFD(modelmatrix = xmat,
|
453
|
|
#' mcmcout = mcmc_mat,
|
454
|
|
#' fullsims = TRUE)
|
455
|
|
#' # suppress deprecated warning for R check
|
456
|
|
#' suppressWarnings(mcmcFDplot(full))
|
457
|
|
#'
|
458
|
|
#' }
|
459
|
|
#'
|
460
|
|
#' \dontshow{setwd(.old_wd)}
|
461
|
|
#'
|
462
|
|
#' @name mcmcFDplot-deprecated
|
463
|
|
#' @usage mcmcFDplot(fdfull, ROPE = NULL)
|
464
|
|
#' @seealso \code{\link{BayesPostEst-deprecated}}
|
465
|
|
#' @seealso \code{\link{mcmcFD}}
|
466
|
|
#' @keywords internal
|
467
|
|
#'
|
468
|
|
#' @rdname BayesPostEst-deprecated
|
469
|
|
#' @section \code{mcmcFDplot}:
|
470
|
|
#' For \code{mcmcFDplot}, use \code{\link{plot.mcmcFD}}.
|
471
|
|
#'
|
472
|
|
#' @export
|
473
|
|
#'
|
474
|
|
|
475
|
|
mcmcFDplot <- function(fdfull,
|
476
|
|
ROPE = NULL){
|
477
|
|
|
478
|
1
|
.Deprecated("plot.mcmcFD", package = "BayesPostEst")
|
479
|
|
|
480
|
1
|
ROPE <- check_ROPE_argument(ROPE)
|
481
|
|
|
482
|
1
|
fdfull <- fdfull * 100
|
483
|
|
|
484
|
1
|
fd_dat <- tidyr::gather(as.data.frame(fdfull))
|
485
|
|
|
486
|
|
# create first plot
|
487
|
|
|
488
|
1
|
if(!is.null(ROPE)) {
|
489
|
1
|
fd_plot <- ggplot2::ggplot(data = fd_dat, aes(x = .data$value, y = .data$key)) +
|
490
|
1
|
ggplot2::geom_rect(xmin = ROPE[1], xmax = ROPE[2], ymin = 0, ymax = Inf, fill = "black") +
|
491
|
1
|
ggridges::stat_density_ridges(quantile_lines = TRUE,
|
492
|
1
|
quantiles = c(0.025, 0.5, 0.975),
|
493
|
1
|
vline_color = "white") +
|
494
|
1
|
ggplot2::scale_x_continuous(labels = function(x) x * 100) +
|
495
|
1
|
ggplot2::xlab("Percentage point change in Pr(y = 1)") +
|
496
|
1
|
ggplot2::ylab("")
|
497
|
|
|
498
|
|
# calculate area left/right of ROPE
|
499
|
1
|
fd_outROPE <- apply(fdfull, 2,
|
500
|
1
|
function(x) ifelse(median(x) < 0,
|
501
|
1
|
sum(x < ROPE[1]) / length(x),
|
502
|
1
|
sum(x > ROPE[2]) / length(x)))
|
503
|
1
|
fd_annotate <- data.frame(xpos = apply(fdfull, 2,
|
504
|
1
|
function(x) ifelse(median(x) < 0,
|
505
|
1
|
quantile(x, probs = 0.01) - 0.02,
|
506
|
1
|
quantile(x, probs = 0.99) + 0.02)),
|
507
|
1
|
ypos = as.factor(colnames(fdfull)),
|
508
|
1
|
outROPE = paste(round(fd_outROPE * 100, digits = 1), "%", sep = ""))
|
509
|
|
|
510
|
|
# final plot
|
511
|
1
|
fd_plot <- fd_plot +
|
512
|
1
|
geom_text(data = fd_annotate, aes(x = .data$xpos, y = .data$ypos, label = .data$outROPE),
|
513
|
1
|
color = "black", nudge_y = 0.1, size = 4)
|
514
|
|
} else {
|
515
|
1
|
fd_plot <- ggplot2::ggplot(data = fd_dat,
|
516
|
1
|
aes(x = .data$value, y = .data$key)) +
|
517
|
1
|
ggplot2::geom_vline(xintercept = 0) +
|
518
|
1
|
ggridges::stat_density_ridges(quantile_lines = TRUE,
|
519
|
1
|
quantiles = c(0.025, 0.5, 0.975),
|
520
|
1
|
vline_color = "white") +
|
521
|
1
|
ggplot2::scale_x_continuous(labels = function(x) x*100) +
|
522
|
1
|
ggplot2::xlab("Percentage point change in Pr(y = 1)") +
|
523
|
1
|
ggplot2::ylab("")
|
524
|
|
|
525
|
|
# calculate area left/right of 0
|
526
|
1
|
fd_out0 <- apply(fdfull, 2,
|
527
|
1
|
function(x) ifelse(median(x) < 0,
|
528
|
1
|
sum(x < 0) / length(x),
|
529
|
1
|
sum(x > 0) / length(x)))
|
530
|
1
|
fd_annotate <- data.frame(xpos = apply(fdfull, 2,
|
531
|
1
|
function(x) ifelse(median(x) < 0,
|
532
|
1
|
quantile(x, probs = 0.01) - 0.02,
|
533
|
1
|
quantile(x, probs = 0.99) + 0.02)),
|
534
|
1
|
ypos = as.factor(colnames(fdfull)),
|
535
|
1
|
out0 = paste(round(fd_out0 * 100, digits = 1), "%", sep = ""))
|
536
|
|
|
537
|
|
# final plot
|
538
|
1
|
fd_plot <- fd_plot +
|
539
|
1
|
ggplot2::geom_text(data = fd_annotate,
|
540
|
1
|
aes(x = .data$xpos, y = .data$ypos, label = .data$out0),
|
541
|
1
|
color = "black", nudge_y = 0.1, size = 4)
|
542
|
|
}
|
543
|
|
|
544
|
1
|
fd_plot
|
545
|
|
|
546
|
|
}
|