ewenharrison / finalfit
1
#' Characterise missing data for \code{finalfit} models
2
#'
3
#' Using \code{finalfit} conventions, produces a missing data matrix using
4
#' \code{\link[mice]{md.pattern}}.
5
#'
6
#' @param .data Data frame. Missing values must be coded \code{NA}.
7
#' @param dependent Character vector usually of length 1, name of dependent
8
#'   variable.
9
#' @param explanatory Character vector of any length: name(s) of explanatory
10
#'   variables.
11
#' @param rotate.names Logical. Should the orientation of variable names on plot
12
#'   be vertical?
13
#' @param ... pass other arguments such as \code{plot = TRUE} to
14
#'   \code{\link[mice]{md.pattern}}.
15
#'
16
#' @return A matrix with \code{ncol(x)+1} columns, in which each row corresponds
17
#'   to a missing data pattern (1=observed, 0=missing). Rows and columns are
18
#'   sorted in increasing amounts of missing information. The last column and
19
#'   row contain row and column counts, respectively.
20
#'
21
#' @export
22
#' @examples
23
#' library(finalfit)
24
#' library(dplyr)
25
#' explanatory = c("age.factor", "sex.factor", "obstruct.factor", "perfor.factor")
26
#' dependent = "mort_5yr"
27
#'
28
#' colon_s %>%
29
#' 	missing_pattern(dependent, explanatory)
30
#' 
31
missing_pattern = function(.data, dependent=NULL, explanatory=NULL,
                           rotate.names = TRUE, ...){
  # Default: with no variables named, the whole data frame is characterised.
  pattern_data = .data

  if (!(is.null(dependent) && is.null(explanatory))){
    # Otherwise keep only the columns named as dependent or explanatory.
    # Matching is by column name, so columns stay in their original order
    # and names not present in .data are simply not selected.
    model_vars = c(dependent, explanatory)
    is_model_var = names(.data) %in% model_vars
    pattern_data = .data[is_model_var]
  }

  # Anything supplied through ... is forwarded untouched to md.pattern.
  mice::md.pattern(pattern_data, rotate.names = rotate.names, ...)
}
41

42

43
#' Create predictorMatrix for use with mice
44
#'
45
#' @param .data Data frame.
46
#' @param drop_from_imputed Quoted names of variables not to impute.
47
#' @param drop_from_imputer Quoted names of variables not to use in imputation
48
#'   algorithm.
49
#'
50
#' @return Matrix formatted for predictorMatrix argument in mice.
51
#' @importFrom mice make.predictorMatrix
52
#' @export
53
#'
54
#' @examples
55
#' library(mice)
56
#' library(dplyr)
57
#'
58
#' # Create some extra missing data
59
#' ## Smoking missing completely at random
60
#' set.seed(1)
61
#' colon_s$smoking_mcar =
62
#'   sample(c("Smoker", "Non-smoker", NA),
63
#'   dim(colon_s)[1], replace=TRUE,
64
#'   prob = c(0.2, 0.7, 0.1)) %>%
65
#'   factor() %>%
66
#'   ff_label("Smoking (MCAR)")
67
#'
68
#' ## Make smoking missing conditional on patient sex
69
#' colon_s$smoking_mar[colon_s$sex.factor == "Female"] =
70
#'   sample(c("Smoker", "Non-smoker", NA),
71
#' 	 sum(colon_s$sex.factor == "Female"),
72
#' 	 replace = TRUE, prob = c(0.1, 0.5, 0.4))
73
#'
74
#' colon_s$smoking_mar[colon_s$sex.factor == "Male"] =
75
#'   sample(c("Smoker", "Non-smoker", NA),
76
#' 	 sum(colon_s$sex.factor == "Male"),
77
#' 	 replace=TRUE, prob = c(0.15, 0.75, 0.1))
78
#' colon_s$smoking_mar = factor(colon_s$smoking_mar)%>%
79
#'   ff_label("Smoking (MAR)")
80
#'
81
#' explanatory = c("age", "sex.factor",
82
#'   "nodes", "obstruct.factor", "smoking_mar")
83
#' dependent = "mort_5yr"
84
#'
85
#' colon_s %>%
86
#' select(dependent, explanatory) %>%
87
#'   missing_predictorMatrix(drop_from_imputed =
88
#'     c("obstruct.factor", "mort_5yr")) -> predM
89
#'
90
#' colon_s %>%
91
#' 	select(dependent, explanatory) %>%
92
#' 	mice(m = 2, predictorMatrix = predM) %>% # e.g. m=10 when for real
93
#' 	# Run logistic regression on each imputed set
94
#' 	with(glm(formula(ff_formula(dependent, explanatory)),
95
#' 					 family="binomial")) %>%
96
#' 	pool() %>%
97
#' 	summary(conf.int = TRUE, exponentiate = TRUE) %>%
98
#' 	# Jiggle into finalfit format
99
#' 	mutate(explanatory_name = rownames(.)) %>%
100
#' 	select(explanatory_name, estimate, `2.5 %`, `97.5 %`, p.value) %>%
101
#' 	condense_fit(estimate_suffix = " (multiple imputation)") %>%
102
#' 	remove_intercept() -> fit_imputed
103
#'
104
missing_predictorMatrix = function(.data,
                                   drop_from_imputed = NULL,
                                   drop_from_imputer = NULL){
  # Start from the predictor matrix that mice builds for .data.
  pred_matrix = mice::make.predictorMatrix(.data)

  # Zero whole rows whose row name is listed in drop_from_imputed
  # (variables not to impute).
  if (!is.null(drop_from_imputed)){
    imputed_rows = rownames(pred_matrix) %in% drop_from_imputed
    pred_matrix[imputed_rows, ] = 0
  }

  # Zero whole columns whose column name is listed in drop_from_imputer
  # (variables not to use in the imputation algorithm).
  if (!is.null(drop_from_imputer)){
    imputer_cols = colnames(pred_matrix) %in% drop_from_imputer
    pred_matrix[, imputer_cols] = 0
  }

  pred_matrix
}
118

Read our documentation on viewing source code .

Loading