ewenharrison / finalfit
1
#' Compare missing data
#'
#' Recodes the dependent variable into a two-level factor ("Not missing" /
#' "Missing") and passes the result to \code{\link{summary_factorlist}()} to
#' tabulate the explanatory variables by missingness of the dependent.
#'
#' @param .data Dataframe.
#' @param dependent Variable to test missingness against other variables with.
#'   Must be a single column name supplied as a character string.
#' @param explanatory Variables to have missingness tested against.
#' @param na_include Include missing data in explanatory variables as a factor
#'   level.
#' @param ... Other arguments to \code{\link{summary_factorlist}()}. Unless
#'   supplied here, \code{p = TRUE}, \code{add_dependent_label = TRUE} and
#'   \code{dependent_label_prefix = "Missing data analysis: "} are used.
#'
#' @return A dataframe comparing missing data in the dependent variable across
#'   explanatory variables. The comparisons and p-values are those produced by
#'   \code{\link{summary_factorlist}()}; see its documentation for the tests
#'   applied to continuous and discrete variables.
#' @export
#'
#' @examples
#' library(finalfit)
#'
#' explanatory = c("age", "age.factor", "extent.factor", "perfor.factor")
#' dependent = "mort_5yr"
#'
#' colon_s %>%
#'   ff_glimpse(dependent, explanatory)
#'
#' colon_s %>%
#'   missing_pattern(dependent, explanatory)
#'
#' colon_s %>%
#'   missing_compare(dependent, explanatory)

missing_compare <- function(.data, dependent, explanatory, na_include = FALSE, ...){
  if(length(dependent) != 1){
    stop("One and only one dependent variable must be provided")
  }

  # Capture variable labels up front so they can be re-applied once the
  # dependent column has been overwritten below.
  vlabels = .data %>%
    extract_variable_label()

  # Replace the dependent column with a missingness indicator. "Not missing"
  # is the first (reference) level.
  df.out = .data %>%
    dplyr::mutate(
      !! rlang::sym(dependent) := dplyr::case_when(
        !is.na(!! rlang::sym(dependent)) ~ "Not missing",
        is.na(!! rlang::sym(dependent)) ~ "Missing"
      ) %>%
        factor(levels = c("Not missing", "Missing"))
    ) %>%
    ff_relabel(vlabels)

  args = list(.data = df.out, dependent = dependent, explanatory = explanatory,
              na_include = na_include, ...)

  # Fill in defaults only where the caller has not supplied a value.
  # `[[` is used rather than `$` because `$` partial-matches list names:
  # `args$p` would pick up a lone `p_cat` or `p_cont_para` passed via `...`
  # and wrongly skip the `p = TRUE` default.
  if(is.null(args[["p"]])) args[["p"]] = TRUE
  if(is.null(args[["add_dependent_label"]])) args[["add_dependent_label"]] = TRUE
  if(is.null(args[["dependent_label_prefix"]])) {
    args[["dependent_label_prefix"]] = "Missing data analysis: "
  }

  do.call(summary_factorlist, args)
}

Read our documentation on viewing source code.

Loading