ewenharrison / finalfit
1
#' Add column totals to \code{summary_factorlist()} output
2
#'
3
#' @param df.in \code{summary_factorlist()} output.
4
#' @param .data Data frame used to create \code{summary_factorlist()}.
5
#' @param dependent Character. Name of dependent variable.
6
#' @param na_include_dependent Logical. When TRUE, missing data in the dependent
7
#'   variable is included in totals.
8
#' @param percent Logical. Include percentage.
9
#' @param digits Integer length 1. Number of digits for percentage.
10
#' @param label Character. Label for total row.
11
#' @param prefix Character. Prefix for column totals, e.g "N=".
12
#'
13
#' @return Data frame.
14
#' @export
15
#'
16
#' @examples
17
#' explanatory = c("age.factor", "sex.factor", "obstruct.factor", "perfor.factor")
18
#' dependent = 'mort_5yr'
19
#' colon_s %>%
20
#'  summary_factorlist(dependent, explanatory) %>%
21
#'  ff_column_totals(colon_s, dependent)
22
#'
23
#' # Ensure works with missing data in dependent
24
#' colon_s = colon_s %>%
25
#'  dplyr::mutate(
26
#'   mort_5yr = forcats::fct_explicit_na(mort_5yr)
27
#'  )
28
#'  colon_s %>%
29
#'  summary_factorlist(dependent, explanatory) %>%
30
#'  ff_column_totals(colon_s, dependent)
31
ff_column_totals <- function(df.in, .data, dependent, na_include_dependent = FALSE, 
32
														 percent = TRUE, digits = 1, label = NULL, prefix = ""){
33 1
	if(!any(names(df.in) == "label")) stop("finalfit function must include: add_dependent_label = FALSE")
34

35 1
	if(na_include_dependent){
36 0
		.data = .data %>% 
37 0
			dplyr::mutate_if(names(.) %in% unlist(dependent) & 
38 0
									sapply(., is.factor),
39 0
								forcats::fct_explicit_na
40
			)
41
	} else {
42 1
		.data = .data %>% 
43 1
			tidyr::drop_na(dependent)
44
	}
45
	
46
	# Create column totals
47 1
	totals = .data %>% 
48 1
		dplyr::group_by(!! dplyr::sym(dependent)) %>% 
49 1
		dplyr::count() %>% 
50 1
		dplyr::group_by() %>% 
51 1
		dplyr::mutate(
52 1
			grand_total = sum(n, na.rm = TRUE),
53 1
			percent = 100 * n / grand_total
54
		)
55 1
	grand_total = totals %>% dplyr::pull(grand_total) %>% unique()
56
	
57 1
	if(percent){
58 1
		totals = totals %>% 
59 1
			dplyr::mutate(
60 1
				n = paste0(prefix, format_n_percent(n, percent, digits))
61
			)
62
	} else {
63 0
		totals = totals %>% 
64 0
			dplyr::mutate(
65 0
				n = paste0(prefix, n)
66
			)
67
	}
68
	
69 1
	if(is.null(label) & percent) label = "Total N (%)"
70 1
	if(is.null(label) & !percent) label = "Total N"
71
	
72
	# Pivot and add
73 1
	totals = totals %>% 
74 1
		dplyr::select(-c(grand_total, percent)) %>% 
75 1
		tidyr::pivot_wider(names_from = dependent, values_from = n) %>% 
76 1
		as.data.frame() %>% 
77 1
		dplyr::mutate(label = label, 
78 1
									levels= "") %>% 
79 1
		dplyr::select(label, levels, dplyr::everything()) 
80
	
81 1
	df.out = dplyr::bind_rows(totals, df.in)
82 1
	df.out[1, is.na(df.out[1, ])] = "" # For neatness change NA to "" in top row
83
	
84
	# Make total
85 1
	if(any(names(df.out) == "Total")){
86 0
		df.out[1, "Total"] = paste0(prefix, grand_total)
87
	}
88 1
	if(any(names(df.out) == "All")){
89 0
		df.out[1, "All"] = paste0(prefix, grand_total)
90
	}
91 1
	return(df.out)
92
}
93

94

95
#' @rdname ff_column_totals
96
#' @export
97
finalfit_column_totals = ff_column_totals
98

99

100
#' Add row totals to \code{summary_factorlist()} output
101
#'
102
#' This adds a total and missing count to variables. This is useful for
103
#' continuous variables. Compare this to \code{summary_factorlist(total_col =
104
#' TRUE)} which includes a count for each dummy variable as a factor and mean
105
#' (sd) or median (iqr) for continuous variables.
106
#'
107
#' @param df.in \code{summary_factorlist()} output.
108
#' @param .data Data frame used to create \code{summary_factorlist()}.
109
#' @param dependent Character. Name of dependent variable.
110
#' @param explanatory Character vector of any length: name(s) of explanatory
111
#'   variables.
112
#' @param missing_column Logical. Include a column of counts of missing data.
113
#' @param na_complete_cases Logical. When TRUE, missing data counts for variables
114
#'   are for compelte cases across all included variables.
115
#' @param na_include_dependent Logical. When TRUE, missing data in the dependent
116
#'   variable is included in totals.
117
#' @param total_name Character. Name of total column.
118
#' @param na_name Character. Name of missing column.
119
#'
120
#' @return Data frame.
121
#' @export
122
#'
123
#' @examples
124
#' explanatory = c("age.factor", "sex.factor", "obstruct.factor", "perfor.factor")
125
#' dependent = 'mort_5yr'
126
#' colon_s %>%
127
#'  summary_factorlist(dependent, explanatory) %>%
128
#' 	ff_row_totals(colon_s, dependent, explanatory)
129
ff_row_totals <- function(df.in, .data, dependent, explanatory, missing_column = TRUE, 
130
													na_include_dependent = FALSE, na_complete_cases = FALSE,
131
													total_name = "Total N", na_name= "Missing N"){
132 0
	if(!any(names(df.in) == "label")) 
133 0
		stop("summary_factorlist function must include: add_dependent_label = FALSE")
134
	
135
	# Extract labels
136 0
	var_labels = .data %>%
137 0
		extract_variable_label()
138

139 0
	if(na_include_dependent){
140 0
		.data = .data %>%
141 0
			dplyr::mutate_if(names(.) %in% unlist(dependent) &
142 0
											 	sapply(., is.factor),
143 0
											 forcats::fct_explicit_na
144
			)
145
	} else {
146 0
		.data = .data %>%
147 0
			tidyr::drop_na(dependent)
148
	}
149
	
150 0
	which_anyNA <- function(.data){
151 0
		.data %>% 
152 0
			tibble::rowid_to_column() %>% 
153 0
			dplyr::filter_all(dplyr::any_vars(is.na(.))) %>%
154 0
			dplyr::pull(rowid)
155
	}
156
	
157 0
	if(na_complete_cases){
158 0
		.data[which_anyNA(.data), ] = NA
159
	}
160

161
	# Relabel
162 0
	.data = .data %>%
163 0
		ff_relabel(var_labels)
164
	
165 0
	df.out = df.in %>%
166 0
		dplyr::left_join(
167 0
			missing_glimpse(.data, explanatory) %>% 
168 0
				dplyr::mutate(label = as.character(label)), by = "label"
169
		) %>%
170 0
		dplyr::mutate(            # Rename, change to character, remove "NAs"
171 0
			!! total_name := as.character(n) %>% dplyr::coalesce("")
172
		)
173 0
	if(missing_column){
174 0
		df.out = df.out %>% 
175 0
			dplyr::mutate(
176 0
				!! na_name := as.character(missing_n) %>% dplyr::coalesce("")
177 0
			) %>%  # Reorder columns, remove unwanted columns
178 0
			dplyr::select(label, !! total_name, !! na_name, dplyr::everything(), 
179 0
										-c(n, missing_n, var_type, missing_percent))
180
	} else {
181 0
		df.out = df.out %>%
182 0
			dplyr::select(label, !! total_name, dplyr::everything(), 
183 0
										-c(n, missing_n, var_type, missing_percent))
184
	}
185 0
	return(df.out)
186
}
187

188
#' @rdname ff_row_totals
189
#' @export
190
finalfit_row_totals = ff_row_totals

Read our documentation on viewing source code .

Loading