ewenharrison / finalfit
1
#' Summarise a set of factors (or continuous variables) by a dependent variable
2
#'
3
#' A function that takes a single dependent variable with a vector of
4
#' explanatory variable names (continuous or categorical variables) to produce a
5
#' summary table.
6
#'
7
#' This function aims to produce publication-ready summary tables for
8
#' categorical or continuous dependent variables. It usually takes a categorical
9
#' dependent variable to produce a cross table of counts and proportions
10
#' expressed as percentages or summarised continuous explanatory variables.
11
#' However, it will take a continuous dependent variable to produce mean
12
#' (standard deviation) or median (interquartile range) for use with linear
13
#' regression models.
14
#'
15
#' @param .data Dataframe.
16
#' @param dependent Character vector of length 1:  name of dependent variable (2
17
#'   to 5 factor levels).
18
#' @param explanatory Character vector of any length: name(s) of explanatory
19
#'   variables.
20
#' @param cont Summary for continuous explanatory variables: "mean" (standard
21
#'   deviation) or "median" (interquartile range). If "median" then
22
#'   non-parametric hypothesis test performed (see below).
23
#' @param cont_nonpara Numeric vector of form e.g. \code{c(1,2)}. Specify which
24
#'   variables to perform non-parametric hypothesis tests on and summarise with
25
#'   "median".
26
#' @param cont_cut  Numeric: number of unique values in continuous variable at
27
#'   which to consider it a factor.
28
#' @param cont_range Logical. Median is show with 1st and 3rd quartiles.
29
#' @param p Logical: Include null hypothesis statistical test.
30
#' @param p_cont_para Character. Continuous variable parametric test. One of
31
#'   either "aov" (analysis of variance) or "t.test" for Welch two sample
32
#'   t-test. Note continuous non-parametric test is always Kruskal Wallis
33
#'   (kruskal.test) which in two-group setting is equivalent to Mann-Whitney U
34
#'   /Wilcoxon rank sum test.
35
#'
36
#'   For continous dependent and continuous explanatory, the parametric test
37
#'   p-value returned is for the Pearson correlation coefficient. The
38
#'   non-parametric equivalent is for the p-value for the Spearman correlation
39
#'   coefficient.
40
#' @param p_cat Character. Categorical variable test. One of either "chisq" or
41
#'   "fisher".
42
#' @param column Logical: Compute margins by column rather than row.
43
#' @param total_col Logical: include a total column summing across factor
44
#'   levels.
45
#' @param orderbytotal Logical: order final table by total column high to low.
46
#' @param digits Number of digits to round to (1) mean/median, (2) standard
47
#'   deviation / interquartile range, (3) p-value, (4) count percentage.
48
#' @param na_include Logical: make explanatory variables missing data explicit
49
#'   (\code{NA}).
50
#' @param na_include_dependent Logical: make dependent variable missing data
51
#'   explicit.
52
#' @param na_complete_cases Logical: include only rows with complete data.
53
#' @param na_to_p Logical: include missing as group in statistical test.
54
#' @param fit_id Logical: allows merging via \code{\link{finalfit_merge}}.
55
#' @param add_dependent_label Add the name of the dependent label to the top
56
#'   left of table.
57
#' @param dependent_label_prefix Add text before dependent label.
58
#' @param dependent_label_suffix Add text after dependent label.
59
#' @param add_col_totals Logical. Include column total n.
60
#' @param include_col_totals_percent Include column percentage of total.
61
#' @param col_totals_rowname Logical. Row name for column totals.
62
#' @param col_totals_prefix Character. Prefix to column totals, e.g. "N=".
63
#' @param add_row_totals Logical. Include row totals. Note this differs from
64
#'   \code{total_col} above particularly for continuous explanatory variables.
65
#' @param include_row_missing_col Logical. Include missing data total for each
66
#'   row. Only used when \code{add_row_totals} is \code{TRUE}.
67
#' @param row_totals_colname Character. Column name for row totals.
68
#' @param row_missing_colname Character. Column name for missing data totals for
69
#'   each row.
70
#' @param catTest Deprecated. See \code{p_cat} above.
71
#'
72
#' @return Returns a \code{factorlist} dataframe.
73
#'
74
#' @family finalfit wrappers
75
#' @seealso \code{\link{fit2df}} \code{\link{ff_column_totals}}
76
#'   \code{\link{ff_row_totals}} \code{\link{ff_label}} \code{\link{ff_glimpse}}
77
#'   \code{\link{ff_percent_only}}
78
#' @export
79
#'
80
#' @examples
81
#' library(finalfit)
82
#' library(dplyr)
83
#' # Load example dataset, modified version of survival::colon
84
#' data(colon_s)
85
#'
86
#' # Table 1 - Patient demographics ----
87
#' explanatory = c("age", "age.factor", "sex.factor", "obstruct.factor")
88
#' dependent = "perfor.factor"
89
#' colon_s %>%
90
#'   summary_factorlist(dependent, explanatory, p=TRUE)
91
#'
92
#' # summary.factorlist() is also commonly used to summarise any number of
93
#' # variables by an outcome variable (say dead yes/no).
94
#'
95
#' # Table 2 - 5 yr mortality ----
96
#' explanatory = c("age.factor", "sex.factor", "obstruct.factor", "perfor.factor")
97
#' dependent = "mort_5yr"
98
#' colon_s %>%
99
#'   summary_factorlist(dependent, explanatory)
100
summary_factorlist <- function(.data, 
101
															 dependent = NULL, explanatory, 
102
															 cont = "mean", cont_nonpara = NULL, cont_cut = 5, cont_range = FALSE, 
103
															 p = FALSE, p_cont_para = "aov", p_cat = "chisq",
104
															 column = TRUE, total_col = FALSE, orderbytotal = FALSE,
105
															 digits = c(1, 1, 3, 1), 
106
															 na_include = FALSE, na_include_dependent = FALSE, 
107
															 na_complete_cases = FALSE, na_to_p = FALSE,
108
															 fit_id = FALSE,
109
															 add_dependent_label = FALSE,  
110
															 dependent_label_prefix = "Dependent: ", dependent_label_suffix = "",
111
															 add_col_totals = FALSE, include_col_totals_percent = TRUE,
112
															 col_totals_rowname = NULL, col_totals_prefix = "",
113
															 add_row_totals = FALSE, include_row_missing_col = TRUE,
114
															 row_totals_colname = "Total N", row_missing_colname = "Missing N",
115
															 catTest = NULL){
116
	
117
	
118
	# Warnings/Checks --------------
119 1
	if(!is.data.frame(.data)) stop(".data is not dataframe")
120 1
	if(any(class(.data) %in% c("tbl_df", "tbl"))) .data = data.frame(.data)
121 1
	if(is.null(explanatory)) stop("No explanatory variable(s) provided")
122 1
	if(any(explanatory == ".")){
123 0
		explanatory = .data %>% 
124 0
			dplyr::select(-dependent) %>% 
125 0
			names()
126
	}
127 1
	if(is.null(dependent)){
128 0
		message("No dependent variable(s) provided; defaulting to single-level factor")
129 0
		dependent = "all"
130 0
		.data$all = factor(1, labels="all")
131
	}
132 1
	if(na_to_p & !na_include) warning("If wish to pass missing to hypothesis test (na_to_p), must have na_include = TRUE")
133
	
134
	# Deprecated catTest from Hmisc for reverse dependencies
135 1
	if(!is.null(catTest)){
136 0
		message("catTest is deprecated. Using p_cat = 'fisher'")
137 0
		p_cat = "fisher"}
138
	
139
	# Extract explanatory terms (to support using * and :)
140 1
	explanatory_terms = paste("~", paste(explanatory, collapse = "+")) %>% 
141 1
		formula() %>% 
142 1
		terms() %>% 
143 1
		attr("term.labels")
144
	
145 1
	if(dependent %in% explanatory) stop("Cannot have dependent variable in explanatory list.")
146
	
147 1
	if(!is.null(cont_nonpara) && max(cont_nonpara) > length(explanatory)) {
148 0
		stop("cont_nonpara cannot include values greater than the number of explanatory variables")
149
	}
150
	
151
	# Definitions ------------------------------------------------------------
152
	## Dependent as survival object handling
153 1
	d_is.surv = grepl("Surv[(].*[)]", dependent)
154
	
155 1
	if(d_is.surv){
156 1
		message("Dependent variable is a survival object")
157 1
		.data$all = factor(1, labels="all")
158 1
		dependent = "all"
159
		
160
		# Remove strata and cluster terms - keep in table for now
161 1
		drop = grepl("cluster[(].*[)]", explanatory) |
162 1
			grepl("strata[(].*[)]", explanatory) |
163 1
			grepl("frailty[(].*[)]", explanatory)
164 1
		explanatory = explanatory[!drop]
165
	}    
166
	
167
	# Remove interactions and indicator variables
168
	## Intentionally done separately to above line. 
169 1
	explanatory = paste("~", paste(explanatory, collapse = "+")) %>% 
170 1
		formula() %>% 
171 1
		all.vars()
172
	
173
	## Active dataset
174 1
	.data = .data %>% 
175 1
		dplyr::select(dependent, explanatory)
176
	
177
	## Dependent is numeric
178 1
	d_is.numeric = .data %>% 
179 1
		dplyr::pull(dependent) %>% 
180 1
		is.numeric()
181
	
182 1
	if(d_is.numeric & add_col_totals){
183 0
		add_col_totals = FALSE
184 0
		message("Cannot have add_col_totals with numeric dependent.")
185
	}
186
	
187
	## Continous data to categorical if unique values below threshold
188 1
	cont_distinct = .data %>%
189 1
		dplyr::select(explanatory) %>% 
190 1
		dplyr::summarise_if(is.numeric, dplyr::n_distinct) %>% 
191 1
		purrr::keep(~ .x < cont_cut) %>% 
192 1
		names()
193 1
	.data = .data %>% 
194 1
		dplyr::mutate_at(cont_distinct, as.factor)
195
	
196
	## Explanatory variable type
197 1
	explanatory_type = .data %>% 
198 1
		dplyr::select(explanatory) %>% 
199 1
		purrr::map(is.numeric)
200
	
201
	# Non-parametric variables
202 1
	explanatory_nonpara = vector(length = length(explanatory))
203 1
	explanatory_nonpara[cont_nonpara] = TRUE
204 1
	if(cont == "median") explanatory_nonpara = TRUE
205
	
206
	## Labels
207 1
	var_labels = .data %>% 
208 1
		dplyr::select(explanatory) %>% 
209 1
		extract_variable_label()
210
	
211
	# Missing data handling ------------------------------------------------------------
212 1
	df.in = .data
213
	
214
	# Explanatory variables, make NA explicit for factors
215 1
	if(na_complete_cases){
216 0
		df.in = df.in %>% 
217 0
			tidyr::drop_na()
218
	}
219
	
220 1
	if(na_include){
221 0
		df.in = df.in %>% 
222 0
			dplyr::mutate_if(names(.) %in% unlist(explanatory) & 
223 0
											 	sapply(., is.factor),
224 0
											 forcats::fct_explicit_na
225
			)}
226
	
227 1
	if(na_include_dependent & !d_is.numeric){
228 0
		df.in = df.in %>% 
229 0
			dplyr::mutate(
230 0
				!! sym(dependent) := forcats::fct_explicit_na(!! sym(dependent))
231
			)
232 1
	} else if(!na_include_dependent & !d_is.numeric){
233 1
		df.in = df.in %>% 
234 1
			tidyr::drop_na(dependent)
235 1
	} else if(na_include_dependent & d_is.numeric){
236 0
		warnings("Dependent is numeric and missing values cannot be made explicit. 
237 0
							 Make dependent a factor or use na_include_dependent = FALSE.")
238
	}
239
	
240
	## Missing data to p-tests or not 
241 1
	if(na_to_p){
242 0
		df.p = df.in
243
	} else {
244 1
		df.p = .data
245
	}
246 1
	if(p && na_to_p){
247 0
		message("Explanatory variable(s) missing data included in hypothesis test (p-value).")
248
	}
249
	
250 1
	if(!na_include_dependent &
251 1
		 .data %>% 
252 1
		 dplyr::pull(dependent) %>% 
253 1
		 is.na() %>% 
254 1
		 any()) {message("Note: dependent includes missing data. These are dropped.")}
255
	
256
	
257
	# Continous dependent --------------------------------------------------------------------
258 1
	if(d_is.numeric){
259
		
260
		## Hypothesis tests ---------
261 1
		if(p){
262 1
			p_tests =  purrr::pmap(list(explanatory, explanatory_type, explanatory_nonpara), 
263
														 # Categorical / parametric
264 1
														 ~ if(!..2 && !..3){
265 1
														 	if(p_cont_para == "aov"){
266 1
														 		summary(aov(as.formula(paste(dependent, "~", ..1)), df.p))[[1]][["Pr(>F)"]][[1]] %>% 
267 1
														 			p_tidy(digits[3], "")
268 1
														 	} else if (p_cont_para == "t.test"){
269 0
														 		t.test(as.formula(paste(dependent, "~", ..1)), df.p)$p.value %>% 
270 0
														 			p_tidy(digits[3], "")
271
														 	}
272
														 	# Categorical / non-parametric
273 1
														 } else if (!..2 & ..3){
274 0
														 	kruskal.test(as.formula(paste(dependent, "~", ..1)), df.p)$p.value %>% 
275 0
														 		p_tidy(digits[3], "") 
276
														 	# Continous / parametric
277 1
														 } else if (..2 & !..3){
278 1
														 	cor.test(as.formula(paste("~", dependent, "+", ..1)), df.p, method="pearson")$p.value %>% 
279 1
														 		p_tidy(digits[3], "") 
280
														 	# Continous / non-parametric
281 1
														 } else if (..2 & ..3){
282 0
														 	cor.test(as.formula(paste("~", dependent, "+", ..1)), df.p, method="spearman")$p.value %>% 
283 0
														 		p_tidy(digits[3], "") 
284
														 }
285
			)
286
		}  
287
		
288 1
		summary_cont_name = rep("Mean (sd)", length(explanatory_nonpara))
289 1
		summary_cont_name[explanatory_nonpara] = "Median (IQR)"
290
		
291
		## Output table  --------------  
292 1
		df.out = purrr::pmap(list(explanatory, explanatory_type, explanatory_nonpara, summary_cont_name), 
293 1
												 ~ if(!..2){
294 1
												 	df.in %>% 
295 1
												 		dplyr::group_by(!! sym(..1)) %>%  
296 1
												 		tidyr::drop_na(!! sym(dependent)) %>% 
297 1
												 		dplyr::summarise(value_mean = mean(!! sym(dependent), na.rm = TRUE),
298 1
												 										 value_sd = sd(!! sym(dependent), na.rm = TRUE),
299 1
												 										 value_median = median(!! sym(dependent), na.rm = TRUE),
300 1
												 										 value_q1 =quantile(!! sym(dependent), 0.25, na.rm = TRUE),
301 1
												 										 value_q3 = quantile(!! sym(dependent), 0.75, na.rm = TRUE),
302 1
												 										 n = dplyr::n()) %>% 
303 1
												 		tidyr::drop_na() %>% 
304 1
												 		dplyr::ungroup() %>% 
305 1
												 		dplyr::mutate(
306 1
												 			col_total = sum(n),
307 1
												 			col_total_prop = 100 * n/col_total,
308 1
												 			Total = format_n_percent(n, col_total_prop, digits[[4]]),
309 1
												 			label = ..1,
310 1
												 			unit = ..4,
311
												 		)  %>% 
312 1
												 		dplyr::rename(levels = 1) %>% 
313 1
												 		{ if(! ..3){
314 1
												 			dplyr::mutate(., 
315 1
												 										value = paste0(value_mean %>% round_tidy(digits[1]), " (", 
316 1
												 																	 value_sd %>% round_tidy(digits[1]), ")")
317
												 			)
318
												 		} else {
319 1
												 			{ if(cont_range){
320 1
												 				dplyr::mutate(., 
321 1
												 											value = paste0(value_median %>% round_tidy(digits[1]), " (", 
322 1
												 																		 value_q1 %>% round_tidy(digits[1]), " to ",
323 1
												 																		 value_q3 %>% round_tidy(digits[1]), ")")
324
												 				) 
325
												 			} else {
326 1
												 				dplyr::mutate(., 
327 1
												 											value = paste0(value_median %>% round_tidy(digits[1]), " (", 
328 1
												 																		 {value_q3 - value_q1} %>% round_tidy(digits[1]), ")")
329
												 				)
330
												 			}}
331
												 			
332
												 		}} %>% 
333 1
												 		{if(total_col){
334 1
												 			dplyr::select(., label, levels, unit, value, Total)
335
												 		} else {
336 1
												 			dplyr::select(., label, levels, unit, value)
337
												 		}}  %>% 
338 1
												 		dplyr::mutate_all(as.character)
339 1
												 } else if(..2){
340 1
												 	df.in %>% 
341 1
												 		tidyr::drop_na(!! sym(dependent)) %>% 
342 1
												 		dplyr::summarise(value_mean = mean(!! sym(dependent), na.rm = TRUE),
343 1
												 										 value_sd = sd(!! sym(dependent), na.rm = TRUE),
344 1
												 										 value_median = median(!! sym(dependent), na.rm = TRUE),
345 1
												 										 value_q1 =quantile(!! sym(dependent), 0.25, na.rm = TRUE),
346 1
												 										 value_q3 = quantile(!! sym(dependent), 0.75, na.rm = TRUE),
347 1
												 										 value_min = min(!! sym(..1), na.rm = TRUE),
348 1
												 										 value_max = max(!! sym(..1), na.rm = TRUE),
349 1
												 										 n = (!is.na(!! sym(..1))) %>% sum(),
350
												 										 # row_total = dplyr::n(), # think about whether prop of df length
351
												 										 # row_prop = 100 * n/row_total,
352 1
												 										 Total = format_n_percent(n, 100, digits[[4]])) %>%
353 1
												 		dplyr::mutate(
354 1
												 			label = ..1,
355 1
												 			levels = paste0("[", value_min %>% round_tidy(digits[1]), ",", 
356 1
												 											value_max %>% round_tidy(digits[1]), "]"),
357 1
												 			unit = ..4
358
												 		) %>% 
359 1
												 		{ if(! ..3){
360 1
												 			dplyr::mutate(., 
361 1
												 										value = paste0(value_mean %>% round_tidy(digits[1]), " (", 
362 1
												 																	 value_sd %>% round_tidy(digits[1]), ")")
363
												 			)
364
												 		} else {
365 1
												 			{ if(cont_range){
366 1
												 				dplyr::mutate(., 
367 1
												 											value = paste0(value_median %>% round_tidy(digits[1]), " (", 
368 1
												 																		 value_q1 %>% round_tidy(digits[1]), " to ",
369 1
												 																		 value_q3 %>% round_tidy(digits[1]), ")")
370
												 				) 
371
												 			} else {
372 1
												 				dplyr::mutate(., 
373 1
												 											value = paste0(value_median %>% round_tidy(digits[1]), " (", 
374 1
												 																		 {value_q3 - value_q1} %>% round_tidy(digits[1]), ")")
375
												 				)
376
												 			}}
377
												 			
378
												 		}} %>% 
379 1
												 		{if(total_col){
380 1
												 			dplyr::select(., label, levels, unit, value, Total)
381
												 		} else{
382 1
												 			dplyr::select(., label, levels, unit, value)
383
												 		}}  %>% 
384 1
												 		dplyr::mutate_all(as.character)
385
												 }
386
		)
387
	} else {
388
		
389
		# Categorical dependent -----------------------------------------------------------------------------
390
		
391
		## Hypothesis tests ---------
392 1
		if(p){
393 1
			p_tests =  purrr::pmap(list(explanatory, explanatory_type, explanatory_nonpara), 
394 1
														 ~ if(!..2){
395 1
														 	df.p %>%
396 1
														 		{ if(p_cat == "chisq"){
397 1
														 			dplyr::summarise(., chisq.test(!! sym(..1), !! sym(dependent))$p.value) %>% 
398 1
														 				p_tidy(digits[3], "")
399 1
														 		} else if (p_cat == "fisher"){
400 0
														 			dplyr::summarise(., fisher.test(!! sym(..1), !! sym(dependent))$p.value) %>% 
401 0
														 				p_tidy(digits[3], "")
402
														 		}}
403 1
														 } else if (..2 & !..3){
404 1
														 	{if (p_cont_para == "aov"){
405 1
														 		summary(aov(as.formula(paste(..1, "~", dependent)), df.p))[[1]][["Pr(>F)"]][[1]] %>% 
406 1
														 			p_tidy(digits[3], "")
407 1
														 	} else if (p_cont_para == "t.test"){
408 1
														 		t.test(as.formula(paste(..1, "~", dependent)), df.p)$p.value %>% 
409 1
														 			p_tidy(digits[3], "")
410
														 	}}
411 1
														 } else if (..2 & ..3){
412 0
														 	kruskal.test(as.formula(paste(..1, "~", dependent)), df.p)$p.value %>% 
413 0
														 		p_tidy(digits[3], "") 
414
														 }
415
			)
416
		}
417
		
418
		
419
		## Output table  --------------  
420 1
		df.out = purrr::pmap(list(explanatory, explanatory_type, explanatory_nonpara), 
421 1
												 ~ if(!..2){
422 1
												 	df.in %>% 
423 1
												 		dplyr::group_by(!! sym(dependent)) %>% 
424 1
												 		dplyr::count(!! sym(..1), .drop = FALSE) %>% 
425 1
												 		dplyr::ungroup() %>% 
426 1
												 		tidyr::drop_na() %>% 
427 1
												 		dplyr::mutate(grand_total = sum(n)) %>% 
428 1
												 		dplyr::group_by_at(2) %>% 
429 1
												 		dplyr::mutate(row_total = sum(n),
430 1
												 									col_total_prop = 100 * row_total / grand_total) %>% 
431 1
												 		{ if(column) {
432 1
												 			dplyr::group_by(., !! sym(dependent)) %>% 
433 1
												 				dplyr::mutate(
434 1
												 					col_total = sum(n),
435 1
												 					prop = 100 * n / col_total,
436 1
												 					Total = format_n_percent(row_total, col_total_prop, digits[[4]])
437
												 				) %>% 
438 1
												 				dplyr::select(-col_total)
439
												 		} else { 
440 1
												 			dplyr::group_by_at(., 2) %>% 
441 1
												 				dplyr::mutate(
442 1
												 					prop = 100 * n / row_total,
443 1
												 					Total = paste0(row_total, " (100)")
444
												 				)
445
												 		}
446
												 		} %>% 
447 1
												 		dplyr::ungroup() %>% 
448 1
												 		dplyr::mutate(
449 1
												 			value = format_n_percent(n, prop, digits[4])
450
												 		) %>%
451 1
												 		dplyr::select(-prop, -n, -grand_total, -col_total_prop) %>% 
452 1
												 		tidyr::pivot_wider(names_from = !! dependent, values_from = value) %>% 
453 1
												 		dplyr::mutate(
454 1
												 			label = names(.)[1]
455
												 		) %>%
456 1
												 		dplyr::rename(levels = 1) %>% 
457 1
												 		{if(orderbytotal){
458 0
												 			dplyr::arrange(., -row_total) 
459
												 		} else {
460
												 			.
461
												 		}} %>% 
462 1
												 		dplyr::select(-row_total) %>%     
463 1
												 		dplyr::select(label, levels, dplyr::everything()) %>% 
464 1
												 		dplyr::select(-Total, dplyr::everything()) %>% 
465 1
												 		dplyr::mutate_all(as.character) %>% 
466
												 		# Total column
467 1
												 		{ if(total_col){
468
												 			.
469
												 		} else {
470 1
												 			dplyr::select(., -Total)
471
												 		}
472
												 		}
473
												 } else {
474 1
												 	df.in %>% 
475 1
												 		dplyr::mutate(
476 1
												 			value_mean_total = mean(!! sym(..1), na.rm = TRUE),
477 1
												 			value_sd_total = sd(!! sym(..1), na.rm = TRUE),
478 1
												 			value_median_total = median(!! sym(..1), na.rm = TRUE),
479 1
												 			value_q1_total = quantile(!! sym(..1), 0.25, na.rm = TRUE),
480 1
												 			value_q3_total = quantile(!! sym(..1), 0.75, na.rm = TRUE)
481
												 		) %>%
482 1
												 		dplyr::group_by(!! sym(dependent)) %>% 
483 1
												 		dplyr::summarise(
484 1
												 			value_mean = mean(!! sym(..1), na.rm = TRUE),
485 1
												 			value_sd = sd(!! sym(..1), na.rm = TRUE),
486 1
												 			value_median = median(!! sym(..1), na.rm = TRUE),
487 1
												 			value_q1 = quantile(!! sym(..1), 0.25, na.rm = TRUE),
488 1
												 			value_q3 = quantile(!! sym(..1), 0.75, na.rm = TRUE),
489 1
												 			value_iqr = value_q3 - value_q1,
490 1
												 			value_mean_total = unique(value_mean_total),
491 1
												 			value_sd_total = unique(value_sd_total),
492 1
												 			value_median_total = unique(value_median_total),
493 1
												 			value_q1_total = unique(value_q1_total),
494 1
												 			value_q3_total = unique(value_q3_total),
495 1
												 			value_iqr_total = value_q3_total - value_q1_total,
496
												 			
497
												 		) %>% 
498 1
												 		{ if(! ..3) {
499 1
												 			dplyr::mutate(., 
500 1
												 										value = paste0(value_mean %>% round_tidy(digits[1]), " (", 
501 1
												 																	 value_sd %>%  round_tidy(digits[2]), ")") ,
502 1
												 										Total = paste0(value_mean_total %>% round_tidy(digits[1]), " (", 
503 1
												 																	 value_sd_total %>%  round_tidy(digits[2]), ")") 
504
												 			) %>% 
505 1
												 				dplyr::select(dependent, value, Total) %>% 
506 1
												 				tidyr::pivot_wider(names_from = !! dependent, values_from = value) %>% 
507 1
												 				dplyr::mutate(
508 1
												 					label = .x,
509 1
												 					levels = "Mean (SD)"
510
												 				) 
511 1
												 		} else if (..3){
512 1
												 			{if(cont_range){
513 0
												 				dplyr::mutate(., 
514 0
												 											value = paste0(value_median %>% round_tidy(digits[1]), " (", 
515 0
												 																		 value_q1 %>% round_tidy(digits[2]), " to ",
516 0
												 																		 value_q3 %>% round_tidy(digits[2]), ")"), 
517 0
												 											Total = paste0(value_median_total %>% round_tidy(digits[1]), " (", 
518 0
												 																		 value_q1_total %>% round_tidy(digits[2]), " to ",
519 0
												 																		 value_q3_total %>% round_tidy(digits[2]), ")") 
520
												 				)
521
												 			} else {
522 1
												 				dplyr::mutate(., 
523 1
												 											value = paste0(value_median %>% round_tidy(digits[1]), " (", 
524 1
												 																		 value_iqr %>% round_tidy(digits[2]), ")"), 
525 1
												 											Total = paste0(value_median_total %>% round_tidy(digits[1]), " (", 
526 1
												 																		 value_iqr_total %>% round_tidy(digits[2]), ")") 
527
												 				)
528
												 			}} %>% 
529 1
												 				dplyr::select(dependent, value, Total) %>% 
530 1
												 				tidyr::pivot_wider(names_from = !! dependent, values_from = value) %>% 
531 1
												 				dplyr::mutate(
532 1
												 					label = .x,
533 1
												 					levels = "Median (IQR)"
534
												 				) 
535
												 		}
536
												 		} %>% 
537 1
												 		dplyr::select(label, levels, dplyr::everything()) %>% 
538 1
												 		dplyr::select(-Total, dplyr::everything()) %>% 
539
												 		# Total column
540 1
												 		{ if(total_col){
541
												 			.
542
												 		} else {
543 1
												 			dplyr::select(., -Total)
544
												 		}
545
												 		}
546
												 }
547
		) 
548
	}
549 1
	df.out = df.out %>% 
550
		# Add hypothesis test
551 1
		{ if(p){
552 1
			purrr::map2_df(., p_tests,
553 1
										 ~ dplyr::mutate(.x,
554 1
										 								p = .y)
555
			)} else {
556 1
				dplyr::bind_rows(.)
557
			}} %>%
558 1
		dplyr::select(label, levels, dplyr::everything()) %>% 
559 1
		as.data.frame() %>%
560 1
		{ if(fit_id){
561 1
			levels_id = .$levels
562
			# Catagorical outcome, continous explanatory
563 1
			drop = levels_id %in% c("Mean (SD)", "Median (IQR)")
564 1
			levels_id[drop] = ""
565
			# Continuous outcome, continuous explanatory
566 1
			regex_sqbracket = "^(\\[).*(\\])$"
567 1
			drop = grepl(regex_sqbracket, levels_id)
568 1
			levels_id[drop] = ""
569
			# Where extra terms included, add these in, e.g. I(var) (not interactions)
570 1
			extra_terms = explanatory_terms[-which(explanatory_terms %in% explanatory)]
571 1
			drop = grepl(":", extra_terms)
572 1
			extra_terms = extra_terms[!drop]
573 1
			{ if(!identical(extra_terms, character(0))){
574 0
				levels_id = c(levels_id, rep("", length(extra_terms)))
575 0
				dplyr::add_row(., label = extra_terms)
576
			} else {
577
				.
578
			}} %>% 
579 1
			dplyr::mutate(., fit_id = paste0(label, levels_id),
580 1
										index = 1:dim(.)[1])
581
		} else {
582
			.
583
		}} %>% 
584
		
585
		# Recode variable names to labels where available 
586 1
		dplyr::mutate(
587 1
			label = dplyr::recode(label,  !!! var_labels)
588
		) %>% 
589
		
590
		# Remove duplicate variables/p-values
591 1
		rm_duplicate_labels() %>% 
592
		
593
		# Add column totals
594 1
		{ if(add_col_totals){
595 0
			ff_column_totals(., df.in, dependent, 
596 0
											 percent = include_col_totals_percent, 
597 0
											 na_include_dependent = na_include_dependent,
598 0
											 digits = digits[4], label = col_totals_rowname, 
599 0
											 prefix = col_totals_prefix)
600
		} else {
601
			.
602
		}} %>% 
603
		
604
		# Add row totals
605 1
		{ if(add_row_totals){
606 0
			ff_row_totals(., .data, dependent, explanatory, missing_column = include_row_missing_col,
607 0
										na_include_dependent = FALSE, na_complete_cases = na_complete_cases, 
608 0
										total_name = row_totals_colname, na_name = row_missing_colname)
609
		} else {
610
			.
611
		}} %>% 
612
		
613
		
614
		# Add dependent label
615 1
		{ if(add_dependent_label){
616 1
			dependent_label(., .data, dependent, 
617 1
											prefix=dependent_label_prefix, suffix = dependent_label_suffix)
618
		} else {
619
			.
620
		}} %>% 
621
		
622
		# Replace any missing values with "", e.g. in (Missing) column
623 1
		dplyr::mutate_all(.,
624 1
											~ ifelse(is.na(.), "", .)
625
		)
626 1
	class(df.out) = c("data.frame.ff", class(df.out))
627 1
	return(df.out)
628
}

Read our documentation on viewing source code .

Loading