1 ```#' Descriptive statistics for dataframe ``` 2 ```#' ``` 3 ```#' Everyone has a funcion like this, str, glimpse, glance etc. This one is ``` 4 ```#' specifically designed for use with \code{finalfit} language. It is different ``` 5 ```#' in dividing variables by numeric vs factor. ``` 6 ```#' ``` 7 ```#' @param .data Dataframe. ``` 8 ```#' @param dependent Optional character vector: name(s) of depdendent ``` 9 ```#' variable(s). ``` 10 ```#' @param explanatory Optional character vector: name(s) of explanatory ``` 11 ```#' variable(s). ``` 12 ```#' @param digits Significant digits for continuous variable summaries ``` 13 ```#' @param levels_cut Max number of factor levels to include in factor levels ``` 14 ```#' summary (in order to avoid the long printing of variables with many ``` 15 ```#' factors). ``` 16 ```#' ``` 17 ```#' @return Dataframe on summary data. ``` 18 ```#' @export ``` 19 ```#' @importFrom stats median ``` 20 ```#' ``` 21 ```#' @examples ``` 22 ```#' library(finalfit) ``` 23 ```#' dependent = 'mort_5yr' ``` 24 ```#' explanatory = c("age", "nodes", "age.factor", "extent.factor", "perfor.factor") ``` 25 ```#' colon_s %>% ``` 26 ```#' finalfit_glimpse(dependent, explanatory) ``` 27 28 ```ff_glimpse <- function(.data, dependent=NULL, explanatory=NULL, digits = 1, ``` 29 ``` levels_cut = 5){ ``` 30 1 ``` if(all(is.null(dependent), is.null(explanatory))){ ``` 31 0 ``` df.in = .data ``` 32 ``` } else { ``` 33 1 ``` df.in = .data %>% dplyr::select(dependent, explanatory) ``` 34 ``` } ``` 35 36 ``` # Continuous ``` 37 1 ``` df.in %>% ``` 38 1 ``` dplyr::select_if(is.numeric) -> df.numeric ``` 39 40 1 ``` if(dim(df.numeric)[2]!=0){ ``` 41 1 ``` df.numeric %>% ``` 42 1 ``` missing_glimpse(digits=digits) -> df.numeric.out1 ``` 43 44 1 ``` df.numeric %>% ``` 45 1 ``` purrr::map_df(function(x){ ``` 46 1 ``` mean = mean(x, na.rm = TRUE) ``` 47 1 ``` sd = sd(x, na.rm = TRUE) ``` 48 1 ``` min = min(x, na.rm = TRUE) ``` 49 1 ``` quartile_25 = quantile(x, probs = 0.25, na.rm = TRUE) ``` 50 1 ``` median = median(x, na.rm = TRUE) ``` 51 1 ``` quartile_75 = quantile(x, probs = 0.75, na.rm = TRUE) ``` 52 1 ``` max = max(x, na.rm = TRUE) ``` 53 1 ``` df.out = data.frame(mean, sd, min, quartile_25, median, quartile_75, max) %>% ``` 54 1 ``` dplyr::mutate_all(round_tidy, digits=digits) ``` 55 1 ``` }) -> df.numeric.out2 ``` 56 57 1 ``` df.numeric.out = data.frame(df.numeric.out1, df.numeric.out2) ``` 58 59 ``` }else{ ``` 60 0 ``` df.numeric.out = df.numeric ``` 61 ``` } ``` 62 63 ``` # Factors ``` 64 1 ``` df.in %>% ``` 65 1 ``` dplyr::select_if(Negate(is.numeric)) -> df.factors ``` 66 67 1 ``` if(dim(df.factors)[2]!=0){ ``` 68 69 1 ``` df.factors %>% ``` 70 1 ``` missing_glimpse(digits=digits) -> df.factors.out1 ``` 71 72 1 ``` fac2char = function(., cut = levels_cut) { ``` 73 1 ``` length(levels(.)) > cut ``` 74 ``` } ``` 75 76 1 ``` df.factors %>% ``` 77 1 ``` dplyr::mutate_if(fac2char, as.character) -> df.factors ``` 78 79 80 1 ``` df.factors %>% ``` 81 1 ``` purrr::map_df(function(x){ ``` 82 1 ``` levels_n = length(levels(as.factor(x))) ``` 83 1 ``` levels = ifelse(is.factor(x), ``` 84 1 ``` forcats::fct_explicit_na(x) %>% ``` 85 1 ``` levels() %>% ``` 86 1 ``` paste0("\"", ., "\"", collapse = ", "), ``` 87 ``` "-") ``` 88 1 ``` levels_count = ifelse(is.factor(x), ``` 89 1 ``` summary(x) %>% ``` 90 1 ``` paste(collapse = ", "), ``` 91 ``` "-") ``` 92 1 ``` levels_percent = ifelse(is.factor(x), ``` 93 1 ``` summary(x) %>% ``` 94 1 ``` prop.table() %>% ``` 95 1 ``` {. * 100} %>% ``` 96 1 ``` format(digits = 2) %>% ``` 97 1 ``` paste(collapse=", "), ``` 98 ``` "-") ``` 99 1 ``` df.out = tibble::tibble(levels_n, levels, levels_count, levels_percent) %>% data.frame() ``` 100 1 ``` }) -> df.factors.out2 ``` 101 102 1 ``` df.factors.out = data.frame(df.factors.out1, df.factors.out2) ``` 103 104 ``` }else{ ``` 105 0 ``` df.factors.out = df.factors ``` 106 ``` } ``` 107 ``` # Previous "always print" version ``` 108 ``` ## Change to standard ``` 109 ``` # cat("Continuous\n") ``` 110 ``` # print(df.numeric.out, row.names = TRUE) ``` 111 ``` # cat("\nCategorical\n") ``` 112 ``` # print(df.factors.out, row.names = TRUE) ``` 113 ``` # ``` 114 ``` # return(invisible( ``` 115 ``` # list( ``` 116 ``` # continuous = df.numeric.out, ``` 117 ``` # categorical = df.factors.out)) ``` 118 ``` # ) ``` 119 ``` # ``` 120 1 ``` return( ``` 121 1 ``` list( ``` 122 1 ``` Continuous = df.numeric.out, ``` 123 1 ``` Categorical = df.factors.out) ``` 124 ``` ) ``` 125 ```} ``` 126 127 128 ```#' @rdname ff_glimpse ``` 129 ```#' @export ``` 130 ```finalfit_glimpse <- ff_glimpse ```

Read our documentation on viewing source code .