1 ```#' @title Descriptive statistics ``` 2 ```#' ``` 3 ```#' @description Range of descriptive statistics for continuous data. ``` 4 ```#' ``` 5 ```#' @param data A \code{data.frame} or \code{tibble}. ``` 6 ```#' @param ... Column(s) in \code{data}. ``` 7 ```#' ``` 8 ```#' @examples ``` 9 ```#' # single variable ``` 10 ```#' ds_summary_stats(mtcarz, mpg) ``` 11 ```#' ``` 12 ```#' # multiple variables ``` 13 ```#' ds_summary_stats(mtcarz, mpg, disp, hp) ``` 14 ```#' ``` 15 ```#' # all variables ``` 16 ```#' ds_summary_stats(mtcarz) ``` 17 ```#' ``` 18 ```#' @importFrom rlang !! ``` 19 ```#' @importFrom stats na.omit ``` 20 ```#' ``` 21 ```#' @seealso \code{\link[base]{summary}} ``` 22 ```#' \code{\link{ds_freq_table}} \code{\link{ds_cross_table}} ``` 23 ```#' ``` 24 ```#' @export ``` 25 ```#' ``` 26 ```ds_summary_stats <- function(data, ...) { ``` 27 28 1 ``` check_df(data) ``` 29 30 1 ``` var <- rlang::quos(...) ``` 31 32 1 ``` if (length(var) < 1) { ``` 33 1 ``` is_num <- sapply(data, is.numeric) ``` 34 1 ``` if (!any(is_num == TRUE)) { ``` 35 1 ``` stop("Data has no continuous variables.", call. = FALSE) ``` 36 ``` } ``` 37 0 ``` data <- data[, is_num] ``` 38 ``` } else { ``` 39 1 ``` data %<>% ``` 40 1 ``` dplyr::select(!!! var) ``` 41 1 ``` is_num <- sapply(data, is.numeric) ``` 42 1 ``` if (!any(is_num == TRUE)) { ``` 43 1 ``` stop("Data has no continuous variables.", call. = FALSE) ``` 44 ``` } ``` 45 ``` } ``` 46 47 1 ``` col_names <- names(data) ``` 48 1 ``` for (i in col_names) { ``` 49 1 ``` ds_rule(paste0('Variable: ', i)) ``` 50 1 ``` cat('\n\n') ``` 51 1 ``` print(ds_summary(data, i)) ``` 52 1 ``` cat('\n\n\n') ``` 53 ``` } ``` 54 55 ```} ``` 56 57 1 ```ds_summary <- function(data, variable) UseMethod("ds_summary") ``` 58 59 ```ds_summary.default <- function(data, variable) { ``` 60 61 1 ``` check_df(data) ``` 62 1 ``` vary <- rlang::enquo(variable) ``` 63 1 ``` var_name <- deparse(substitute(variable)) ``` 64 1 ``` check_numeric(data, !! vary, var_name) ``` 65 66 1 ``` odata <- dplyr::pull(data, !! vary) ``` 67 68 1 ``` sdata <- ``` 69 1 ``` data %>% ``` 70 1 ``` dplyr::pull(!! vary) %>% ``` 71 1 ``` na.omit() ``` 72 73 1 ``` low <- ds_tailobs(sdata, 5, "low") ``` 74 1 ``` high <- ds_tailobs(sdata, 5, "high") ``` 75 1 ``` low_val <- ds_rindex(sdata, low) ``` 76 1 ``` high_val <- ds_rindex(sdata, high) ``` 77 78 1 ``` result <- ``` 79 1 ``` list(obs = length(odata), ``` 80 1 ``` missing = sum(is.na(odata)), ``` 81 1 ``` avg = mean(sdata), ``` 82 1 ``` tavg = mean(sdata, trim = 0.05), ``` 83 1 ``` stdev = sd(sdata), ``` 84 1 ``` variance = var(sdata), ``` 85 1 ``` skew = ds_skewness(sdata), ``` 86 1 ``` kurtosis = ds_kurtosis(sdata), ``` 87 1 ``` uss = stat_uss(sdata), ``` 88 1 ``` css = ds_css(sdata), ``` 89 1 ``` cvar = ds_cvar(sdata), ``` 90 1 ``` sem = ds_std_error(sdata), ``` 91 1 ``` median = median(sdata), ``` 92 1 ``` mode = ds_mode(sdata), ``` 93 1 ``` range = ds_range(sdata), ``` 94 1 ``` min = min(sdata), ``` 95 1 ``` Max = max(sdata), ``` 96 1 ``` iqrange = IQR(sdata), ``` 97 1 ``` per99 = quantile(sdata, 0.99), ``` 98 1 ``` per90 = quantile(sdata, 0.90), ``` 99 1 ``` per95 = quantile(sdata, 0.95), ``` 100 1 ``` per75 = quantile(sdata, 0.75), ``` 101 1 ``` per25 = quantile(sdata, 0.25), ``` 102 1 ``` per10 = quantile(sdata, 0.10), ``` 103 1 ``` per5 = quantile(sdata, 0.05), ``` 104 1 ``` per1 = quantile(sdata, 0.01), ``` 105 1 ``` lowobs = low, ``` 106 1 ``` highobs = high, ``` 107 1 ``` lowobsi = low_val, ``` 108 1 ``` highobsi = high_val) ``` 109 110 1 ``` class(result) <- "ds_summary" ``` 111 1 ``` return(result) ``` 112 ```} ``` 113 114 ```print.ds_summary <- function(x, ...) { ``` 115 1 ``` print_stats(x) ``` 116 ```} ``` 117