rsquaredacademy / descriptr
1
#' @title Descriptive statistics
2
#'
3
#' @description Range of descriptive statistics for continuous data.
4
#'
5
#' @param data A \code{data.frame} or \code{tibble}.
6
#' @param ... Column(s) in \code{data}.
7
#'
8
#' @examples
9
#' # single variable
10
#' ds_summary_stats(mtcarz, mpg)
11
#'
12
#' # multiple variables
13
#' ds_summary_stats(mtcarz, mpg, disp, hp)
14
#'
15
#' # all variables
16
#' ds_summary_stats(mtcarz)
17
#'
18
#' @importFrom rlang !!
19
#' @importFrom stats na.omit
20
#'
21
#' @seealso \code{\link[base]{summary}}
22
#' \code{\link{ds_freq_table}} \code{\link{ds_cross_table}}
23
#'
24
#' @export
25
#'
26
ds_summary_stats <- function(data, ...) {
27

28 1
  check_df(data)
29

30 1
  var <- rlang::quos(...)
31

32 1
  if (length(var) < 1) {
33 1
    is_num <- sapply(data, is.numeric)
34 1
    if (!any(is_num == TRUE)) {
35 1
      stop("Data has no continuous variables.", call. = FALSE)
36
    }
37 0
    data <- data[, is_num]
38
  } else {
39 1
    data %<>%
40 1
      dplyr::select(!!! var)
41 1
    is_num <- sapply(data, is.numeric)
42 1
    if (!any(is_num == TRUE)) {
43 1
      stop("Data has no continuous variables.", call. = FALSE)
44
    }
45
  }
46

47 1
  col_names <- names(data)
48 1
  for (i in col_names) {
49 1
    ds_rule(paste0('Variable: ', i))
50 1
    cat('\n\n')
51 1
    print(ds_summary(data, i))
52 1
    cat('\n\n\n')
53
  }
54

55
}
56

57 1
ds_summary <- function(data, variable) UseMethod("ds_summary")
58

59
ds_summary.default <- function(data, variable) {
60

61 1
  check_df(data)
62 1
  vary  <- rlang::enquo(variable)
63 1
  var_name <- deparse(substitute(variable))
64 1
  check_numeric(data, !! vary, var_name)
65

66 1
  odata <- dplyr::pull(data, !! vary)
67

68 1
  sdata <-
69 1
    data %>%
70 1
    dplyr::pull(!! vary) %>%
71 1
    na.omit()
72

73 1
  low      <- ds_tailobs(sdata, 5, "low")
74 1
  high     <- ds_tailobs(sdata, 5, "high")
75 1
  low_val  <- ds_rindex(sdata, low)
76 1
  high_val <- ds_rindex(sdata, high)
77

78 1
  result <-
79 1
      list(obs      = length(odata),
80 1
           missing  = sum(is.na(odata)),
81 1
           avg      = mean(sdata),
82 1
           tavg     = mean(sdata, trim = 0.05),
83 1
           stdev    = sd(sdata),
84 1
           variance = var(sdata),
85 1
           skew     = ds_skewness(sdata),
86 1
           kurtosis = ds_kurtosis(sdata),
87 1
           uss      = stat_uss(sdata),
88 1
           css      = ds_css(sdata),
89 1
           cvar     = ds_cvar(sdata),
90 1
           sem      = ds_std_error(sdata),
91 1
           median   = median(sdata),
92 1
           mode     = ds_mode(sdata),
93 1
           range    = ds_range(sdata),
94 1
           min      = min(sdata),
95 1
           Max      = max(sdata),
96 1
           iqrange  = IQR(sdata),
97 1
           per99    = quantile(sdata, 0.99),
98 1
           per90    = quantile(sdata, 0.90),
99 1
           per95    = quantile(sdata, 0.95),
100 1
           per75    = quantile(sdata, 0.75),
101 1
           per25    = quantile(sdata, 0.25),
102 1
           per10    = quantile(sdata, 0.10),
103 1
           per5     = quantile(sdata, 0.05),
104 1
           per1     = quantile(sdata, 0.01),
105 1
           lowobs   = low,
106 1
           highobs  = high,
107 1
           lowobsi  = low_val,
108 1
           highobsi = high_val)
109

110 1
  class(result) <- "ds_summary"
111 1
  return(result)
112
}
113

114
print.ds_summary <- function(x, ...) {
115 1
  print_stats(x)
116
}
117

Read our documentation on viewing source code .

Loading