hafen / trelliscopejs
1
#
2
# get_label <- function(data, cols) {
3
#   sapply(cols, function(col) {
4
#     lbl <- attr(data[[col]], "label")
5
#     if (is.null(lbl))
6
#       lbl <- col
7
#     lbl
8
#   })
9
# }
10

11
stop_nice <- function(...) {
12 1
  stop(paste(strwrap(paste(...), exdent = 7), collapse = "\n"), call. = FALSE)
13
}
14

15

16
# #' Compute automatic cognostics
17
# #'
18
# #' @param data a list of data frames (one per subset), a grouped data frame, or a nested data frame
19
# #' @return If the input is a list of data frames, the return value is a list of data frames containing the cognostics. If the input is a grouped or nested df, the result will be a nested df with a new column containing the cognostics.
20
# #' @importFrom purrr map map_df
21
# #' @export
22
# #' @seealso \code{\link{trelliscope}}
23
# auto_cogs <- function(data) {
24
#
25
#   # if a grouped df, nest it so we have a nested df
26
#   if (inherits(data, "grouped_df")) {
27
#     # nesting causes label attributes to be lost, so preserve them...
28
#     # (need to find a better way to deal with this)
29
#     labels <- lapply(data, function(x) attr(x, "label"))
30
#
31
#     data <- nest(data)
32
#
33
#     # set first subset label attributes (auto_cogs will look for them here)
34
#     for (nm in names(data$data[[1]]))
35
#       attr(data$data[[1]][[nm]], "label") <- labels[[nm]]
36
#   }
37
#
38
#   # in the case of nested df, there should be atomic columns indicating splitting variables
39
#   # and then a single "list" column of data frames
40
#   data_is_df <- FALSE
41
#   if (is.data.frame(data)) {
42
#     data_is_df <- TRUE
43
#
44
#     is_atomic <- sapply(data, is.atomic)
45
#     # at_least_one_atomic <- length(which(is_atomic)) > 0
46
#     exactly_one_non_atomic <- length(which(!is_atomic)) == 1
47
#     if (!exactly_one_non_atomic)
48
#       stop_nice("Data supplied to auto_cogs must be a data frame with a single",
49
#         "nested data frame column.")
50
#
51
#     nest_nm <- names(data)[which(!is_atomic)]
52
#
53
#     if (! inherits(data[[nest_nm]][[1]], "data.frame"))
54
#       stop_nice("Data in nested column supplied to auto_cogs must contain data frames.")
55
#
56
#     cog_data <- data[[nest_nm]]
57
#   } else {
58
#     cog_data <- data
59
#   }
60
#
61
#   # cog_spec is a list specifying the cognostics and their descriptions
62
#   # so that we can add these in later
63
#
64
#   ## determine which columns to compute what kind of cognostics for
65
#   cog_spec <- list(
66
#     count = tibble(col = NA, cogname = "count", desc = "number of observations")
67
#   )
68
#
69
#   # if any columns are unique per group, add them as an "identity" cognostic
70
#   tmp <- cog_data %>% purrr::map_df(. %>% summarise_all(n_distinct))
71
#   unique_cols <- names(tmp)[sapply(tmp, function(x) all(x == 1))]
72
#   if (length(unique_cols) > 0) {
73
#     cog_spec$unique <- tibble(
74
#       col = unique_cols,
75
#       cogname = sanitize(unique_cols),
76
#       desc = get_label(cog_data[[1]], unique_cols))
77
#   }
78
#
79
#   # if numeric and not unique, get the mean (TODO - other summary stats and group them)
80
#   num_cols <- names(cog_data[[1]])[sapply(cog_data[[1]], is.numeric)]
81
#   num_cols <- setdiff(num_cols, unique_cols)
82
#   if (length(num_cols) > 0)
83
#     cog_spec$num <- tibble(
84
#       col = num_cols,
85
#       cogname = paste0(sanitize(num_cols), "_mean"),
86
#       desc = paste("mean", get_label(cog_data[[1]], num_cols)))
87
#
88
#   tmp <- bind_rows(cog_spec)
89
#   cog_desc <- as.list(tmp$desc)
90
#   names(cog_desc) <- tmp$cogname
91
#
92
#   res <- map_cog(cog_data, function(x) {
93
#     res <- tibble(count = nrow(x))
94
#     for (ii in seq_along(cog_spec$unique$col))
95
#       res[[cog_spec$unique$cogname[ii]]] <- x[[cog_spec$unique$col[ii]]][1]
96
#     for (ii in seq_along(cog_spec$num$col))
97
#       res[[cog_spec$num$cogname[ii]]] <- mean(x[[cog_spec$num$col[ii]]])
98
#     res
99
#   })
100
#
101
#   if (data_is_df) {
102
#     return(
103
#       data %>%
104
#         mutate(auto_cogs = res)
105
#     )
106
#   } else {
107
#     return(res)
108
#   }
109
# }

Read our documentation on viewing source code .

Loading