hafen / trelliscopejs
1
#' Cast Column as a Cognostic
2
#'
3
#' Cast a column of a cognostics data frame as a cognostic object
4
#'
5
#' @param val a scalar value (numeric, character, date, etc.)
6
#' @param desc a description for this cognostic value
7
#' @param group optional categorization of the cognostic for organizational purposes in the viewer (currently not implemented in the viewer)
8
#' @param type the desired type of cognostic you would like to compute (see details)
9
#' @param default_label should this cognostic be used as a panel label in the viewer by default?
10
#' @param default_active should this cognostic be active (available for sort / filter / sample) by default?
11
#' @param filterable should this cognostic be filterable?  Default is \code{TRUE}.  It can be useful to set this to \code{FALSE} if the cognostic is categorical with many unique values and is only desired to be used as a panel label.
12
#' @param sortable should this cognostic be sortable?
13
#' @param log when being used in the viewer for visual univariate and bivariate filters, should the log be computed?  Useful when the distribution of the cognostic is very long-tailed or has large outliers.  Can either be a logical or a positive integer indicating the base.
14
#'
15
#' @return object of class "cog"
16
#'
17
#' @details Different types of cognostics can be specified through the \code{type} argument that will affect how the user is able to interact with those cognostics in the viewer.  This can usually be ignored because it will be inferred from the implicit data type of \code{val}.  But there are special types of cognostics, such as geographic coordinates and relations (not implemented) that can be specified as well.  Current possibilities for \code{type} are "key", "integer", "numeric", "factor", "date", "time", "href".
18
#'
19
#' @export
20
#' @examples
21
#' library(dplyr)
22
#' library(tidyr)
23
#' library(purrr)
24
#' library(ggplot2)
25
#' library(plotly)
26
#' 
27
#' mpg_cog <- mpg %>%
28
#'   nest(data = !one_of(c("manufacturer", "class"))) %>%
29
#'   mutate(
30
#'     cogs = map_cog(data, ~ tibble(
31
#'       mean_city_mpg = cog(mean(.$cty), desc = "Mean city mpg"),
32
#'       mean_hwy_mpg = cog(mean(.$hwy), desc = "Mean highway mpg"),
33
#'       most_common_drv = cog(tail(names(table(.$drv)), 1), desc = "Most common drive type")
34
#'     )),
35
#'     panel = map_plot(data, function(x) {
36
#'       plot_ly(data = x, x = ~cty, y = ~hwy,
37
#'         type = "scatter", mode = "markers") %>%
38
#'         layout(
39
#'           xaxis = list(range = c(9, 47)),
40
#'           yaxis = list(range = c(7, 37)))
41
#'     })
42
#'   )
43
#'
44
#' trelliscope(mpg_cog, name = "city_vs_highway_mpg", nrow = 1, ncol = 2)
45
cog <- function(val = NULL, desc = "", group = "common",
46
  type = NULL, default_label = FALSE, default_active = TRUE,
47
  filterable = TRUE, sortable = TRUE, log = NULL) {
48

49 1
  cog_types <- list(
50 1
    key           = as.character,
51 1
    integer       = as.integer,
52 1
    numeric       = as.numeric,
53 1
    factor        = as.character,
54 1
    date          = as.Date,
55 1
    time          = as.POSIXct,
56 1
    panelSrc      = as.character,
57 1
    panelSrcLocal = as.character,
58
    # color       = as.character,
59
    # geo         = as.cogGeo,
60
    # rel         = as.cogRel,
61
    # hier        = as.cogHier,
62 1
    href          = as.character
63
  )
64

65 1
  types <- names(cog_types)
66

67 1
  if (!is.null(type)) {
68 1
    if (!type %in% types)
69 0
      stop_nice("Invalid cognostics type:", type)
70

71 1
    val <- try(cog_types[[type]](val))
72 1
    if (inherits(val, "try-error"))
73 0
      val <- NA
74
  } else {
75
    # TODO: if type is not specified, set type to NA and wait until final
76
    # call to as_cognostics() to infer the type (to make sure we have them all)
77

78
    # try to infer type
79 1
    if (is.factor(val))
80 0
      val <- as.character(val)
81 1
    type <- infer_cog_type(val)
82 1
    if (is.na(type))
83 0
      val <- NA
84
  }
85

86 1
  if (is.null(log))
87 1
    log <- NA
88

89 1
  if (is.logical(log)) {
90 1
    log <- ifelse(log, 10, NA)
91
  }
92 1
  if (is.numeric(log)) {
93 0
    if (log <= 0)
94 0
      log <- NA
95
  }
96

97 1
  cog_attrs <- list(
98 1
    desc = desc,
99 1
    type = type,
100 1
    group = group,
101 1
    defLabel = default_label,
102 1
    defActive = default_active,
103 1
    filterable = filterable,
104 1
    log = log
105
  )
106 1
  attr(val, "cog_attrs") <- cog_attrs
107

108 1
  class(val) <- c("cog", class(val))
109 1
  val
110
}
111

112
infer_cog_type <- function(val) {
113 1
  if (is.factor(val) || is.character(val)) {
114 1
    if (all(grepl("^http://|^https://", val))) {
115 0
      type <- "href"
116
    } else {
117 1
      type <- "factor"
118
    }
119 1
  } else if (is.numeric(val)) {
120 1
    type <- "numeric"
121 0
  } else if (inherits(val, "Date")) {
122 0
    type <- "date"
123 0
  } else if (inherits(val, "POSIXct")) {
124 0
    type <- "time"
125
  } else {
126 0
    type <- NA
127
  }
128 1
  type
129
}
130

131
#' Helper function for creating a cognostic for a link to another display in a filtered state
132
#' @param display A string indicating the name of the display to link to.
133
#' @param var A string indicating the variable name to filter on.
134
#' @param val A string indicating the value of the filter.
135
#' @param desc a description for this cognostic value
136
#' @param group optional categorization of the cognostic for organizational purposes in the viewer (currently not implemented in the viewer)
137
#' @param default_label should this cognostic be used as a panel label in the viewer by default?
138
#' @param default_active should this cognostic be active (available for sort / filter / sample) by default?
139
#' @param filterable should this cognostic be filterable?  Default is \code{TRUE}.  It can be useful to set this to \code{FALSE} if the cognostic is categorical with many unique values and is only desired to be used as a panel label.
140
#' @param sortable should this cognostic be sortable?
141
#' @export
142
cog_disp_filter <- function(display, var, val,
143
  desc = "link", group = "common",
144
  default_label = FALSE, default_active = FALSE,
145
  filterable = FALSE, sortable = FALSE) {
146 0
  x <- paste0("#display=", display, "&filter=var:",
147 0
    var, ";type:select;val:", val)
148

149 0
  cog(x, type = "href", desc = desc, group = group, 
150 0
    default_label = default_label,
151 0
    default_active = default_active,
152 0
    filterable = filterable, sortable = sortable,
153 0
    log = FALSE)
154
}
155

156
#' Href Cognostic
157
#'
158
#' Create href to be used as cognostics in a trelliscope display
159
#'
160
#' @param x URL to link to
161
#' @param desc,group,default_label,default_active,filterable,sortable,log arguments passed to \code{\link{cog}}
162
#'
163
#' @seealso \code{\link{cog}}
164
#' @examples
165
#' \donttest{
166
#' library(dplyr)
167
#' library(tidyr)
168
#' library(plotly)
169
#' iris %>%
170
#'   nest(data = -Species) %>%
171
#'   mutate(
172
#'     panel = map_plot(data, function(x) {
173
#'       plot_ly(data = x, x = ~Sepal.Length, y = ~Sepal.Width,
174
#'         type = "scatter", mode = "markers")
175
#'     }),
176
#'     wiki_link = cog_href(paste0("https://en.wikipedia.org/wiki/Iris_",
177
#'       tolower(Species))[1], default_label = TRUE,
178
#'       desc = "link to species on wikipedia")
179
#'   ) %>%
180
#'   trelliscope(name = "iris_species", ncol = 3)
181
#' }
182
#' @export
183
cog_href <- function(x, desc = "link", group = "common",
184
  default_label = FALSE, default_active = FALSE, filterable = FALSE,
185
  sortable = FALSE, log = FALSE) {
186

187 0
  cog(x, type = "href", desc = desc, group = group, default_label = default_label,
188 0
    default_active = default_active, filterable = filterable, sortable = sortable,
189 0
    log = log)
190
}
191

192
#' Cast a data frame as a cognostics data frame
193
#'
194
#' @param x a data frame
195
#' @param cond_cols the column name(s) that comprise the conditioning variables
196
#' @param key_col the column name that indicates the panel key
197
#' @param cog_desc an optional named list of descriptions for the cognostics columns
198
#' @param needs_key does the result need to have a "key" column?
199
#' @param needs_cond does the result need to have conditioning variable columns?
200
#' @param group value to be used in the \code{\link{cog}} group
201
#' @export
202
as_cognostics <- function(x, cond_cols, key_col = NULL, cog_desc = NULL,
203
  needs_key = TRUE, needs_cond = TRUE, group = "common") {
204
  # make each column a true cognostic so things are consistent downstream
205

206 1
  if (needs_key) {
207 1
    if (is.null(key_col))
208 1
      key_col <- "panelKey"
209 1
    if (! key_col %in% names(x)) {
210 1
      x$panelKey <- cog(sanitize( # nolint
211 1
        apply(x[cond_cols], 1, paste, collapse = "_")),
212 1
        desc = "panel key", type = "key", group = "panelKey",
213 1
        default_active = TRUE, filterable = FALSE)
214
    }
215
  }
216

217 1
  if (needs_cond) {
218 1
    if (! all(cond_cols %in% names(x)))
219 0
      stop_nice("The cognostics data frame must have all specified cond_cols:",
220 0
        paste(cond_cols, collapse = ", "))
221

222 1
    for (cl in cond_cols) {
223 1
      x[[cl]] <- cog(x[[cl]], desc = "conditioning variable",
224 1
        type = ifelse(is.numeric(x[[cl]]), "numeric", "factor"),
225 1
        group = "condVar", default_label = TRUE)
226
    }
227
  }
228

229
  # TODO: make sure cond_cols are unique and key_col is unique
230

231
  # any variables that aren't cogs, fill them in...
232 1
  has_no_cog <- which(!sapply(x, function(x) inherits(x, "cog")))
233 1
  nms <- names(x)
234

235 1
  if (length(has_no_cog) > 0) {
236 1
    for (idx in has_no_cog) {
237 1
      desc <- cog_desc[[nms[idx]]]
238 1
      if (!is.character(desc))
239 1
        desc <- nms[idx]
240

241 1
      if (all(grepl("https*://", x[[idx]]))) {
242 0
        x[[idx]] <- cog_href(x[[idx]], desc = paste(desc, "(link)"), group = group)
243
      } else {
244 1
        x[[idx]] <- cog(x[[idx]], desc = desc, group = group)
245
      }
246
    }
247
  }
248

249
  # get rid of cogs that are all NA
250 1
  na_cogs <- which(sapply(x, function(a) all(is.na(a))))
251 1
  if (length(na_cogs) > 0) {
252 0
    message("Removing the following cognostics that are all NA: ",
253 0
      paste(nms[na_cogs], collapse = ", "))
254 0
    x[na_cogs] <- NULL
255
  }
256

257 1
  class(x) <- c("cognostics", class(x))
258 1
  x
259
}
260

261

262

263
bind_cog_list_and_descs <- function(cog_list) {
264
  # retrieve autocog description (or any other desc)
265 1
  non_null_pos <- ! unlist(lapply(cog_list, is.null))
266

267 1
  has_factor <- any(unlist(lapply(cog_list[[1]], is.factor)))
268 1
  if (!inherits(cog_list[[1]], "tibble") && has_factor)
269 0
    message(
270 0
      "Note: it is advised to use tibble() when creating cognostic columns, ",
271 0
      "to avoid issues that arise with data.frame and stringsAsFactors = TRUE.")
272

273 1
  res <- suppressWarnings(dplyr::bind_rows(cog_list))
274

275
  # retrieve the first non null cognostic descriptions
276
  #   from each nested cog data
277 1
  cog_desc <- list()
278 1
  if (sum(non_null_pos) > 0) {
279
    # get first non null attr
280 1
    non_null_row_dt <- cog_list[non_null_pos][[1]]
281

282
    # get attributes
283 1
    one_row_attrs <- lapply(non_null_row_dt, function(x) attr(x, "cog_attrs"))
284 1
    one_row_class <- lapply(non_null_row_dt, function(x) {
285 1
      res <- class(x)
286 1
      res[res == "factor"] <- "character"
287 1
      res
288
    })
289

290
    # extract description attrs
291 1
    cog_desc <- lapply(one_row_attrs, `[[`, "desc")
292

293
    # store attributes of each column of first non null info
294 1
    for (nm in names(res)) {
295 1
      attr(res[[nm]], "cog_attrs") <- one_row_attrs[[nm]]
296 1
      class(res[[nm]]) <- one_row_class[[nm]]
297
    }
298
  }
299

300 1
  list(
301 1
    cog_df = res,
302 1
    cog_desc = cog_desc
303
  )
304
}
305

306
#' @importFrom autocogs panel_cogs
307
cog_df_info <- function(x, panel_col, state, auto_cog = FALSE, nested_data_list = NULL,
308
  nested_cog_attrs = NULL) {
309

310 1
  atomic_cols <- names(x)[sapply(x, is.atomic)]
311 1
  non_atomic_cols <- setdiff(names(x), c(atomic_cols, panel_col))
312 1
  is_nested <- length(non_atomic_cols) > 0
313

314 1
  if (length(atomic_cols) == 0)
315 0
    stop_nice("There must be at least one atomic column in the data frame passed in",
316 0
      "to trelliscope.data.frame")
317

318 1
  cond_cols <- find_cond_cols(x[atomic_cols], is_nested)
319

320
  # if we are no longer sorted by a cond_col but are sorted by something else
321
  # and if sort state is not already specified, then set that as state
322 1
  if (is.unsorted(x[[cond_cols[1]]])) {
323 1
    sort_cols <- find_sort_cols(x[setdiff(atomic_cols, cond_cols)])
324

325 1
    if (nrow(sort_cols) > 0) {
326 1
      cond_not_sorted <- !sort_cols$name %in% cond_cols
327 1
      other_sorted <- setdiff(sort_cols$name, cond_cols)
328 1
      if (is.null(state$sort) && cond_not_sorted && length(other_sorted) > 0) {
329 1
        if (is.null(state))
330 1
          state <- list()
331 1
        state$sort <- lapply(other_sorted, function(a) {
332 1
          list(name = a, dir = sort_cols$dir[sort_cols$name == a])
333
        })
334 1
        if (is.null(state$labels)) {
335 1
          state$labels <- c(cond_cols, other_sorted)
336
        }
337
      }
338
    }
339
  }
340

341 1
  cogs <- list(as_cognostics(x[atomic_cols], cond_cols))
342

343 1
  if (!is.null(nested_data_list)) {
344
    # add unique data within nested data
345 1
    distinct_counts <- nested_data_list %>%
346 1
      purrr::map_df(. %>% summarise_all(n_distinct))
347 1
    unique_cols <- names(distinct_counts)[sapply(distinct_counts, function(x) all(x == 1))]
348 1
    if (length(unique_cols) > 0) {
349 1
      tmp <- nested_data_list %>%
350 1
        lapply(function(sub_dt) {
351 1
          aa <- sub_dt[1, unique_cols]
352 1
          for (jj in seq_along(aa))
353 1
            class(aa[[jj]]) <- setdiff(class(aa[[jj]]), "cog")
354 1
          aa
355
        }) %>%
356 1
        dplyr::bind_rows()
357

358
      # add nested cog attrs back in, if specified
359 1
      for (nm in names(tmp)) {
360 1
        ca <- nested_cog_attrs[[nm]]
361 1
        if (!is.null(ca)) {
362 1
          attr(tmp[[nm]], "cog_attrs") <- ca
363 1
          class(tmp[[nm]]) <- c(class(tmp[[nm]]), "cog")
364
        }
365
      }
366

367 1
      cogs[[length(cogs) + 1]] <-  as_cognostics(
368 1
        tmp,
369 1
        needs_key = FALSE, needs_cond = FALSE,
370 1
        group = "_data",
371 1
        cog_desc = NULL
372
      )
373
    }
374

375
    # calculate non unique cognostics
376 1
    non_unique_cols <- setdiff(names(distinct_counts), c(unique_cols, ".id"))
377 1
    if (length(non_unique_cols) > 1) {
378

379
      # run a loop over all non_unique_cols
380 1
      for (i in seq_along(non_unique_cols)) {
381 1
        non_unique_col <- non_unique_cols[[i]]
382

383 1
        non_unique_cog_i <- lapply(nested_data_list, function(sub_dt) {
384 1
          column <- sub_dt[[non_unique_col]]
385 1
          if (is.character(column) || is.factor(column)) {
386 1
            autocogs::autocog_univariate_discrete(as.character(column))
387 1
          } else if (is.numeric(column)) {
388 1
            autocogs::autocog_univariate_continuous(column)
389
          } else {
390 0
            NULL
391
          }
392
        })
393

394 1
        tmp <- bind_cog_list_and_descs(non_unique_cog_i)
395 1
        non_unique_cog_df <- tmp$cog_df
396 1
        cog_desc <- tmp$cog_desc
397

398 1
        if (nrow(non_unique_cog_df) > 0) {
399
          # add the name to make it extra descriptive
400
          # TODO remove once visual grouping is done
401 1
          names(cog_desc) <- paste0(non_unique_col, "_", names(cog_desc))
402 1
          colnames(non_unique_cog_df) <- paste0(non_unique_col, "_", colnames(non_unique_cog_df))
403

404 1
          cogs[[length(cogs) + 1]] <- as_cognostics(
405 1
            non_unique_cog_df,
406 1
            needs_key = FALSE, needs_cond = FALSE,
407 1
            group = non_unique_col,
408 1
            cog_desc = cog_desc
409
          )
410
        }
411
      }
412
    }
413
  }
414

415 1
  if (length(non_atomic_cols) > 0) {
416 1
    usable <- non_atomic_cols[sapply(x[non_atomic_cols],
417 1
      function(a) is.data.frame(a[[1]]))]
418 1
    needs_auto <- usable[sapply(x[usable], function(a) {
419 1
      any(sapply(a, nrow) > 1)
420
    })]
421

422 1
    no_needs_auto <- setdiff(usable, needs_auto)
423 1
    for (a in no_needs_auto) {
424 1
      to_auto_list <- x[[a]]
425 1
      if (inherits(to_auto_list, "trelliscope_cogs")) {
426 1
        class(to_auto_list) <- "list"
427
      }
428

429 1
      tmp <- bind_cog_list_and_descs(to_auto_list)
430 1
      auto_df <- tmp$cog_df
431 1
      cog_desc <- tmp$cog_desc
432

433 1
      cogs[[length(cogs) + 1]] <- auto_df %>%
434 1
        as_cognostics(
435 1
          needs_key = FALSE, needs_cond = FALSE,
436 1
          group = a,
437 1
          cog_desc = cog_desc
438
      )
439
    }
440
  }
441

442
  # add automatic cognostics from autocogs
443 1
  if (!(identical(auto_cog, FALSE) || is.null(auto_cog))) {
444 1
    panel_cog_list <- panel_cogs(x, panel_col = panel_col, layers = auto_cog)
445 1
    for (nm in names(panel_cog_list)) {
446 1
      tmp <- bind_cog_list_and_descs(panel_cog_list[[nm]])
447 1
      panel_cog_dt <- tmp$cog_df
448 1
      cog_desc <- tmp$cog_desc
449 1
      names(cog_desc) <- paste0(nm, "_", names(cog_desc))
450 1
      colnames(panel_cog_dt) <- paste0(nm, "_", colnames(panel_cog_dt))
451 1
      cogs[[length(cogs) + 1]] <- as_cognostics(
452 1
          panel_cog_dt,
453 1
          needs_key = FALSE, needs_cond = FALSE,
454 1
          group = nm,
455 1
          cog_desc = cog_desc
456
      )
457

458
    }
459
  }
460

461 1
  cog_df <- bind_cols(cogs)
462

463 1
  list(
464 1
    cog_df = cog_df,
465 1
    cond_cols = cond_cols,
466 1
    atomic_cols = atomic_cols,
467 1
    non_atomic_cols = non_atomic_cols,
468 1
    state = state
469
  )
470
}

Read our documentation on viewing source code .

Loading