TysonStanley / tidyfast
1
#' Unnest: Fast Unnesting of Data Tables
2
#'
3
#' Quickly unnest data tables, particularly those nested by \code{dt_nest()}.
4
#'
5
#' @param dt_ the data table to unnest
6
#' @param col  the column to unnest
7
#' @param ... any of the other variables in the nested table that you want to keep in the unnested table. Bare variable names. If none are provided, all variables are kept.
8
#'
9
#' @examples
10
#'
11
#' library(data.table)
12
#' dt <- data.table(
13
#'   x = rnorm(1e5),
14
#'   y = runif(1e5),
15
#'   grp = sample(1L:3L, 1e5, replace = TRUE)
16
#'   )
17
#'
18
#' nested <- dt_nest(dt, grp)
19
#' dt_unnest(nested, col = data)
20
#'
21
#' @import data.table
22
#'
23
#' @export
24
dt_unnest <- function(dt_, col, ...){
25 1
  UseMethod("dt_unnest", dt_)
26
}
27

28
#' @export
29
dt_unnest.default <- function(dt_, col, ...){
30 1
  if (isFALSE(is.data.table(dt_)))
31 1
    dt_ <- as.data.table(dt_)
32

33 1
  col    <- substitute(col)
34 1
  keep   <- substitute(alist(...))
35 1
  names  <- colnames(dt_)
36 1
  others <- names[-match(paste(col), names)]
37 1
  rows   <- sapply(dt_[[paste(col)]], nrow)
38

39 1
  if (length(keep) > 1)
40 0
    others <- others[others %in% paste(keep)[-1]]
41

42 1
  others_dt <- dt_[, ..others]
43 1
  classes   <- sapply(others_dt, typeof)
44 1
  keep      <- names(classes)[classes != "list"]
45 1
  others_dt <- others_dt[, ..keep]
46 1
  others_dt <- lapply(others_dt, rep, times = rows)
47

48 1
  dt_[, list(as.data.table(others_dt), rbindlist(eval(col)))]
49
}
50

51

52
#' Hoist: Fast Unnesting of Vectors
53
#'
54
#' Quickly unnest vectors nested in list columns. Still experimental (has some potentially unexpected behavior in some situations)!
55
#'
56
#' @param dt_ the data table to unnest
57
#' @param ... the columns to unnest (must all be the sample length when unnested); use bare names of the variables
58
#'
59
#' @aliases dt_unnest_vec
60
#'
61
#' @examples
62
#'
63
#' library(data.table)
64
#' dt <- data.table(
65
#'    x = rnorm(1e5),
66
#'    y = runif(1e5),
67
#'    nested1 = lapply(1:10, sample, 10, replace = TRUE),
68
#'    nested2 = lapply(c("thing1", "thing2"), sample, 10, replace = TRUE),
69
#'    id = 1:1e5
70
#'    )
71
#'
72
#' dt_hoist(dt,
73
#'          nested1, nested2,
74
#'          by = id)
75
#'
76
#' @import data.table
77
#'
78
#' @export
79
dt_hoist <- function(dt_, ...){
80 1
  UseMethod("dt_hoist", dt_)
81
}
82

83
#' @export
84
dt_hoist.default <- function(dt_, ...){
85 1
  if (isFALSE(is.data.table(dt_)))
86 1
    dt_ <- as.data.table(dt_)
87

88 1
  pasted_dots <- paste(substitute(list(...)))[-1L]
89 1
  classes <- sapply(dt_, class)
90 1
  typeofs <- sapply(dt_, typeof)
91 1
  v.names <- names(classes)
92 1
  keep <- v.names[classes != "list" & typeofs != "list"]
93 1
  drop <- v.names[classes == "list" | typeofs == "list"]
94 1
  drop <- drop[!drop %in% pasted_dots]
95 1
  keep <- keep[!keep %in% pasted_dots]
96 1
  keep <- paste(keep, collapse = ",")
97 1
  cols <- substitute(unlist(list(...), recursive = FALSE))
98

99 1
  message("The following columns were dropped because ",
100 1
          "they are list-columns (but not being hoisted): ",
101 1
          paste(drop, collapse = ", "))
102

103 1
  dt_ <- dt_[, eval(cols), by = keep]
104 1
  dt_ <- .naming(dt_, substitute(list(...)))
105 1
  dt_
106
}
107

108
.naming <- function(dt_, cols){
109

110 1
  new_names <- paste(cols)[-1]
111 1
  old_names <- paste0("V", seq_along(new_names))
112

113 1
  setnames(dt_,
114 1
           old = old_names,
115 1
           new = new_names,
116 1
           skip_absent = TRUE)
117 1
  dt_
118
}

Read our documentation on viewing source code .

Loading