tidyverse / dtplyr

@@ -14,7 +14,7 @@
Loading
14 14
# Make sure data.table functions are available so dtplyr still works
15 15
# even when data.table isn't attached
16 16
dt_funs <- c(
17 -
  "copy", "dcast", "melt", "nafill",
17 +
  "CJ", "copy", "dcast", "melt", "nafill",
18 18
  "fcase", "fcoalesce", "fifelse", "fintersect", "frank", "frankv", "fsetdiff", "funion",
19 19
  "setcolorder", "setnames"
20 20
)

@@ -128,7 +128,8 @@
Loading
128 128
    vars = union(x$vars, y$vars),
129 129
    i = y,
130 130
    on = on,
131 -
    locals = utils::modifyList(x$locals, y$locals)
131 +
    locals = utils::modifyList(x$locals, y$locals),
132 +
    allow_cartesian = TRUE
132 133
  )
133 134
}
134 135

@@ -0,0 +1,38 @@
Loading
1 +
#' Complete a data frame with missing combinations of data
2 +
#'
3 +
#' @description
4 +
#' This is a method for the tidyr `complete()` generic. This is a wrapper
5 +
#' around `dtplyr` translations for `expand()`, `full_join()`, and `replace_na()`
6 +
#' that's useful for completing missing combinations of data.
7 +
#'
8 +
#' @param data A [lazy_dt()].
9 +
#' @inheritParams tidyr::complete
10 +
#' @examples
11 +
#' library(tidyr)
12 +
#' tbl <- tibble(x = 1:2, y = 1:2, z = 3:4)
13 +
#' dt <- lazy_dt(tbl)
14 +
#'
15 +
#' dt %>%
16 +
#'   complete(x, y)
17 +
#'
18 +
#' dt %>%
19 +
#'   complete(x, y, fill = list(z = 10L))
20 +
# exported onLoad
21 +
complete.dtplyr_step <- function(data, ..., fill = list()) {
22 +
  dots <- enquos(...)
23 +
  dots <- dots[!vapply(dots, quo_is_null, logical(1))]
24 +
  if (length(dots) == 0) {
25 +
    return(data)
26 +
  }
27 +
28 +
  full <- tidyr::expand(data, !!!dots)
29 +
  full <- dplyr::full_join(full, data, by = full$vars)
30 +
  full <- tidyr::replace_na(full, replace = fill)
31 +
  full
32 +
}
33 +
34 +
# exported onLoad
35 +
complete.data.table <- function(data, ..., fill = list()) {
36 +
  data <- lazy_dt(data)
37 +
  tidyr::complete(data, ..., fill = fill)
38 +
}

@@ -0,0 +1,78 @@
Loading
1 +
#' Expand data frame to include all possible combinations of values.
2 +
#'
3 +
#' @description
4 +
#' This is a method for the tidyr `expand()` generic. It is translated to
5 +
#' [data.table::CJ()].
6 +
#'
7 +
#' @param data A [lazy_dt()].
8 +
#' @inheritParams tidyr::expand
9 +
#' @examples
10 +
#' library(tidyr)
11 +
#'
12 +
#' fruits <- lazy_dt(tibble(
13 +
#'   type   = c("apple", "orange", "apple", "orange", "orange", "orange"),
14 +
#'   year   = c(2010, 2010, 2012, 2010, 2010, 2012),
15 +
#'   size  =  factor(
16 +
#'     c("XS", "S",  "M", "S", "S", "M"),
17 +
#'     levels = c("XS", "S", "M", "L")
18 +
#'   ),
19 +
#'   weights = rnorm(6, as.numeric(size) + 2)
20 +
#' ))
21 +
#'
22 +
#' # All possible combinations ---------------------------------------
23 +
#' # Note that all defined, but not necessarily present, levels of the
24 +
#' # factor variable `size` are retained.
25 +
#' fruits %>% expand(type)
26 +
#' fruits %>% expand(type, size)
27 +
#' fruits %>% expand(type, size, year)
28 +
#'
29 +
#' # Other uses -------------------------------------------------------
30 +
#' fruits %>% expand(type, size, 2010:2012)
31 +
#'
32 +
#' # Use `anti_join()` to determine which observations are missing
33 +
#' all <- fruits %>% expand(type, size, year)
34 +
#' all
35 +
#' all %>% dplyr::anti_join(fruits)
36 +
#'
37 +
#' # Use with `right_join()` to fill in missing rows
38 +
#' fruits %>% dplyr::right_join(all)
39 +
# exported onLoad
40 +
expand.dtplyr_step <- function(data, ..., .name_repair = "check_unique") {
41 +
  dots <- capture_dots(data, ..., .j = FALSE)
42 +
  dots <- dots[!vapply(dots, is_null, logical(1))]
43 +
  if (length(dots) == 0) {
44 +
    return(data)
45 +
  }
46 +
47 +
  named_dots <- have_name(dots)
48 +
  if (any(!named_dots)) {
49 +
    # Auto-names generated by enquos() don't always work with the CJ() step
50 +
      ## Ex: `1:3`
51 +
    # Replicates the "V" naming convention data.table uses
52 +
    symbol_dots <- vapply(dots, is_symbol, logical(1))
53 +
    needs_v_name <- !symbol_dots & !named_dots
54 +
    v_names <- paste0("V", 1:length(dots))
55 +
    names(dots)[needs_v_name] <- v_names[needs_v_name]
56 +
    names(dots)[symbol_dots] <- lapply(dots[symbol_dots], as_name)
57 +
  }
58 +
  names(dots) <- vctrs::vec_as_names(names(dots), repair = .name_repair)
59 +
60 +
  on <- names(dots)
61 +
  cj <- expr(CJ(!!!syms(on), unique = TRUE))
62 +
63 +
  out <- distinct(data, !!!syms(data$groups), !!!dots)
64 +
  if (length(data$groups) == 0) {
65 +
    out <- step_subset(out, i = cj, on = on)
66 +
  } else {
67 +
    on <- call2(".", !!!syms(on))
68 +
    out <- step_subset(out, j = expr(.SD[!!cj, on = !!on]))
69 +
  }
70 +
71 +
  out
72 +
}
73 +
74 +
# exported onLoad
75 +
expand.data.table <- function(data, ..., .name_repair = "check_unique") {
76 +
  data <- lazy_dt(data)
77 +
  tidyr::expand(data, ..., .name_repair = .name_repair)
78 +
}

@@ -157,8 +157,8 @@
Loading
157 157
    prop = expr(!!smaller_ranks(!!order_by, !!size$prop * .N, ties.method = ties.method))
158 158
  )
159 159
160 -
  step_subset_i(.data, i) %>%
161 -
    arrange(!!order_by, .by_group = TRUE)
160 +
  out <- step_subset_i(.data, i)
161 +
  arrange(out, !!order_by, .by_group = TRUE)
162 162
}
163 163
164 164
smaller_ranks <- function(x, y, ties.method = "min") {

@@ -5,8 +5,9 @@
Loading
5 5
                        arrange = parent$arrange,
6 6
                        i = NULL,
7 7
                        j = NULL,
8 -
                        on = character()
9 -
                        ) {
8 +
                        on = character(),
9 +
                        allow_cartesian = NULL
10 +
) {
10 11
11 12
  stopifnot(is_step(parent))
12 13
  stopifnot(is.null(i) || is_expression(i) || is_step(i))
@@ -22,6 +23,7 @@
Loading
22 23
    i = i,
23 24
    j = j,
24 25
    on = on,
26 +
    allow_cartesian = allow_cartesian,
25 27
    implicit_copy = !is.null(i) || !is.null(j),
26 28
    class = "dtplyr_step_subset"
27 29
  )
@@ -117,7 +119,7 @@
Loading
117 119
118 120
  if (length(x$on) > 0) {
119 121
    out$on <- call2(".", !!!syms(x$on))
120 -
    out$allow.cartesian <- TRUE
122 +
    out$allow.cartesian <- x$allow_cartesian
121 123
  }
122 124
  out
123 125
}
Files Coverage
R 93.43%
Project Totals (29 files) 93.43%
1
comment: false
2

3
coverage:
4
  status:
5
    project:
6
      default:
7
        target: auto
8
        threshold: 1%
9
        informational: true
10
    patch:
11
      default:
12
        target: auto
13
        threshold: 1%
14
        informational: true
Sunburst
The inner-most circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project. Proceeding with folders and finally individual files. The size and color of each slice is representing the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block is represented by the number of statements and the coverage, respectively.
Loading