1
#' Subset rows using their positions
2
#'
3
#' @description
4
#' These are methods for the dplyr generics [slice_min()], [slice_max()], and
5
#' [slice_sample()]. They are translated to SQL using [filter()] and
6
#' window functions (`ROWNUMBER`, `MIN_RANK`, or `CUME_DIST` depending on
7
#' arguments). `slice()`, `slice_head()`, and `slice_tail()` are not supported
8
#' since database tables have no intrinsic order.
9
#'
10
#' If data is grouped, the operation will be performed on each group so that
11
#' (e.g.) `slice_min(db, x, n = 3)` will select the three rows with the smallest
12
#' value of `x` in each group.
13
#'
14
#' @inheritParams arrange.tbl_lazy
15
#' @param ... Not used.
16
#' @param n,prop Provide either `n`, the number of rows, or `prop`, the
17
#'   proportion of rows to select. If neither are supplied, `n = 1` will be
18
#'   used.
19
#'
20
#'   If `n` is greater than the number of rows in the group (or `prop` > 1),
21
#'   the result will be silently truncated to the group size. If the proportion
22
#'   of a group size is not an integer, it is rounded down.
23
#' @param order_by Variable or function of variables to order by.
24
#' @param with_ties Should ties be kept together? The default, `TRUE`, may
25
#'   return more rows than you request. Use FALSE to ignore ties, and return
26
#'   the first n rows.
27
#' @param weight_by,replace Not supported for database backends.
28
#' @name dbplyr-slice
29
#' @aliases NULL
30
#' @examples
31
#' library(dplyr, warn.conflicts = FALSE)
32
#'
33
#' db <- memdb_frame(x = 1:3, y = c(1, 1, 2))
34
#' db %>% slice_min(x) %>% show_query()
35
#' db %>% slice_max(x) %>% show_query()
36
#' db %>% slice_sample() %>% show_query()
37
#'
38
#' db %>% group_by(y) %>% slice_min(x) %>% show_query()
39
#'
40
#' # By default, ties are includes so you may get more rows
41
#' # than you expect
42
#' db %>% slice_min(y, n = 1)
43
#' db %>% slice_min(y, n = 1, with_ties = FALSE)
44
#'
45
#' # Non-integer group sizes are rounded down
46
#' db %>% slice_min(x, prop = 0.5)
47
NULL
48

49
#' @importFrom dplyr slice
50
#' @export
51
slice.tbl_lazy <- function(.data, ...) {
52 1
  abort("slice() is not suppoted on database backends")
53
}
54

55
#' @importFrom dplyr slice_head
56
#' @export
57
slice_head.tbl_lazy <- function(.data, ..., n, prop) {
58 1
  abort(c(
59 1
    "slice_head() is not supported on database backends",
60 1
    i = "Please use slice_min() instead"
61
  ))
62
}
63

64
#' @importFrom dplyr slice_tail
65
#' @export
66
slice_tail.tbl_lazy <- function(.data, ..., n, prop) {
67 1
  abort(c(
68 1
    "slice_tail() is not supported on database backends",
69 1
    i = "Please use slice_max() instead"
70
  ))
71
}
72

73
#' @rdname dbplyr-slice
74
#' @importFrom dplyr slice_min
75
#' @export
76
slice_min.tbl_lazy <- function(.data, order_by, ..., n, prop, with_ties = TRUE) {
77 1
  if (missing(order_by)) {
78 1
    abort("Argument `order_by` is missing, with no default.")
79
  }
80 1
  size <- check_slice_size(n, prop)
81 1
  slice_by(.data, {{order_by}}, size, with_ties = with_ties)
82
}
83

84
#' @rdname dbplyr-slice
85
#' @importFrom dplyr slice_max
86
#' @export
87
slice_max.tbl_lazy <- function(.data, order_by, ..., n, prop, with_ties = TRUE) {
88 1
  if (missing(order_by)) {
89 1
    abort("Argument `order_by` is missing, with no default.")
90
  }
91 1
  size <- check_slice_size(n, prop)
92

93 1
  slice_by(.data, desc({{order_by}}), size, with_ties = with_ties)
94
}
95

96
#' @rdname dbplyr-slice
97
#' @importFrom dplyr slice_sample
98
#' @export
99
slice_sample.tbl_lazy <- function(.data, ..., n, prop, weight_by = NULL, replace = FALSE) {
100 1
  size <- check_slice_size(n, prop)
101 1
  weight_by <- enquo(weight_by)
102 1
  if (size$type == "prop") {
103 1
    abort("Sampling by `prop` is not supported on database backends")
104
  }
105

106 1
  if (!quo_is_null(weight_by)) {
107 1
    abort("Weighted resampling is not supported on database backends")
108
  }
109 1
  if (replace) {
110 1
    abort("Sampling with replacement is not supported on database backends")
111
  }
112

113 1
  slice_by(.data, random(), size, with_ties = FALSE)
114
}
115

116
slice_by <- function(.data, order_by, size, with_ties = FALSE) {
117 1
  old_frame <- op_sort(.data)
118

119 1
  if (with_ties) {
120 1
    window_fun <- switch(size$type,
121 1
      n = expr(min_rank() <= !!size$n),
122 1
      prop = expr(cume_dist() <= !!size$prop)
123
    )
124
  } else {
125 1
    window_fun <- switch(size$type,
126 1
      n = expr(row_number() <= !!size$n),
127 1
      prop = abort("Can only use `prop` when `with_ties = TRUE`")
128
    )
129
  }
130

131 1
  .data %>%
132 1
    window_order({{order_by}}) %>%
133 1
    filter(!!window_fun) %>%
134 1
    window_order(!!!old_frame)
135
}
136

137

138
# helpers -----------------------------------------------------------------
139

140
check_slice_size <- function(n, prop) {
141 1
  if (missing(n) && missing(prop)) {
142 1
    list(type = "n", n = 1L)
143 1
  } else if (!missing(n) && missing(prop)) {
144 1
    if (!is.numeric(n) || length(n) != 1) {
145 1
      abort("`n` must be a single number.")
146
    }
147 1
    if (is.na(n) || n < 0) {
148 1
      abort("`n` must be a non-missing positive number.")
149
    }
150

151 1
    list(type = "n", n = as.integer(n))
152 1
  } else if (!missing(prop) && missing(n)) {
153 1
    if (!is.numeric(prop) || length(prop) != 1) {
154 1
      abort("`prop` must be a single number")
155
    }
156 1
    if (is.na(prop) || prop < 0) {
157 1
      abort("`prop` must be a non-missing positive number.")
158
    }
159 1
    list(type = "prop", prop = prop)
160
  } else {
161 1
    abort("Must supply exactly one of `n` and `prop` arguments.")
162
  }
163
}
164

165
globalVariables(c("min_rank", "cume_dist", "row_number", "desc", "random"))

Read our documentation on viewing source code .

Loading