gogonzo / runner

@@ -0,0 +1,105 @@
Loading
1 +
#' Set window parameters
2 +
#'
3 +
#' Set window parameters for \link{runner}. This function sets the
4 +
#' attributes to \code{x} (only \code{data.frame}) object and saves user effort
5 +
#' to specify window parameters in further multiple \link{runner} calls.
6 +
#' @inheritParams runner
7 +
#' @return x object which \link{runner} can be executed on.
8 +
#' @examples
9 +
#' library(dplyr)
10 +
#'
11 +
#' data <- data.frame(
12 +
#'  index = c(2, 3, 3, 4, 5, 8, 10, 10, 13, 15),
13 +
#'  a = rep(c("a", "b"), each = 5),
14 +
#'  b = 1:10
15 +
#' )
16 +
#'
17 +
#' data %>%
18 +
#'  group_by(a) %>%
19 +
#'  run_by(idx = "index", k = 5) %>%
20 +
#'  mutate(
21 +
#'    c = runner(
22 +
#'      x = .,
23 +
#'      f = function(x) {
24 +
#'        paste(x$b, collapse = ">")
25 +
#'      }
26 +
#'    ),
27 +
#'    d = runner(
28 +
#'      x = .,
29 +
#'      f = function(x) {
30 +
#'        sum(x$b)
31 +
#'      }
32 +
#'    )
33 +
#'  )
34 +
#' @export
35 +
run_by <- function(x, idx, k, lag, na_pad, at) {
36 +
  if (!is.data.frame(x)) {
37 +
    stop("`run_by` should be used only for `data.frame`. \n
38 +
         Use `runner` on x directly.")
39 +
  }
40 +
41 +
  if (!missing(k)) x <- set_run_by_difftime(x, k)
42 +
  if (!missing(lag)) x <- set_run_by_difftime(x, lag)
43 +
  if (!missing(idx)) x <- set_run_by_index(x, idx)
44 +
  if (!missing(at)) x <- set_run_by_index(x, at)
45 +
  if (!missing(na_pad)) attr(x, "na_pad") <- na_pad
46 +
47 +
  return(x)
48 +
}
49 +
50 +
set_run_by_index <- function(x, arg) {
51 +
  arg_name <- deparse(substitute(arg))
52 +
  attr(x, arg_name) <- if (is.character(arg) && length(arg) == 1 && arg %in% names(x)) {
53 +
    arg
54 +
  } else if (is.numeric(arg) || inherits(arg, c("Date", "POSIXct", "POSIXxt", "POSIXlt"))) {
55 +
    arg
56 +
  } else {
57 +
    stop(
58 +
      sprintf(
59 +
        "`%s` should be either:
60 +
         - column name of `x`
61 +
         - vector of type `numeric`, `Date`, `POSIXct` or `POSIXlt`",
62 +
        arg_name
63 +
      ),
64 +
      call. = FALSE
65 +
    )
66 +
  }
67 +
  return(x)
68 +
}
69 +
70 +
set_run_by_difftime <- function(x, arg) {
71 +
  arg_name <- deparse(substitute(arg))
72 +
73 +
  attr(x, arg_name) <- if (is.character(arg)) {
74 +
    if (length(arg) == 1 && arg %in% names(x)) {
75 +
      arg
76 +
    } else if (all(is_datetime_valid(arg))) {
77 +
      arg
78 +
    } else {
79 +
      stop(
80 +
        sprintf(
81 +
          "`%s` is invalid, should be either:
82 +
           - column name of `x`
83 +
           - `difftime` class or character describing diffitme (see at argument in `seq.POSIXt`)
84 +
           - `numeric` or `integer` vector",
85 +
          arg_name
86 +
        ),
87 +
        call. = FALSE
88 +
      )
89 +
    }
90 +
  } else if (is.numeric(arg) || is(arg, "difftime")) {
91 +
    arg
92 +
  } else {
93 +
    stop(
94 +
      sprintf(
95 +
        "`%s` is invalid, should be either:
96 +
           - column name of `x`
97 +
           - `difftime` class or character describing diffitme (see at argument in `seq.POSIXt`)
98 +
           - `numeric` or `integer` `vector`",
99 +
        arg_name
100 +
      ),
101 +
      call. = FALSE
102 +
    )
103 +
  }
104 +
  return(x)
105 +
}

@@ -1,21 +1,22 @@
Loading
1 1
#' Apply running function
2 2
#'
3 3
#' Applies custom function on running windows.
4 -
#' @param x (`vector`, `data.frame`, `matrix`)\cr
4 +
#' @param x (`vector`, `data.frame`, `matrix`, `xts`)\cr
5 5
#'  Input in runner custom function `f`.
6 6
#'
7 7
#' @param k (`integer` vector or single value)\cr
8 8
#'  Denoting size of the running window. If `k` is a single value then window
9 9
#'  size is constant for all elements, otherwise if `length(k) == length(x)`
10 10
#'  different window size for each element. One can also specify `k` in the same
11 -
#'  way as by in \code{\link[base]{seq.POSIXt}}. More in details.
11 +
#'  way as `by` argument in \code{\link[base]{seq.POSIXt}}.
12 +
#'  See 'Specifying time-intervals' in details section.
12 13
#'
13 14
#' @param lag (`integer` vector or single value)\cr
14 15
#'  Denoting window lag. If `lag` is a single value then window lag is constant
15 16
#'  for all elements, otherwise if `length(lag) == length(x)` different window
16 17
#'  size for each element. Negative value shifts window forward. One can also
17 -
#'  specify `lag` in the same way as by in \code{\link[base]{seq.POSIXt}}.
18 -
#'  More in details.
18 +
#'  specify `lag` in the same way as `by` argument in \code{\link[base]{seq.POSIXt}}.
19 +
#'  See 'Specifying time-intervals' in details section.
19 20
#'
20 21
#' @param idx (`integer`, `Date`, `POSIXt`)\cr
21 22
#'  Optional integer vector containing sorted (ascending) index of observation.
@@ -24,15 +25,16 @@
Loading
24 25
#'  are depending on `idx`. Length of `idx` have to be equal of length `x`.
25 26
#'
26 27
#' @param f (`function`)\cr
27 -
#' Applied on windows created from `x`. This function is meant to summarize
28 -
#' windows and create single element for each window, but one can also specify
29 -
#' function which return multiple elements (runner output will be a list).
30 -
#' By default runner returns windows as is (`f = function(x)`).
28 +
#'  Applied on windows created from `x`. This function is meant to summarize
29 +
#'  windows and create single element for each window, but one can also specify
30 +
#'  function which return multiple elements (runner output will be a list).
31 +
#'  By default runner returns windows as is (`f = function(x)`).
31 32
#'
32 33
#' @param at (`integer`, `Date`, `POSIXt`, `character` vector)\cr
33 34
#'  Vector of any size and any value defining output data points. Values of the
34 35
#'  vector defines the indexes which data is computed at. Can be also `POSIXt`
35 -
#'  sequence increment \code{\link[base]{seq.POSIXt}}. More in details.
36 +
#'  sequence increment used in `at` argument in \code{\link[base]{seq.POSIXt}}.
37 +
#'  See 'Specifying time-intervals' in details section.
36 38
#'
37 39
#' @param na_pad (`logical` single value)\cr
38 40
#'  Whether incomplete window should return `NA` (if `na_pad = TRUE`)
@@ -43,6 +45,18 @@
Loading
43 45
#'  `runner` by default guess type automatically. In case of failure of `"auto"`
44 46
#'  please specify desired type.
45 47
#'
48 +
#' @param simplify (`logical` or `character` value)\cr
49 +
#'  should the result be simplified to a vector, matrix or higher dimensional
50 +
#'  array if possible. The default value, `simplify = TRUE`, returns a vector or
51 +
#'  matrix if appropriate, whereas if `simplify = "array"` the result may be an
52 +
#'  array of “rank” `(=length(dim(.)))` one higher than the result of output
53 +
#'  from the function `f` for each window.
54 +
#'
55 +
#' @param cl (`cluster`) *experimental*\cr
56 +
#'  Create and pass the cluster to the `runner` function to run each window
57 +
#'  calculation in parallel. See \code{\link[parallel]{makeCluster}} in details.
58 +
#'
59 +
#'
46 60
#' @param ... (optional)\cr
47 61
#'   other arguments passed to the function `f`.
48 62
#'
@@ -74,37 +88,93 @@
Loading
74 88
#'    \if{latex}{\figure{runningdatewindows.pdf}{options: width=7cm}}
75 89
#'  }
76 90
#'  \item{**Window at specific indices**}{\cr
77 -
#'    `runner` by default returns vector of the same size as `x` unless one specifies
78 -
#'    `at` argument. Each element of `at` is an index on which runner calculates function -
79 -
#'    which means that output of the runner is now of length equal to `at`. Note
80 -
#'    that one can change index of `x` by specifying `idx`.
81 -
#'    Illustration below shows output of `runner` for `at = c(18, 27, 45, 31)`
82 -
#'    which gives windows in ranges enclosed in square brackets. Range for `at = 27` is
83 -
#'    `[22, 26]` which is not available in current indices. \cr
91 +
#'    `runner` by default returns vector of the same size as `x` unless one
92 +
#'    specifies `at` argument. Each element of `at` is an index on which runner
93 +
#'    calculates function - which means that output of the runner is now of
94 +
#'    length equal to `at`. Note that one can change index of `x` by specifying
95 +
#'    `idx`. Illustration below shows output of `runner` for `at = c(18, 27, 45, 31)`
96 +
#'    which gives windows in ranges enclosed in square brackets. Range for
97 +
#'    `at = 27` is `[22, 26]` which is not available in current indices. \cr
84 98
#'    \if{html}{\figure{runnerat.png}{options: width="75\%" alt="Figure: runnerat.png"}}
85 99
#'    \if{latex}{\figure{runnerat.pdf}{options: width=7cm}}
86 -
#'    \cr
87 -
#'    `at` can also be specified as interval of the output defined by `at = "<increment>"`
88 -
#'    which results in output on following indices
89 -
#'    `seq.POSIXt(min(idx), max(idx), by = "<increment>")`. Increment of sequence is the
90 -
#'    same as in \code{\link[base]{seq.POSIXt}} function.
91 -
#'    It's worth noting that increment interval can't be more frequent than
92 -
#'    interval of `idx` - for `Date` the most frequent time-unit is a `"day"`,
93 -
#'    for `POSIXt` a `sec`.
94 -
#'
95 -
#'    `k` and `lag` can also be specified as using time sequence increment. Available
96 -
#'    time units are `"sec", "min", "hour", "day", "DSTday", "week", "month", "quarter" or "year"`.
97 -
#'    To increment by number of units one can also specify `<number> <unit>s`
98 -
#'    for example `lag = "-2 days"`, `k = "5 weeks"`.
99 100
#'  }
100 101
#' }
101 -
#' Above is not enough since `k` and `lag` can be a vector which allows to
102 -
#' stretch and lag/lead each window freely on in time (on indices).
102 +
#' ## Specifying time-intervals
103 +
#'  `at` can also be specified as interval of the output defined by `at = "<increment>"`
104 +
#'  which results in indices sequence defined by
105 +
#'  `seq.POSIXt(min(idx), max(idx), by = "<increment>")`. Increment of sequence
106 +
#'  is the same as in \code{\link[base]{seq.POSIXt}} function.
107 +
#'  It's worth noting that increment interval can't be more frequent than
108 +
#'  interval of `idx` - for `Date` the most frequent time-unit is a `"day"`,
109 +
#'  for `POSIXt` a `sec`.
110 +
#'
111 +
#'  `k` and `lag` can also be specified as using time sequence increment.
112 +
#'  Available time units are
113 +
#'  `"sec", "min", "hour", "day", "DSTday", "week", "month", "quarter" or "year"`.
114 +
#'  To increment by number of units one can also specify `<number> <unit>s`
115 +
#'  for example `lag = "-2 days"`, `k = "5 weeks"`.
116 +
#'
117 +
#'  Setting `k` and `lag` as a sequence increment can be also a vector can be a
118 +
#'  vector which allows to stretch and lag/lead each window freely on in time
119 +
#'  (on indices).
120 +
#' \cr
121 +
#' ## Parallel computing
122 +
#'  Beware that executing R call in parallel not always
123 +
#'  have the edge over single-thread even if the
124 +
#'  `cl <- registerCluster(detectCores())` was specified before.
125 +
#'  \cr
126 +
#'  Parallel windows are executed in the independent environment, which means that
127 +
#'  objects other than function arguments needs to be copied to the parallel
128 +
#'  environment using \code{\link[parallel]{clusterExport}}`. For example using
129 +
#'  `f = function(x) x + y + z` will result in error as
130 +
#'  \code{clusterExport(cl, varlist = c("y", "z"))} needs to be called before.
103 131
#'
104 132
#' @return vector with aggregated values for each window. Length of output is the
105 133
#'  same as `length(x)` or `length(at)` if specified. Type of the output
106 134
#'  is taken from `type` argument.
107 135
#'
136 +
#' @md
137 +
#' @rdname runner
138 +
#' @importFrom methods is
139 +
#' @importFrom parallel clusterExport parLapply
140 +
#' @export
141 +
runner <- function (
142 +
  x,
143 +
  f = function(x) x,
144 +
  k = integer(0),
145 +
  lag = integer(1),
146 +
  idx = integer(0),
147 +
  at = integer(0),
148 +
  na_pad = FALSE,
149 +
  type = "auto",
150 +
  simplify = TRUE,
151 +
  cl = NULL,
152 +
  ...
153 +
  ) {
154 +
  if (!is.null(cl) && type != "auto") {
155 +
    warning(
156 +
      "There is no option to specify the type of the output using type in parallel mode.
157 +
      Please use 'simplify' instead"
158 +
    )
159 +
    type <- "auto"
160 +
  }
161 +
  if (!isFALSE(simplify) && type != "auto") {
162 +
    warning(
163 +
      "When 'simplify != FALSE' 'type' argument is set to 'auto'"
164 +
    )
165 +
    type <- "auto"
166 +
  }
167 +
  if (type != "auto") {
168 +
    warning(
169 +
      "Argument 'type'is deprecated and will be defunct in the next release.
170 +
    Please use 'simplify' argument to manage the output type."
171 +
    )
172 +
  }
173 +
174 +
  UseMethod("runner", x)
175 +
}
176 +
177 +
#' @rdname runner
108 178
#' @examples
109 179
#'
110 180
#' # runner returns windows as is by default
@@ -169,11 +239,8 @@
Loading
169 239
#'   at = c(18, 27, 48, 31),
170 240
#'   f = mean
171 241
#' )
172 -
#' @md
173 -
#' @rdname runner
174 -
#' @importFrom methods is
175 242
#' @export
176 -
runner <- function (
243 +
runner.default <- function(
177 244
  x,
178 245
  f = function(x) x,
179 246
  k = integer(0),
@@ -182,12 +249,113 @@
Loading
182 249
  at = integer(0),
183 250
  na_pad = FALSE,
184 251
  type = "auto",
252 +
  simplify = TRUE,
253 +
  cl = NULL,
185 254
  ...
186 -
  ) {
187 -
  UseMethod("runner", x)
255 +
) {
256 +
  if (any(is.na(k))) {
257 +
    stop("Function doesn't accept NA values in k vector");
258 +
  }
259 +
  if (any(is.na(lag))) {
260 +
    stop("Function doesn't accept NA values in lag vector");
261 +
  }
262 +
  if (any(is.na(idx))) {
263 +
    stop("Function doesn't accept NA values in idx vector");
264 +
  }
265 +
  if (!is(f, "function")) {
266 +
    stop("f should be a function")
267 +
  }
268 +
269 +
  # use POSIXt.seq
270 +
  at <- seq_at(at, idx)
271 +
  k <- k_by(k, if (length(at > 0)) at else idx, "k")
272 +
  lag <- k_by(lag, if (length(at > 0)) at else idx, "lag")
273 +
274 +
  w <- window_run(
275 +
    x = x,
276 +
    k = k,
277 +
    lag = lag,
278 +
    idx = idx,
279 +
    at = at,
280 +
    na_pad = na_pad
281 +
  )
282 +
283 +
  if (!is.null(cl) && is(cl, "cluster")) {
284 +
    answer <- parLapply(
285 +
      cl = cl,
286 +
      X = w,
287 +
      fun = f,
288 +
      ...
289 +
    )
290 +
291 +
  } else if (type != "auto") {
292 +
    n <- length(w)
293 +
    answer <- vector(mode = type, length = n)
294 +
    for (i in seq_len(n)) {
295 +
      ww <- w[[i]]
296 +
      answer[i] <- if (length(ww) == 0) {
297 +
        NA
298 +
      } else {
299 +
        f(ww, ...)
300 +
      }
301 +
    }
302 +
303 +
  } else {
304 +
    answer <- lapply(w, function(.thisWindow)
305 +
      if (is.null(.thisWindow)) {
306 +
        NA
307 +
      } else {
308 +
        f(.thisWindow, ...)
309 +
      }
310 +
    )
311 +
  }
312 +
313 +
  if (!isFALSE(simplify) && length(answer) && type == "auto") {
314 +
    simplify2array(answer, higher = (simplify == "array"))
315 +
  } else {
316 +
    answer
317 +
  }
188 318
}
189 319
190 320
#' @rdname runner
321 +
#' @examples
322 +
#'
323 +
#' # runner with data.frame
324 +
#' df <- data.frame(
325 +
#'   a = 1:13,
326 +
#'   b = 1:13 + rnorm(13, sd = 5),
327 +
#'   idx = seq(Sys.Date(), Sys.Date() + 365, by = "1 month")
328 +
#' )
329 +
#' runner(
330 +
#'   x = df,
331 +
#'   idx = "idx",
332 +
#'   at = "6 months",
333 +
#'   f = function(x) {
334 +
#'     cor(x$a, x$b)
335 +
#'   }
336 +
#' )
337 +
#'
338 +
#' # parallel computing
339 +
#' library(parallel)
340 +
#' data <- data.frame(
341 +
#'   a = runif(100),
342 +
#'   b = runif(100),
343 +
#'   idx = cumsum(sample(rpois(100, 5)))
344 +
#' )
345 +
#' const <- 0
346 +
#' cl <- makeCluster(1)
347 +
#' clusterExport(cl, "const", envir = environment())
348 +
#'
349 +
#' runner(
350 +
#'   x = data,
351 +
#'   k = 10,
352 +
#'   f = function(x) {
353 +
#'     cor(x$a, x$b) + const
354 +
#'   },
355 +
#'   idx = "idx",
356 +
#'   cl = cl
357 +
#' )
358 +
#' stopCluster(cl)
191 359
#' @export
192 360
runner.data.frame <- function(
193 361
  x,
@@ -198,13 +366,11 @@
Loading
198 366
  at = integer(0),
199 367
  na_pad = FALSE,
200 368
  type = "auto",
369 +
  simplify = TRUE,
370 +
  cl = NULL,
201 371
  ...
202 -
  ) {
203 -
204 -
  # dplyr::group_by exception
205 -
  x <- this_group(x)
206 -
207 -
  # set arguments from attrs set by run_by
372 +
) {
373 +
  # set arguments from attrs (set by run_by)
208 374
  k <- set_from_attribute_difftime(x, k) # no deep copy
209 375
  lag <- set_from_attribute_difftime(x, lag)
210 376
  idx <- set_from_attribute_index(x, idx)
@@ -226,7 +392,7 @@
Loading
226 392
227 393
  # use POSIXt.seq
228 394
  at  <- seq_at(at, idx)
229 -
  k   <- k_by(k,   if (length(at) > 0) at else idx, "k")
395 +
  k   <- k_by(k, if (length(at) > 0) at else idx, "k")
230 396
  lag <- k_by(lag, if (length(at) > 0) at else idx, "lag")
231 397
232 398
  w <- window_run(
@@ -238,20 +404,40 @@
Loading
238 404
    na_pad = na_pad
239 405
  )
240 406
241 -
  res <- sapply(w, function(ww) {
242 -
    if (length(ww) == 0) {
243 -
      NA
244 -
    } else {
245 -
      f(x[ww, ], ...)
246 -
    }
247 -
  })
407 +
  answer <- if (!is.null(cl) && is(cl, "cluster")) {
408 +
    clusterExport(cl, varlist = c("x", "f"), envir = environment())
409 +
    parLapply(
410 +
      cl = cl,
411 +
      X = w,
412 +
      fun = function(.thisWindowIdx) {
413 +
        if (length(.thisWindowIdx) == 0) {
414 +
          NA
415 +
        } else {
416 +
          f(x[.thisWindowIdx,], ...)
417 +
        }
418 +
      }
419 +
    )
248 420
249 -
  return(res)
421 +
  } else {
422 +
    lapply(w, function(.thisWindowIdx) {
423 +
      if (length(.thisWindowIdx) == 0) {
424 +
        NA
425 +
      } else {
426 +
        f(x[.thisWindowIdx, ], ...)
427 +
      }
428 +
    })
429 +
  }
430 +
431 +
  if (!isFALSE(simplify) && length(answer)) {
432 +
    simplify2array(answer, higher = (simplify == "array"))
433 +
  } else {
434 +
    answer
435 +
  }
250 436
}
251 437
252 438
#' @rdname runner
253 439
#' @export
254 -
runner.default <- function(
440 +
runner.grouped_df <- function(
255 441
  x,
256 442
  f = function(x) x,
257 443
  k = integer(0),
@@ -260,9 +446,51 @@
Loading
260 446
  at = integer(0),
261 447
  na_pad = FALSE,
262 448
  type = "auto",
449 +
  simplify = TRUE,
450 +
  cl = NULL,
263 451
  ...
264 -
  ) {
452 +
) {
453 +
  runner.data.frame(
454 +
    x = this_group(x),
455 +
    f = f,
456 +
    lag = lag,
457 +
    idx = idx,
458 +
    at = at,
459 +
    na_pad = na_pad,
460 +
    type = type,
461 +
    simplify = simplify,
462 +
    cl = cl,
463 +
    ...
464 +
  )
465 +
}
265 466
467 +
#' @rdname runner
468 +
#' @examples
469 +
#'
470 +
#' # runner with matrix
471 +
#' data <- matrix(data = runif(100, 0, 1), nrow = 20, ncol = 5)
472 +
#' runner(
473 +
#'   x = data,
474 +
#'   f = function(x) {
475 +
#'     tryCatch(
476 +
#'       cor(x),
477 +
#'       error = function(e) NA
478 +
#'     )
479 +
#'  })
480 +
#' @export
481 +
runner.matrix <- function(
482 +
  x,
483 +
  f = function(x) x,
484 +
  k = integer(0),
485 +
  lag = integer(1),
486 +
  idx = integer(0),
487 +
  at = integer(0),
488 +
  na_pad = FALSE,
489 +
  type = "auto",
490 +
  simplify = TRUE,
491 +
  cl = NULL,
492 +
  ...
493 +
) {
266 494
  if (any(is.na(k))) {
267 495
    stop("Function doesn't accept NA values in k vector");
268 496
  }
@@ -276,14 +504,13 @@
Loading
276 504
    stop("f should be a function")
277 505
  }
278 506
279 -
280 507
  # use POSIXt.seq
281 -
  at <- seq_at(at, idx)
282 -
  k <- k_by(k, if (length(at > 0)) at else idx, "k")
283 -
  lag <- k_by(lag, if (length(at > 0)) at else idx, "lag")
508 +
  at  <- seq_at(at, idx)
509 +
  k   <- k_by(k,   if (length(at) > 0) at else idx, "k")
510 +
  lag <- k_by(lag, if (length(at) > 0) at else idx, "lag")
284 511
285 512
  w <- window_run(
286 -
    x = x,
513 +
    x = seq_len(nrow(x)),
287 514
    k = k,
288 515
    lag = lag,
289 516
    idx = idx,
@@ -291,35 +518,98 @@
Loading
291 518
    na_pad = na_pad
292 519
  )
293 520
294 -
  if (type != "auto") {
295 -
    n <- length(w)
296 -
    res <- vector(mode = type, length = n)
297 -
    for (i in seq_len(n)) {
298 -
      ww <- w[[i]]
299 -
      res[i] <- if (length(ww) == 0) {
521 +
  answer <- if (!is.null(cl) && is(cl, "cluster"))  {
522 +
    clusterExport(cl, varlist = c("x", "f"), envir = environment())
523 +
    parLapply(
524 +
      cl = cl,
525 +
      X = w,
526 +
      fun = function(.thisWindowIdx) {
527 +
        if (length(.thisWindowIdx) == 0) {
528 +
          NA
529 +
        } else {
530 +
          f(x[.thisWindowIdx, , drop = FALSE], ...)
531 +
        }
532 +
      },
533 +
      ...
534 +
    )
535 +
  } else {
536 +
    lapply(
537 +
      X = w,
538 +
      FUN = function(.thisWindowIdx) {
539 +
      if (length(.thisWindowIdx) == 0) {
300 540
        NA
301 541
      } else {
302 -
        f(ww, ...)
542 +
        f(x[.thisWindowIdx, , drop = FALSE], ...)
303 543
      }
304 -
    }
544 +
    })
545 +
  }
546 +
  if (!isFALSE(simplify) && length(answer)) {
547 +
    simplify2array(answer, higher = (simplify == "array"))
548 +
  } else {
549 +
    answer
550 +
  }
551 +
}
305 552
553 +
#' @rdname runner
554 +
#' @export
555 +
runner.xts <- function(
556 +
  x,
557 +
  f = function(x) x,
558 +
  k = integer(0),
559 +
  lag = integer(1),
560 +
  idx = integer(0),
561 +
  at = integer(0),
562 +
  na_pad = FALSE,
563 +
  type = "auto",
564 +
  simplify = TRUE,
565 +
  cl = NULL,
566 +
  ...
567 +
) {
568 +
  if (!identical(idx, integer(0))) {
569 +
    warning(
570 +
      "'idx' argument has been specified and will mask index
571 +
      of the 'xts' object."
572 +
    )
306 573
  } else {
307 -
    res <- sapply(w, function(ww)
308 -
      if (is.null(ww)) {
309 -
        NA
310 -
      } else {
311 -
        f(ww, ...)
312 -
      }
574 +
    idx <- structure(
575 +
      .Data = as.vector(attr(x, "index")),
576 +
      class = attr(attr(x, "index"), "tclass"),
577 +
      tz = attr(attr(x, "index"), "tzone")
313 578
    )
314 579
  }
315 580
316 -
  return(res)
581 +
  runner.matrix(
582 +
    x = x,
583 +
    f = f,
584 +
    k = k,
585 +
    lag = lag,
586 +
    idx = idx,
587 +
    at = at,
588 +
    na_pad = na_pad,
589 +
    type = type,
590 +
    simplify = simplify,
591 +
    cl,
592 +
    ...
593 +
  )
317 594
}
318 595
319 -
get_parent_call_arg_names <- function() {
320 -
  cl <- sys.call(-2)
321 -
  f <- get(as.character(cl[[1]]), mode="function", sys.frame(-2))
322 -
  cl <- match.call(definition=f, call=cl)
596 +
# utilities -----
597 +
get_runner_call_arg_names <- function() {
598 +
  runner_call_idx <- which(
599 +
    vapply(
600 +
      X =  rev(sys.calls()),
601 +
      FUN = function(x) x[[1]] == as.name("runner"),
602 +
      FUN.VALUE = logical(1)
603 +
    )
604 +
  ) - 1
605 +
606 +
  cl <- sys.call(-runner_call_idx)
607 +
  f <- get(
608 +
    x = as.character(cl[[1]]),
609 +
    mode = "function",
610 +
    envir = sys.frame(-runner_call_idx)
611 +
  )
612 +
  cl <- match.call(definition = f, call = cl)
323 613
  names(cl)
324 614
}
325 615
@@ -328,7 +618,6 @@
Loading
328 618
    grepl("^(sec|min|hour|day|DSTday|week|month|quarter|year)$", x = x) |
329 619
      grepl("^-*[0-9]+ (sec|min|hour|day|DSTday|week|month|quarter|year)s", x = x)
330 620
  )
331 -
332 621
}
333 622
334 623
@@ -406,7 +695,10 @@
Loading
406 695
407 696
#' Formats time-unit-interval to valid for runner
408 697
#'
409 -
#' Formats time-unit-interval to valid for runner
698 +
#' Formats time-unit-interval to valid for runner. User specifies \code{k} as
699 +
#' positive number but this means that this interval needs to be substracted
700 +
#' from \code{idx} - because windows length extends window backwards in time.
701 +
#' The same situation for lag.
410 702
#' @param k (k or lag) object from runner to be formatted
411 703
#' @param only_positive for \code{k} is \code{TRUE}, for \code{lag} is \code{FALSE}
412 704
#' @examples
@@ -430,57 +722,6 @@
Loading
430 722
  return(k)
431 723
}
432 724
433 -
#' Set window parameters
434 -
#'
435 -
#' Set window parameters for \link{runner}. This function sets the
436 -
#' attributes to \code{x} (only \code{data.frame}) object and saves user effort
437 -
#' to specify window parameters in further multiple \link{runner} calls.
438 -
#' @inheritParams runner
439 -
#' @return x object which \link{runner} can be executed on.
440 -
#' @examples
441 -
#' library(dplyr)
442 -
#'
443 -
#' data <- data.frame(
444 -
#'  index = c(2, 3, 3, 4, 5, 8, 10, 10, 13, 15),
445 -
#'  a = rep(c("a", "b"), each = 5),
446 -
#'  b = 1:10
447 -
#' )
448 -
#'
449 -
#' data %>%
450 -
#'  group_by(a) %>%
451 -
#'  run_by(idx = "index", k = 5) %>%
452 -
#'  mutate(
453 -
#'    c = runner(
454 -
#'      x = .,
455 -
#'      f = function(x) {
456 -
#'        paste(x$b, collapse = ">")
457 -
#'      }
458 -
#'    ),
459 -
#'    d = runner(
460 -
#'      x = .,
461 -
#'      f = function(x) {
462 -
#'        sum(x$b)
463 -
#'      }
464 -
#'    )
465 -
#'  )
466 -
#' @export
467 -
run_by <- function(x, idx, k, lag, na_pad, at) {
468 -
  if (!is.data.frame(x)) {
469 -
    stop("`run_by` should be used only for `data.frame`. \n
470 -
         Use `runner` on x directly.")
471 -
  }
472 -
473 -
  if (!missing(k)) x <- set_run_by_difftime(x, k)
474 -
  if (!missing(lag)) x <- set_run_by_difftime(x, lag)
475 -
  if (!missing(idx)) x <- set_run_by_index(x, idx)
476 -
  if (!missing(at)) x <- set_run_by_index(x, at)
477 -
  if (!missing(na_pad)) attr(x, "na_pad") <- na_pad
478 -
479 -
  return(x)
480 -
}
481 -
482 -
483 -
484 725
485 726
#' Creates sequence for at as time-unit-interval
486 727
#'
@@ -514,67 +755,8 @@
Loading
514 755
  return(at)
515 756
}
516 757
517 -
set_run_by_index <- function(x, arg) {
518 -
  arg_name <- deparse(substitute(arg))
519 -
520 -
  attr(x, arg_name) <- if (is.character(arg) && length(arg) == 1 && arg %in% names(x)) {
521 -
    arg
522 -
523 -
  } else if (is.numeric(arg) || inherits(arg, c("Date", "POSIXct", "POSIXxt", "POSIXlt"))) {
524 -
    arg
525 -
  } else {
526 -
    stop(
527 -
      sprintf(
528 -
        "`%s` should be either:
529 -
         - column name of `x`
530 -
         - vector of type `numeric`, `Date`, `POSIXct` or `POSIXlt`",
531 -
        arg_name
532 -
      ),
533 -
      call. = FALSE
534 -
    )
535 -
  }
536 -
  return(x)
537 -
}
538 -
539 -
set_run_by_difftime <- function(x, arg) {
540 -
  arg_name <- deparse(substitute(arg))
541 -
542 -
  attr(x, arg_name) <- if (is.character(arg)) {
543 -
    if (length(arg) == 1 && arg %in% names(x)) {
544 -
      arg
545 -
    } else if (all(is_datetime_valid(arg))) {
546 -
      arg
547 -
    } else {
548 -
      stop(
549 -
        sprintf(
550 -
          "`%s` is invalid, should be either:
551 -
           - column name of `x`
552 -
           - `difftime` class or character describing diffitme (see at argument in `seq.POSIXt`)
553 -
           - `numeric` or `integer` vector",
554 -
          arg_name
555 -
        ),
556 -
        call. = FALSE
557 -
      )
558 -
    }
559 -
  } else if (is.numeric(arg) || is(arg, "difftime")) {
560 -
    arg
561 -
  } else {
562 -
    stop(
563 -
      sprintf(
564 -
        "`%s` is invalid, should be either:
565 -
           - column name of `x`
566 -
           - `difftime` class or character describing diffitme (see at argument in `seq.POSIXt`)
567 -
           - `numeric` or `integer` `vector`",
568 -
        arg_name
569 -
      ),
570 -
      call. = FALSE
571 -
    )
572 -
  }
573 -
  return(x)
574 -
}
575 -
576 -
set_from_attribute_index <- function(x, attrib) {
577 -
  runner_args <- get_parent_call_arg_names()
758 +
set_from_attribute_at <- function(x, attrib) {
759 +
  runner_args <- get_runner_call_arg_names()
578 760
  arg_name <- deparse(substitute(attrib))
579 761
580 762
  # no arg overwriting
@@ -589,7 +771,8 @@
Loading
589 771
        sprintf(
590 772
          "`%s` should be either:
591 773
         - column name of `x`
592 -
         - vector of type `numeric`, `Date`, `POSIXct` or `POSIXlt`",
774 +
         - vector of type `numeric`, `Date`, `POSIXct` or `POSIXlt`
775 +
         - character value describing dates sequence step as in `by` argument of `seq.POSIXct`",
593 776
          arg_name
594 777
        ),
595 778
        call. = FALSE
@@ -611,6 +794,8 @@
Loading
611 794
612 795
    if (is.character(attrib) && length(attrib) == 1 && attrib %in% names(x)) {
613 796
      attrib <- x[[attrib]]
797 +
    } else if (length(attrib) == 1 && all(is_datetime_valid(attrib))) {
798 +
      # do nothing
614 799
    } else if (is.numeric(attrib) || inherits(attrib, c("Date", "POSIXct", "POSIXxt", "POSIXlt"))) {
615 800
      # do nothing
616 801
    } else {
@@ -629,34 +814,23 @@
Loading
629 814
  return(attrib)
630 815
}
631 816
632 -
set_from_attribute_at <- function(x, attrib) {
633 -
  runner_args <- get_parent_call_arg_names()
817 +
818 +
set_from_attribute_difftime <- function(x, attrib) {
819 +
  runner_args <- get_runner_call_arg_names()
634 820
  arg_name <- deparse(substitute(attrib))
635 821
636 -
  # no arg overwriting
637 822
  if (!is.null(attr(x, arg_name)) && !arg_name %in% runner_args) {
638 -
    if (length(attr(x, arg_name)) == 1 &&
639 -
        is.character(attr(x, arg_name)) &&
640 -
        attr(x, arg_name) %in% names(x)) {
641 -
823 +
    #  - argument has not been specified so it can be overwritten
824 +
    if (length(attr(x, arg_name)) == 1 && attr(x, arg_name) %in% names(x)) {
825 +
      # attr is a variable name
642 826
      attrib <- x[[attr(x, arg_name)]]
643 -
    } else if (is.character(attr(x, arg_name))) {
644 -
      stop(
645 -
        sprintf(
646 -
          "`%s` should be either:
647 -
         - column name of `x`
648 -
         - vector of type `numeric`, `Date`, `POSIXct` or `POSIXlt`
649 -
         - character value describing dates sequence step as in `by` argument of `seq.POSIXct`",
650 -
          arg_name
651 -
        ),
652 -
        call. = FALSE
653 -
      )
654 827
    } else {
828 +
      # attr is a vector of values - length validation later
655 829
      attrib <- attr(x, arg_name)
656 830
    }
657 831
658 -
    # arg overwriting (runner masks run_by)
659 -
  } else {
832 +
  } else if (arg_name %in% runner_args) {
833 +
    # - argument has been specified
660 834
    if (!is.null(attr(x, arg_name))) {
661 835
      warning(
662 836
        sprintf(
@@ -666,18 +840,33 @@
Loading
666 840
      )
667 841
    }
668 842
669 -
    if (is.character(attrib) && length(attrib) == 1 && attrib %in% names(x)) {
670 -
      attrib <- x[[attrib]]
671 -
    } else if (length(attrib) == 1 && all(is_datetime_valid(attrib))) {
672 -
      # do nothing
673 -
    } else if (is.numeric(attrib) || inherits(attrib, c("Date", "POSIXct", "POSIXxt", "POSIXlt"))) {
843 +
    if (is.character(attrib)) {
844 +
      if (length(attrib) == 1 && attrib %in% names(x)) {
845 +
        # argument as variable name
846 +
        attrib <- x[[attrib]]
847 +
      } else if (all(is_datetime_valid(attrib))) {
848 +
        # argument as a difftime character
849 +
      } else {
850 +
        stop(
851 +
          sprintf(
852 +
            "`%s` is invalid, should be either:
853 +
           - column name of `x`
854 +
           - difftime class or character describing diffitme (see at argument in seq.POSIXt)
855 +
           - numeric or integer vector",
856 +
            arg_name
857 +
          ),
858 +
          call. = FALSE
859 +
        )
860 +
      }
861 +
    } else if (is.numeric(attrib) || is(attrib, "difftime")) {
674 862
      # do nothing
675 863
    } else {
676 864
      stop(
677 865
        sprintf(
678 -
          "`%s` should be either:
679 -
         - column name of `x`
680 -
         - vector of type `numeric`, `Date`, `POSIXct` or `POSIXlt`",
866 +
          "`%s` is invalid, should be either:
867 +
           - column name of `x`
868 +
           - difftime class or character describing diffitme (see at argument in `seq.POSIXt`)
869 +
           - numeric or integer vector",
681 870
          arg_name
682 871
        ),
683 872
        call. = FALSE
@@ -688,15 +877,28 @@
Loading
688 877
  return(attrib)
689 878
}
690 879
691 -
692 -
set_from_attribute_difftime <- function(x, attrib) {
693 -
  runner_args <- get_parent_call_arg_names()
880 +
set_from_attribute_index <- function(x, attrib) {
694 881
  arg_name <- deparse(substitute(attrib))
882 +
  runner_args <- get_runner_call_arg_names()
695 883
696 -
  # no arg overwriting
884 +
  # No arg overwriting
885 +
  #  - attribute not empty and argument not specified
697 886
  if (!is.null(attr(x, arg_name)) && !arg_name %in% runner_args) {
698 -
    if (length(attr(x, arg_name)) == 1 && attr(x, arg_name) %in% names(x)) {
887 +
    if (length(attr(x, arg_name)) == 1 &&
888 +
        is.character(attr(x, arg_name)) &&
889 +
        attr(x, arg_name) %in% names(x)) {
890 +
699 891
      attrib <- x[[attr(x, arg_name)]]
892 +
    } else if (is.character(attr(x, arg_name))) {
893 +
      stop(
894 +
        sprintf(
895 +
          "`%s` should be either:
896 +
         - column name of `x`
897 +
         - vector of type `numeric`, `Date`, `POSIXct` or `POSIXlt`",
898 +
          arg_name
899 +
        ),
900 +
        call. = FALSE
901 +
      )
700 902
    } else {
701 903
      attrib <- attr(x, arg_name)
702 904
    }
@@ -712,32 +914,16 @@
Loading
712 914
      )
713 915
    }
714 916
715 -
    if (is.character(attrib)) {
716 -
      if (length(attrib) == 1 && attrib %in% names(x)) {
717 -
        attrib <- x[[attrib]]
718 -
      } else if (all(is_datetime_valid(attrib))) {
719 -
        # do nothing
720 -
      } else {
721 -
        stop(
722 -
          sprintf(
723 -
            "`%s` is invalid, should be either:
724 -
           - column name of `x`
725 -
           - difftime class or character describing diffitme (see at argument in seq.POSIXt)
726 -
           - numeric or integer vector",
727 -
            arg_name
728 -
          ),
729 -
          call. = FALSE
730 -
        )
731 -
      }
732 -
    } else if (is.numeric(attrib) || is(attrib, "difftime")) {
917 +
    if (is.character(attrib) && length(attrib) == 1 && attrib %in% names(x)) {
918 +
      attrib <- x[[attrib]]
919 +
    } else if (is.numeric(attrib) || inherits(attrib, c("Date", "POSIXct", "POSIXxt", "POSIXlt"))) {
733 920
      # do nothing
734 921
    } else {
735 922
      stop(
736 923
        sprintf(
737 -
          "`%s` is invalid, should be either:
738 -
           - column name of `x`
739 -
           - difftime class or character describing diffitme (see at argument in `seq.POSIXt`)
740 -
           - numeric or integer vector",
924 +
          "`%s` should be either:
925 +
         - column name of `x`
926 +
         - vector of type `numeric`, `Date`, `POSIXct` or `POSIXlt`",
741 927
          arg_name
742 928
        ),
743 929
        call. = FALSE
@@ -749,7 +935,7 @@
Loading
749 935
}
750 936
751 937
set_from_attribute_logical <- function(x, attrib) {
752 -
  runner_args <- get_parent_call_arg_names()
938 +
  runner_args <- get_runner_call_arg_names()
753 939
  arg_name <- deparse(substitute(attrib))
754 940
755 941
  # no arg overwriting
@@ -781,24 +967,21 @@
Loading
781 967
#' @md
782 968
#' @return data.frame filtered by current `dplyr::groups()`
783 969
this_group <- function(x) {
784 -
  if (is(x, "grouped_df")) {
785 -
    attrs <- attributes(x)
786 -
    attrs <- attrs[names(attrs) != "row.names"]
787 -
788 -
    new_env <- new.env(parent = parent.frame(n = 2)$.top_env)
789 -
    df_call <- as.call(
790 -
      append(
791 -
        as.name("data.frame"),
792 -
        lapply(names(x), as.name)
793 -
      )
970 +
  attrs <- attributes(x)
971 +
  attrs <- attrs[names(attrs) != "row.names"]
972 +
973 +
  new_env <- new.env(parent = parent.frame(n = 2)$.top_env)
974 +
  df_call <- as.call(
975 +
    append(
976 +
      as.name("data.frame"),
977 +
      lapply(names(x), as.name)
794 978
    )
979 +
  )
795 980
796 -
    x <- eval(df_call, envir = new_env)
797 -
    for (i in seq_along(attrs)) {
798 -
      attr(x, names(attrs)[i]) <- attrs[[i]]
799 -
    }
981 +
  x <- eval(df_call, envir = new_env)
982 +
  for (i in seq_along(attrs)) {
983 +
    attr(x, names(attrs)[i]) <- attrs[[i]]
800 984
  }
801 -
802 985
  return(x)
803 986
}
804 987
Files Coverage
R 97.46%
src 93.88%
Project Totals (13 files) 94.78%
1
comment: false
2

3
coverage:
4
  status:
5
    project:
6
      default:
7
        target: auto
8
        threshold: 1%
9
    patch:
10
      default:
11
        target: auto
12
        threshold: 1%
Sunburst
The inner-most circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project. Proceeding with folders and finally individual files. The size and color of each slice is representing the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block is represented by the number of statements and the coverage, respectively.
Loading