1
#' Tidy Resampling Object
2
#'
3
#' The `tidy` function from the \pkg{broom} package can be used on `rset` and
4
#'  `rsplit` objects to generate tibbles with which rows are in the analysis and
5
#'  assessment sets.
6
#' @param x A  `rset` or  `rsplit` object
7
#' @param unique_ind Should unique row identifiers be returned? For example,
8
#'  if `FALSE` then bootstrapping results will include multiple rows in the
9
#'  sample for the same row in the original data.
10
#' @param ... Not currently used.
11
#' @return A tibble with columns `Row` and `Data`. The latter has possible
12
#'  values "Analysis" or "Assessment". For `rset` inputs, identification columns
13
#'  are also returned but their names and values depend on the type of
14
#'  resampling. `vfold_cv` contains a column "Fold" and, if repeats are used,
15
#'  another called "Repeats". `bootstraps` and `mc_cv` use the column
16
#'  "Resample".
17
#' @details Note that for nested resampling, the rows of the inner resample,
18
#'  named `inner_Row`, are *relative* row indices and do not correspond to the
19
#'  rows in the original data set.
20
#' @examples
21
#' library(ggplot2)
22
#' theme_set(theme_bw())
23
#'
24
#' set.seed(4121)
25
#' cv <- tidy(vfold_cv(mtcars, v = 5))
26
#' ggplot(cv, aes(x = Fold, y = Row, fill = Data)) +
27
#'   geom_tile() + scale_fill_brewer()
28
#'
29
#' set.seed(4121)
30
#' rcv <- tidy(vfold_cv(mtcars, v = 5, repeats = 2))
31
#' ggplot(rcv, aes(x = Fold, y = Row, fill = Data)) +
32
#'   geom_tile() + facet_wrap(~Repeat) + scale_fill_brewer()
33
#'
34
#' set.seed(4121)
35
#' mccv <- tidy(mc_cv(mtcars, times = 5))
36
#' ggplot(mccv, aes(x = Resample, y = Row, fill = Data)) +
37
#'   geom_tile() + scale_fill_brewer()
38
#'
39
#' set.seed(4121)
40
#' bt <- tidy(bootstraps(mtcars, time = 5))
41
#' ggplot(bt, aes(x = Resample, y = Row, fill = Data)) +
42
#'   geom_tile() + scale_fill_brewer()
43
#'
44
#' dat <- data.frame(day = 1:30)
45
#' # Resample by week instead of day
46
#' ts_cv <- rolling_origin(dat, initial = 7, assess = 7,
47
#'                         skip = 6, cumulative = FALSE)
48
#' ts_cv <- tidy(ts_cv)
49
#' ggplot(ts_cv, aes(x = Resample, y = factor(Row), fill = Data)) +
50
#'   geom_tile() + scale_fill_brewer()
51
#' @export
52
tidy.rsplit <- function(x, unique_ind = TRUE, ...) {
53 1
  if (unique_ind) x$in_id <- unique(x$in_id)
54 1
  out <- tibble(Row = c(x$in_id, complement(x)),
55 1
                Data = rep(c("Analysis", "Assessment"),
56 1
                           c(length(x$in_id), length(complement(x)))))
57 1
  out <- dplyr::arrange(.data = out, Data, Row)
58 1
  out
59
}
60

61
#' @rdname tidy.rsplit
62
#' @export
63
tidy.rset <- function(x, ...)  {
64 1
  dots <- list(...)
65 1
  unique_ind <- dplyr::if_else(is.null(dots$unique_ind),
66 1
                               TRUE,
67 1
                               dots$unique_ind)
68 1
  stacked <- purrr::map(x$splits, tidy, unique_ind = unique_ind)
69 1
  for (i in seq(along = stacked))
70 1
    stacked[[i]]$Resample <- x$id[i]
71 1
  stacked <- dplyr::bind_rows(stacked)
72 1
  stacked <- dplyr::arrange(.data = stacked, Data, Row)
73 1
  stacked
74
}
75
#' @rdname tidy.rsplit
76
#' @export
77
tidy.vfold_cv <- function(x, ...)  {
78 1
  stacked <- purrr::map(x$splits, tidy)
79 1
  for (i in seq(along = stacked)) {
80 1
    if (attr(x, "repeats") > 1) {
81 1
      stacked[[i]]$Repeat <- x$id[i]
82 1
      stacked[[i]]$Fold <- x$id2[i]
83
    } else
84 1
      stacked[[i]]$Fold <- x$id[i]
85
  }
86 1
  stacked <- dplyr::bind_rows(stacked)
87 1
  stacked <- dplyr::arrange(.data = stacked, Data, Row)
88 1
  stacked
89
}
90

91
#' @rdname tidy.rsplit
92
#' @export
93
tidy.nested_cv <- function(x, ...)  {
94

95 0
  x$inner_tidy <- purrr::map(x$inner_resamples, tidy_wrap)
96 0
  inner_tidy <- tidyr::unnest(x, inner_tidy)
97 0
  class(x) <- class(x)[class(x) != "nested_cv"]
98 0
  outer_tidy <- tidy(x)
99 0
  id_cols <- names(outer_tidy)
100 0
  id_cols <- id_cols[!(id_cols %in% c("Row", "Data"))]
101

102 0
  inner_id <- grep("^id", names(inner_tidy))
103 0
  if (length(inner_id) != length(id_cols))
104 0
    stop("Cannot merge tidt data sets", call. = FALSE)
105 0
  names(inner_tidy)[inner_id] <- id_cols
106 0
  full_join(outer_tidy, inner_tidy, by = id_cols)
107
}
108

109
tidy_wrap <- function(x) {
110 0
  x <- tidy(x)
111 0
  names(x) <- paste0("inner_", names(x))
112 0
  x
113
}

Read our documentation on viewing source code .

Loading