1
#' Create a Validation Set
2
#'
3
#' `validation_split()` takes a single random sample (without replacement) of
4
#'  the original data set to be used for analysis. All other data points are
5
#'  added to the assessment set (to be used as the validation set).
6
#' @details The `strata` argument causes the random sampling to be conducted
7
#'  *within the stratification variable*. This can help ensure that the number of
8
#'  data points in the analysis data is equivalent to the proportions in the
9
#'  original data set. (Strata below 10% of the total are pooled together.)
10
#' @inheritParams vfold_cv
11
#' @param prop The proportion of data to be retained for modeling/analysis.
12
#' @param strata A variable that is used to conduct stratified sampling to
13
#'  create the resamples. This could be a single character value or a variable
14
#'  name that corresponds to a variable that exists in the data frame.
15
#' @param breaks A single number giving the number of bins desired to stratify
16
#'  a numeric stratification variable.
17
#' @export
18
#' @return An tibble with classes `validation_split`, `rset`, `tbl_df`, `tbl`,
19
#'  and `data.frame`. The results include a column for the data split objects
20
#'  and a column called `id` that has a character string with the resample
21
#'  identifier.
22
#' @examples
23
#' validation_split(mtcars, prop = .9)
24
#' @export
25
validation_split <- function(data, prop = 3/4, strata = NULL, breaks = 4, ...) {
26

27 1
  if (!missing(strata)) {
28 1
    strata <- tidyselect::vars_select(names(data), !!enquo(strata))
29 1
    if (length(strata) == 0) {
30 0
      strata <- NULL
31
    }
32
  }
33

34 1
  strata_check(strata, names(data))
35

36 1
  split_objs <-
37 1
    mc_splits(data = data,
38 1
              prop = 1 - prop,
39 1
              times = 1,
40 1
              strata = strata,
41 1
              breaks = breaks)
42

43
  ## We remove the holdout indices since it will save space and we can
44
  ## derive them later when they are needed.
45

46 1
  split_objs$splits <- map(split_objs$splits, rm_out)
47 1
  class(split_objs$splits[[1]]) <- c("val_split", "rsplit")
48

49 1
  val_att <- list(prop = prop,
50 1
                 strata = !is.null(strata))
51

52 1
  new_rset(splits = split_objs$splits,
53 1
           ids = "validation",
54 1
           attrib = val_att,
55 1
           subclass = c("validation_split", "rset"))
56
}
57

58
#' @export
59
print.validation_split <- function(x, ...) {
60 1
  cat("#", pretty(x), "\n")
61 1
  class(x) <- class(x)[!(class(x) %in% c("validation_split", "rset"))]
62 1
  print(x, ...)
63
}
64

65

66
#' @export
67
print.val_split<- function(x, ...) {
68

69 1
  if (all(is.na(x$out_id))) {
70 1
    out_char <- paste(length(complement(x)))
71
  } else {
72 0
    out_char <- paste(length(x$out_id))
73
  }
74

75 1
  cat("<Training/Validation/Total>\n")
76 1
  cat("<",
77 1
      length(x$in_id), "/",
78 1
      out_char, "/",
79 1
      nrow(x$data), ">\n",
80 1
      sep = "")
81
}

Read our documentation on viewing source code .

Loading