1
|
|
#' Sampling for the Apparent Error Rate
#'
#' When building a model on a data set and re-predicting the same data, the
#'  performance estimate from those predictions is often called the
#'  "apparent" performance of the model. This estimate can be wildly
#'  optimistic. "Apparent sampling" here means that the analysis and
#'  assessment samples are the same. These resamples are sometimes used in
#'  the analysis of bootstrap samples and should otherwise be
#'  avoided like old sushi.
#'
#' @inheritParams vfold_cv
#' @return A tibble with a single row and classes `apparent`,
#'  `rset`, `tbl_df`, `tbl`, and `data.frame`. The
#'  results include a column for the data split objects and one column
#'  called `id` that has a character string with the resample identifier.
#' @examples
#' apparent(mtcars)
#' @export
apparent <- function(data, ...) {
  # Every row is used for both analysis and assessment. `seq_len()` is used
  # instead of `1:nrow(data)` because `1:0` evaluates to `c(1, 0)`, which
  # would hand bogus row indices to `rsplit()` for zero-row input.
  row_ids <- seq_len(nrow(data))
  splits <- rsplit(data, in_id = row_ids, out_id = row_ids)

  # Tag the split so it can be told apart from ordinary splits.
  class(splits) <- c("rsplit", "apparent_split")

  # A single resample: one split in a list, labelled "Apparent".
  new_rset(
    splits = list(splits),
    ids = "Apparent",
    attrib = NULL,
    subclass = c("apparent", "rset")
  )
}
|
30
|
|
|
31
|
|
#' @export
print.apparent <- function(x, ...) {
  # Header line: "#" followed by whatever `pretty()` returns for `x`.
  cat("#", pretty(x), "\n")

  # Print a copy with the "apparent" and "rset" classes removed so that the
  # remaining classes' print method renders the body. Working on a copy
  # keeps the caller's object intact.
  stripped <- x
  class(stripped) <- class(x)[!(class(x) %in% c("apparent", "rset"))]
  print(stripped, ...)

  # Print methods conventionally return their argument invisibly; return the
  # original object (classes intact) rather than the stripped copy.
  invisible(x)
}
|
37
|
|
|
38
|
|
|