tidymodels / infer

Compare 015e953 ... +1 ... ee84cae

No flags found

Use flags to group coverage reports by test type, project and/or folders.
Then set up custom commit statuses and notifications for each flag.

e.g., #unittest #integration

#production #enterprise

#frontend #backend

Learn more about Codecov Flags here.

Showing 1 of 2 files from the diff.

@@ -2,9 +2,9 @@
Loading
2 2
#'
3 3
#' @description
4 4
#'
5 -
#' Compute a confidence interval around a summary statistic. Only 
6 -
#' simulation-based methods are (currently) supported.
7 -
#' 
5 +
#' Compute a confidence interval around a summary statistic. Currently, only
6 +
#' simulation-based methods are supported.
7 +
#'
8 8
#' Learn more in `vignette("infer")`.
9 9
#'
10 10
#' @param x Data frame of calculated statistics or containing attributes of
@@ -21,54 +21,71 @@
Loading
21 21
#'
22 22
#' @return A 1 x 2 tibble with 'lower_ci' and 'upper_ci' columns. Values
23 23
#'   correspond to lower and upper bounds of the confidence interval.
24 +
#'
25 +
#' @details
26 +
#'   A null hypothesis is not required to compute a confidence interval,
27 +
#'   but including `hypothesize()` in a chain leading to `get_confidence_interval()`
28 +
#'   will not break anything.  This can be useful when computing a confidence
29 +
#'   interval after previously computing a p-value.
30 +
#'
24 31
#'
25 32
#' @section Aliases:
26 -
#' `get_ci()` is an alias of `get_confidence_interval()`.
27 -
#' `conf_int()` is a deprecated alias of `get_confidence_interval()`.
33 +
#'   `get_ci()` is an alias of `get_confidence_interval()`.
34 +
#'   `conf_int()` is a deprecated alias of `get_confidence_interval()`.
28 35
#'
29 36
#' @examples
30 -
#' 
31 -
#' # find the point estimate---mean number of hours worked per week
32 -
#' point_estimate <- gss %>%
37 +
#'
38 +
#' gss %>%
39 +
#'   # we're interested in the number of hours worked per week
40 +
#'   specify(response = hours) %>%
41 +
#'   # generate bootstrap samples
42 +
#'   generate(reps = 1000, type = "bootstrap") %>%
43 +
#'   # calculate mean of each bootstrap sample
44 +
#'   calculate(stat = "mean") %>%
45 +
#'   # calculate the confidence interval around the point estimate
46 +
#'   get_confidence_interval(
47 +
#'     # at the 95% confidence level; percentile method
48 +
#'     level = 0.95
49 +
#'   )
50 +
#'
51 +
#' # for type = "se" or type = "bias-corrected" we need a point estimate
52 +
#' sample_mean <- gss %>%
33 53
#'   specify(response = hours) %>%
34 54
#'   calculate(stat = "mean") %>%
35 55
#'   dplyr::pull()
36 -
#' 
37 -
#' # starting with the gss dataset
56 +
#'
38 57
#' gss %>%
39 58
#'   # ...we're interested in the number of hours worked per week
40 59
#'   specify(response = hours) %>%
41 -
#'   # hypothesizing that the mean is 40
42 -
#'   hypothesize(null = "point", mu = 40) %>%
43 60
#'   # generating bootstrap resamples of the data
44 61
#'   generate(reps = 1000, type = "bootstrap") %>%
45 62
#'   # finding the bootstrap distribution of the sample mean
46 63
#'   calculate(stat = "mean") %>%
47 64
#'   # calculate the confidence interval around the point estimate
48 65
#'   get_confidence_interval(
49 -
#'     point_estimate = point_estimate,
66 +
#'     point_estimate = sample_mean,
50 67
#'     # at the 95% confidence level
51 68
#'     level = 0.95,
52 69
#'     # using the standard error method
53 70
#'     type = "se"
54 71
#'   )
55 -
#'   
72 +
#'
56 73
#' # More in-depth explanation of how to use the infer package
57 74
#' \dontrun{
58 75
#' vignette("infer")
59 -
#' } 
60 -
#'  
76 +
#' }
77 +
#'
61 78
#' @name get_confidence_interval
62 79
#' @export
63 80
get_confidence_interval <- function(x, level = 0.95, type = "percentile",
64 81
                                    point_estimate = NULL) {
65 82
  check_ci_args(x, level, type, point_estimate)
66 -
  
83 +
67 84
  # Inform if no `level` was explicitly supplied
68 85
  if (!("level" %in% rlang::call_args_names(match.call()))) {
69 86
    message_glue("Using `level = {level}` to compute confidence interval.")
70 87
  }
71 -
  
88 +
72 89
  switch(
73 90
    type,
74 91
    percentile = ci_percentile(x, level),
@@ -88,30 +105,30 @@
Loading
88 105
89 106
ci_percentile <- function(x, level) {
90 107
  ci_vec <- stats::quantile(x[["stat"]], probs = (1 + c(-level, level)) / 2)
91 -
  
108 +
92 109
  make_ci_df(ci_vec)
93 110
}
94 111
95 112
ci_se <- function(x, level, point_estimate) {
96 113
  point_estimate <- check_obs_stat(point_estimate)
97 -
  
114 +
98 115
  multiplier <- stats::qnorm((1 + level) / 2)
99 116
  ci_vec <- point_estimate + c(-multiplier, multiplier) * stats::sd(x[["stat"]])
100 -
  
117 +
101 118
  make_ci_df(ci_vec)
102 119
}
103 120
104 121
ci_bias_corrected <- function(x, level, point_estimate) {
105 122
  point_estimate <- check_obs_stat(point_estimate)
106 -
  
123 +
107 124
  p <- mean(x[["stat"]] <= point_estimate)
108 -
  z0 <- stats::qnorm(p) 
125 +
  z0 <- stats::qnorm(p)
109 126
  # z_alpha_2 is z_(alpha/2)
110 127
  z_alpha_2 <- stats::qnorm((1 + c(-level, level)) / 2)
111 128
  new_probs <- stats::pnorm(2*z0 + z_alpha_2)
112 -
  
129 +
113 130
  ci_vec <- stats::quantile(x[["stat"]], probs = new_probs)
114 -
  
131 +
115 132
  make_ci_df(ci_vec)
116 133
}
117 134
@@ -126,7 +143,7 @@
Loading
126 143
  }
127 144
  check_type(x, is.data.frame)
128 145
  check_type(level, is.numeric)
129 -
  
146 +
130 147
  if ((level <= 0) || (level >= 1)) {
131 148
    stop_glue("The value of `level` must be between 0 and 1 non-inclusive.")
132 149
  }

Everything is accounted for!

No changes detected that need to be reviewed.
What changes does Codecov check for?
Lines, not adjusted in diff, that have changed coverage data.
Files that introduced coverage data that had none before.
Files that have missing coverage data that once were tracked.
Files Coverage
R 99.84%
Project Totals (15 files) 99.84%
Loading