Showing 8 of 134 files from the diff.
Other files ignored by Codecov
man/pima_meds.Rd has changed.
man/reexports.Rd has changed.
docs/pkgdown.css has changed.
docs/pkgdown.yml has changed.
.Rbuildignore has changed.
docs/favicon.ico has changed.
docs/404.html has changed.
docs/authors.html has changed.
docs/index.html has changed.
docs/pkgdown.js has changed.
NEWS.md has changed.
.Rprofile is new.
DESCRIPTION has changed.
renv.lock is new.

@@ -241,10 +241,11 @@
Loading
241 241
  pivoted <-
242 242
    pivoted %>%
243 243
    dplyr::left_join(d, ., by = rlang::quo_name(id))
244 -
245 244
  # Replace any rows not found in the pivot table in the join with missing_fill
246 245
  new_cols <- setdiff(names(pivoted), names(d))
247 -
  pivoted[new_cols][is.na(pivoted[new_cols])] <- missing_fill
246 +
  if (!is.na(missing_fill)) {
247 +
    pivoted[new_cols][is.na(pivoted[new_cols])] <- missing_fill
248 +
  }
248 249
  # Add new best_levels to any that came in on d
249 250
  attr(pivoted, "best_levels") <-
250 251
    c(attr(d, "best_levels"),
@@ -284,7 +285,7 @@
Loading
284 285
    dplyr::inner_join(d, ., by = rlang::quo_name(id)) %>%
285 286
    # Filter any level present in only one grain
286 287
    group_by(!!groups) %>%
287 -
    filter(n_distinct(!!id) >= min_obs) %>%
288 +
    filter(n_distinct(!!sym(quo_name(id))) >= min_obs) %>%
288 289
    ungroup()
289 290
290 291
  if (!nrow(tomodel)) {
@@ -320,9 +321,10 @@
Loading
320 321
                  fraction_positive == 1 ~ 1 - (.5 / total_observations),
321 322
                  fraction_positive == 0 ~ .5 / total_observations,
322 323
                  TRUE ~ fraction_positive),
324 +
323 325
                # If level present in every observation, call it every one minus one-half
324 -
                present_in = ifelse(n_distinct(!!id) == total_observations,
325 -
                                    total_observations - .5, n_distinct(!!id)),
326 +
                present_in = ifelse(n_distinct(rlang::quo_name(id)) == total_observations,
327 +
                                    total_observations - .5, n_distinct(rlang::quo_name(id))),
326 328
                log_dist_from_in_all = -log(present_in / total_observations)) %>%
327 329
      dplyr::select(-present_in)
328 330
    median_positive <- stats::median(levs$fraction_positive)

@@ -135,7 +135,9 @@
Loading
135 135
                                 hold = hold,
136 136
                                 training_data = training_data)
137 137
  static <- do.call(dplyr::bind_rows, replicate(nrow(d), static, simplify = FALSE))
138 -
  d <- dplyr::bind_cols(d, static)
138 +
  if (!all(dim(static) == c(0, 0))) {
139 +
    d <- dplyr::bind_cols(d, static)
140 +
  }
139 141
  suppressWarnings(suppressMessages(preds <- predict(models, d)))
140 142
  # Intentionally leave off the predicted_df class here to avoid performance-
141 143
  # in-training info printing

@@ -31,22 +31,28 @@
Loading
31 31
                  "The estimates are very rough, and you should expect the progress ",
32 32
                  "ticker to cycle ", train_control$number + 1, " times.")
33 33
      } else if (model == "xgbTree") {
34 -
        train_args$allowParallel <- allow_parallel
34 +
        # If regression, overwrite caret's default of "reg:linear"
35 +
        if (metric %in% c("MAE", "RMSE", "Rsquared")) {
36 +
          train_args$objective <- "reg:squarederror"
37 +
        }
35 38
      }
36 -
37 39
      # caret loads packages at runtime, we don't want to see those startup messages
38 40
      suppressPackageStartupMessages({
39 41
        # Often get a single missing performance metric warning that doesn't
40 -
        # hurt anything, so silence it
42 +
        # hurt anything, so silence it.
43 +
        # Another warning for objective "reg:squarederror". For some reason, caret passes "reg:linear" by default
44 +
        # and xgb warns that linear was depreciated.
41 45
        withCallingHandlers(
42 46
          expr = do.call(caret::train, train_args),
43 47
          warning = function(w) {
44 48
            if (grepl("missing values in resampled", w))
45 49
              invokeRestart("muffleWarning")
46 -
          })
47 -
      })
48 -
50 +
            if (grepl("following parameters were provided multiple times", w))
51 +
              invokeRestart("muffleWarning")
52 +
          }
53 +
        )
49 54
    })
55 +
  })
50 56
  message("\n*** Models successfully trained. The model object contains the training data minus ignored ID columns. ***\n",
51 57
          "*** If there was PHI in training data, normal PHI protocols apply to the model object. ***")
52 58
  structure(train_list, positive_class = levels(dplyr::pull(d, !!outcome))[1]) %>%

@@ -162,15 +162,6 @@
Loading
162 162
    if (nominal_method == "new_category") {
163 163
      recipe <- step_missing(recipe, all_nominal(), - all_outcomes())
164 164
    } else if (nominal_method == "bagimpute") {
165 -
      if ("character" %in% map_chr(recipe$template, ~{
166 -
        class(.x) %>% first()
167 -
      }))
168 -
        warning("`bagimpute` depends on another library that does not support",
169 -
                " character columns yet. Check `bagimpute` by setting ",
170 -
                "`collapse_rare_factors = FALSE`, and verify that missing ",
171 -
                "values have been imputed with `missingness()`. If `bagimpute`",
172 -
                " does not impute missing values, please convert all character",
173 -
                " columns to factors.")
174 165
      recipe <- step_bagimpute(
175 166
        recipe,
176 167
        all_nominal(),
@@ -179,11 +170,7 @@
Loading
179 170
        options = nom_p$bag_options,
180 171
        impute_with = nom_p$impute_with,
181 172
        seed_val = nom_p$seed_val)
182 -
    }  else if (nominal_method == "knnimpute") {
183 -
      if ("character" %in% map_chr(recipe$template, ~class(.x) %>% first()))
184 -
        message("`knnimpute` depends on another library that does not support ",
185 -
                "character columns yet. If `knnimpute` fails please convert ",
186 -
                "all character columns to factors for knn imputation.")
173 +
    } else if (nominal_method == "knnimpute") {
187 174
      recipe <- step_knnimpute(
188 175
        recipe,
189 176
        all_nominal(), - all_outcomes(),

@@ -35,9 +35,8 @@
Loading
35 35
#'   \code{\link{hcai_impute}}
36 36
#' @param model_name Quoted, name of the model. Defaults to the name of the
37 37
#'   outcome variable.
38 -
#' @param allow_parallel Logical, defaults to FALSE. If TRUE and a parallel
39 -
#'   backend is set up (e.g. with \code{doMC}) models with support for parallel
40 -
#'   training will be trained across cores.
38 +
#' @param allow_parallel Depreciated. Instead, control the number of cores though your
39 +
#' parallel back end (e.g. with \code{doMC}).
41 40
#'
42 41
#' @return A model_list object. You can call \code{plot}, \code{summary},
43 42
#'   \code{evaluate}, or \code{predict} on a model_list.

@@ -38,9 +38,8 @@
Loading
38 38
#'   displayed in a message.
39 39
#' @param model_name Quoted, name of the model. Defaults to the name of the
40 40
#' outcome variable.
41 -
#' @param allow_parallel Logical, defaults to FALSE. If TRUE and a parallel
42 -
#'   backend is set up (e.g. with \code{doMC}) models with support for parallel
43 -
#'   training will be trained across cores.
41 +
#' @param allow_parallel Depreciated. Instead, control the number of cores though your
42 +
#' parallel back end (e.g. with \code{doMC}).
44 43
#'
45 44
#' @export
46 45
#' @importFrom rlang quo_name

@@ -138,12 +138,16 @@
Loading
138 138
do_aggregate <- function(d, grain, spread, fill, fun, default_fun) {
139 139
140 140
  start_rows <- nrow(d)
141 +
  exp_groups <- d %>%
142 +
    count(!!grain, !!spread) %>%
143 +
    nrow()
141 144
  # Define "safe" version of aggregate_rows for error handling
142 145
  ar <- purrr::safely(aggregate_rows)
143 146
  d <- ar(d, grain, spread, fill, fun)
144 -
145 147
  # If aggregate_rows didn't error, return result
146 -
  if (is.null(d$error)) {
148 +
  if (nrow(d$result) != exp_groups) {
149 +
    stop("No aggregration occured, check your'fun'.")
150 +
  } else if (is.null(d$error)) {
147 151
    # If the user didn't provide fun, and aggregation happened warn that we'll use sum
148 152
    if (default_fun && nrow(d$result) < start_rows) {
149 153
      message("There are rows that contain the same values of both ",
@@ -181,6 +185,7 @@
Loading
181 185
#' @details All variables come through from pivot
182 186
#' @importFrom data.table dcast.data.table
183 187
#' @importFrom data.table as.data.table
188 +
#' @importFrom tibble as_tibble
184 189
#' @return Pivoted tibble. One row for each grain; one column for each spread
185 190
#' @noRd
186 191
pivot_maker <- function(d, grain, spread, fill, missing_fill) {
@@ -190,7 +195,7 @@
Loading
190 195
                                    formula = f,
191 196
                                    fill = missing_fill,
192 197
                                    value.var = rlang::quo_name(fill))
193 -
  d <- as.tbl(d)
198 +
  d <- tibble::as_tibble(d)
194 199
  ## Add spread as prefix to nonID columns
195 200
  names(d)[2:ncol(d)] <- paste0(rlang::quo_name(spread), "_", names(d)[2:ncol(d)])
196 201
  return(d)

@@ -27,9 +27,8 @@
Loading
27 27
#'   displayed in a message.
28 28
#' @param model_name Quoted, name of the model. Defaults to the name of the
29 29
#' outcome variable.
30 -
#' @param allow_parallel Logical, defaults to FALSE. If TRUE and a parallel
31 -
#'   backend is set up (e.g. with \code{doMC}) models with support for parallel
32 -
#'   training will be trained across cores.
30 +
#' @param allow_parallel Depreciated. Instead, control the number of cores though your
31 +
#' parallel back end (e.g. with \code{doMC}).
33 32
#'
34 33
#' @export
35 34
#' @seealso For setting up model training: \code{\link{prep_data}},
Files Coverage
R 95.3%
Project Totals (41 files) 95.3%