tlverse / sl3

Compare fd1f409 ... +6 ... cf208dd

Coverage Reach
sl3_Task.R Lrnr_base.R Lrnr_cv.R Lrnr_sl.R Lrnr_h2o_grid.R utils.R Stack.R Lrnr_lstm_keras.R Lrnr_gru_keras.R variable_type.R Lrnr_xgboost.R Lrnr_density_semiparametric.R Lrnr_gam.R Lrnr_glm_fast.R Lrnr_lightgbm.R importance.R loss_functions.R Lrnr_solnp.R Pipeline.R Lrnr_screener_coefs.R process_data.R Lrnr_stratified.R Lrnr_multiple_ts.R Lrnr_glm.R Lrnr_h2o_glm.R learner_helpers.R Lrnr_glmnet.R Lrnr_screener_correlation.R zzz.R Lrnr_bilstm.R Lrnr_bayesglm.R Lrnr_optim.R Lrnr_ts_weights.R Lrnr_density_discretize.R Lrnr_randomForest.R prediction_plot.R Lrnr_arima.R Lrnr_haldensify.R Shared_Data.R Lrnr_caret.R Lrnr_solnp_density.R Lrnr_independent_binomial.R survival_utils.R Lrnr_multivariate.R process_missing.R Lrnr_ranger.R Lrnr_hal9001.R Lrnr_screener_importance.R Lrnr_svm.R Lrnr_polspline.R multinomial_helpers.R Lrnr_mean.R Lrnr_harmonicReg.R Lrnr_pooled_hazards.R Lrnr_bartMachine.R Lrnr_pkg_SuperLearner_screener.R Lrnr_nnls.R Lrnr_gbm.R Lrnr_dbarts.R Lrnr_pkg_SuperLearner.R Lrnr_earth.R Lrnr_hts.R Lrnr_tsDyn.R Lrnr_rpart.R Lrnr_cv_selector.R Lrnr_gts.R Lrnr_nnet.R Lrnr_bound.R default_metalearner.R debug_helpers.R Lrnr_grf.R density_utils.R Lrnr_rugarch.R Lrnr_density_hse.R Lrnr_revere_task.R sampling.R Lrnr_pca.R Lrnr_pkg_SuperLearner_method.R revere_task.R list_learners.R Custom_chain.R metalearners.R Lrnr_screener_augment.R reindex_folds.R CV_Lrnr_sl.R ts_utils.R Lrnr_expSmooth.R Lrnr_define_interactions.R make_learner_stack.R Lrnr_subset_covariates.R

No flags found

Use flags to group coverage reports by test type, project and/or folders.
Then set up custom commit statuses and notifications for each flag.

e.g., #unittest #integration

#production #enterprise

#frontend #backend

Learn more about Codecov Flags here.


@@ -113,6 +113,14 @@
Loading
113 113
  learner$subset_covariates(task)
114 114
}
115 115
116 +
#' @rdname learner_helpers
117 +
#'
118 +
#' @export
119 +
#
120 +
learner_process_formula <- function(learner, task) {
121 +
  learner$process_formula(task)
122 +
}
123 +
116 124
#' @rdname learner_helpers
117 125
#'
118 126
#' @export
@@ -128,6 +136,20 @@
Loading
128 136
  return(subset_delayed)
129 137
}
130 138
139 +
#' @rdname learner_helpers
140 +
#'
141 +
#' @export
142 +
#
143 +
delayed_learner_process_formula <- function(learner, task) {
144 +
  if (is(task, "Delayed")) {
145 +
    # only delay if task is delayed
146 +
    process_delayed <- delayed_fun(learner_process_formula)(learner, task)
147 +
    process_delayed$name <- "formula"
148 +
  } else {
149 +
    process_delayed <- learner_process_formula(learner, task)
150 +
  }
151 +
  return(process_delayed)
152 +
}
131 153
132 154
sl3_delayed_job_type <- function() {
133 155
  if (getOption("sl3.enable.future")) {

@@ -23,15 +23,6 @@
Loading
23 23
#'
24 24
#' @section Parameters:
25 25
#'   - \code{intercept = TRUE}: Should an intercept be included in the model?
26 -
#'     This argument is ignored when \code{formula} argument is not \code{NULL}.
27 -
#'   - \code{formula = NULL}: An object of class \code{formula} containing a
28 -
#'     description for the \code{\link[stats]{glm}} model to fit. NOTE that the
29 -
#'     \code{formula}'s response variable (i.e., variable before "\code{~}")
30 -
#'     must be identical to the outcome name as it's provided in the
31 -
#'     \code{task} and the \code{formula}'s regressors (i.e., variables after
32 -
#'     "\code{~}") must correspond to the column names in \code{X}, where
33 -
#'     \code{X} is the \code{task}'s processed dataset that's used for training
34 -
#'     learners and it can be accessed via \code{task$X}.
35 26
#'   - \code{...}: Other parameters passed to \code{\link[stats]{glm}} or
36 27
#'       \code{\link[stats]{glm.fit}}.
37 28
#'
@@ -57,7 +48,7 @@
Loading
57 48
  classname = "Lrnr_glm",
58 49
  inherit = Lrnr_base, portable = TRUE, class = TRUE,
59 50
  public = list(
60 -
    initialize = function(intercept = TRUE, formula = NULL, ...) {
51 +
    initialize = function(intercept = TRUE, ...) {
61 52
      super$initialize(params = args_to_list(), ...)
62 53
    }
63 54
  ),
@@ -75,30 +66,12 @@
Loading
75 66
      link_fun <- args$family$linkfun
76 67
77 68
      # specify data
78 -
      if (!is.null(args$formula)) {
79 -
        if (class(args$formula) != "formula") {
80 -
          print("Converting the provided formula to formula object")
81 -
          args$formula <- as.formula(args$formula)
82 -
        }
83 -
        if (!all.vars(args$formula)[1] == task$nodes$outcome) {
84 -
          stop(paste0(
85 -
            "Formula outcome name ", all.vars(args$formula)[1],
86 -
            " does not match the task's outcome name ", task$nodes$outcome
87 -
          ))
88 -
        }
89 -
        if (!all(all.vars(args$formula)[-1] %in% colnames(task$X))) {
90 -
          stop("Regressor variables in the formula are not columns in task$X")
91 -
        }
92 -
        args$data <- data.frame(outcome_type$format(task$Y), task$X)
93 -
        colnames(args$data)[1] <- task$nodes$outcome
69 +
      if (args$intercept) {
70 +
        args$x <- as.matrix(task$X_intercept)
94 71
      } else {
95 -
        if (args$intercept) {
96 -
          args$x <- as.matrix(task$X_intercept)
97 -
        } else {
98 -
          args$x <- as.matrix(task$X)
99 -
        }
100 -
        args$y <- outcome_type$format(task$Y)
72 +
        args$x <- as.matrix(task$X)
101 73
      }
74 +
      args$y <- outcome_type$format(task$Y)
102 75
103 76
104 77
      if (task$has_node("weights")) {
@@ -111,21 +84,12 @@
Loading
111 84
112 85
      args$control <- glm.control(trace = FALSE)
113 86
114 -
      if (!is.null(args$formula)) {
115 -
        SuppressGivenWarnings(
116 -
          {
117 -
            fit_object <- call_with_args(stats::glm, args)
118 -
          },
119 -
          GetWarningsToSuppress()
120 -
        )
121 -
      } else {
122 -
        SuppressGivenWarnings(
123 -
          {
124 -
            fit_object <- call_with_args(stats::glm.fit, args)
125 -
          },
126 -
          GetWarningsToSuppress()
127 -
        )
128 -
      }
87 +
      SuppressGivenWarnings(
88 +
        {
89 +
          fit_object <- call_with_args(stats::glm.fit, args)
90 +
        },
91 +
        GetWarningsToSuppress()
92 +
      )
129 93
130 94
      fit_object$linear.predictors <- NULL
131 95
      fit_object$weights <- NULL
@@ -134,7 +98,7 @@
Loading
134 98
      fit_object$residuals <- NULL
135 99
      fit_object$fitted.values <- NULL
136 100
      fit_object$effects <- NULL
137 -
      fit_object$qr$qr <- NULL
101 +
      fit_object$qr <- NULL
138 102
      fit_object$linkinv_fun <- linkinv_fun
139 103
      fit_object$link_fun <- link_fun
140 104
      fit_object$training_offset <- task$has_node("offset")
@@ -143,7 +107,7 @@
Loading
143 107
    .predict = function(task) {
144 108
      verbose <- getOption("sl3.verbose")
145 109
146 -
      if (self$params$intercept & is.null(self$params$formula)) {
110 +
      if (self$params$intercept) {
147 111
        X <- task$X_intercept
148 112
      } else {
149 113
        X <- task$X
@@ -152,26 +116,16 @@
Loading
152 116
      coef <- self$fit_object$coef
153 117
154 118
      if (nrow(X) > 0 & !all(is.na(coef))) {
155 -
        if (!is.null(self$params$formula)) {
156 -
          predictions <- stats::predict(
157 -
            private$.fit_object,
158 -
            newdata = X, type = "response"
119 +
        X <- as.matrix(X[, which(!is.na(coef)), drop = FALSE, with = FALSE])
120 +
        eta <- X %*% coef[!is.na(coef)]
121 +
        if (self$fit_object$training_offset) {
122 +
          offset <- task$offset_transformed(
123 +
            self$fit_object$link_fun,
124 +
            for_prediction = TRUE
159 125
          )
160 -
          if (task$outcome_type$type == "categorical") {
161 -
            predictions <- pack_predictions(predictions)
162 -
          }
163 -
        } else {
164 -
          X <- as.matrix(X[, which(!is.na(coef)), drop = FALSE, with = FALSE])
165 -
          eta <- X %*% coef[!is.na(coef)]
166 -
          if (self$fit_object$training_offset) {
167 -
            offset <- task$offset_transformed(
168 -
              self$fit_object$link_fun,
169 -
              for_prediction = TRUE
170 -
            )
171 -
            eta <- eta + offset
172 -
          }
173 -
          predictions <- as.vector(self$fit_object$linkinv_fun(eta))
126 +
          eta <- eta + offset
174 127
        }
128 +
        predictions <- as.vector(self$fit_object$linkinv_fun(eta))
175 129
      } else {
176 130
        predictions <- rep.int(NA, nrow(X))
177 131
      }

@@ -139,16 +139,18 @@
Loading
139 139
      # trains learner to data
140 140
      assert_that(is(task, "sl3_Task"))
141 141
142 -
      subsetted_task <- self$subset_covariates(task)
142 +
      task <- self$subset_covariates(task)
143 +
      processed_task <- self$process_formula(task)
144 +
143 145
      verbose <- getOption("sl3.verbose")
144 146
145 147
      if (!is.null(trained_sublearners)) {
146 -
        fit_object <- private$.train(subsetted_task, trained_sublearners)
148 +
        fit_object <- private$.train(processed_task, trained_sublearners)
147 149
      } else {
148 -
        fit_object <- private$.train(subsetted_task)
150 +
        fit_object <- private$.train(processed_task)
149 151
      }
150 152
      new_object <- self$clone() # copy parameters, and whatever else
151 -
      new_object$set_train(fit_object, subsetted_task)
153 +
      new_object$set_train(fit_object, task)
152 154
      return(new_object)
153 155
    },
154 156
    set_train = function(fit_object, training_task) {
@@ -182,8 +184,10 @@
Loading
182 184
      }
183 185
184 186
      assert_that(is(task, "sl3_Task"))
185 -
      subsetted_task <- self$subset_covariates(task)
186 -
      predictions <- private$.predict(subsetted_task)
187 +
      task <- self$subset_covariates(task)
188 +
      task <- self$process_formula(task)
189 +
190 +
      predictions <- private$.predict(task)
187 191
188 192
      ncols <- ncol(predictions)
189 193
      if (!is.null(ncols) && (ncols == 1)) {
@@ -198,20 +202,23 @@
Loading
198 202
      }
199 203
200 204
      assert_that(is(task, "sl3_Task"))
201 -
      subsetted_task <- self$subset_covariates(task)
205 +
      task <- self$subset_covariates(task)
206 +
      task <- self$process_formula(task)
207 +
202 208
      # use custom chain function if provided
203 209
      if (!is.null(private$.custom_chain)) {
204 -
        next_task <- private$.custom_chain(self, subsetted_task)
210 +
        next_task <- private$.custom_chain(self, task)
205 211
      } else {
206 -
        next_task <- private$.chain(subsetted_task)
212 +
        next_task <- private$.chain(task)
207 213
      }
208 214
      return(next_task)
209 215
    },
210 216
    train_sublearners = function(task) {
211 217
      # TODO: add error handling
212 -
      subsetted_task <- delayed_learner_subset_covariates(self, task)
218 +
      task <- delayed_learner_subset_covariates(self, task)
219 +
      task <- delayed_learner_process_formula(self, task)
213 220
214 -
      return(private$.train_sublearners(subsetted_task))
221 +
      return(private$.train_sublearners(task))
215 222
    },
216 223
    train = function(task) {
217 224
      delayed_fit <- delayed_learner_train(self, task)
@@ -289,6 +296,44 @@
Loading
289 296
      new_object <- new_self$clone() # copy parameters, and whatever else
290 297
      new_object$set_train(new_fit_object, new_task)
291 298
      return(new_object)
299 +
    },
300 +
    process_formula = function(task) {
301 +
      if ("formula" %in% names(self$params) &&
302 +
        !is.null(self$params[["formula"]])) {
303 +
        form <- self$params$formula
304 +
        if (class(form) != "formula") form <- as.formula(form)
305 +
306 +
        # check response variable corresponds to outcome in task, if provided
307 +
        if (attr(terms(form), "response")) {
308 +
          if (!all.vars(form)[1] == task$nodes$outcome) {
309 +
            stop(paste0(
310 +
              "Outcome variable in formula ", all.vars(form)[1],
311 +
              " does not match the task's outcome ", task$nodes$outcome
312 +
            ))
313 +
          }
314 +
          formula_covars <- all.vars(form)[-1]
315 +
        } else {
316 +
          formula_covars <- all.vars(form)
317 +
        }
318 +
        # check that regressors in the formula are contained in the task
319 +
        if (!all(formula_covars %in% task$nodes$covariates)) {
320 +
          stop("Regressors in the formula are not covariates in task")
321 +
        }
322 +
323 +
        # get data corresponding to formula and add new columns to the task
324 +
        data <- as.data.table(stats::model.matrix(form, data = task$data))
325 +
        new_cols <- setdiff(names(data), names(task$data))
326 +
        if (any(grepl("Intercept", new_cols))) {
327 +
          new_cols <- new_cols[!grepl("Intercept", new_cols)]
328 +
        }
329 +
        data <- data[, new_cols, with = FALSE]
330 +
        new_cols <- task$add_columns(data)
331 +
        return(
332 +
          task$next_in_chain(covariates = names(data), column_names = new_cols)
333 +
        )
334 +
      } else {
335 +
        return(task)
336 +
      }
292 337
    }
293 338
  ),
294 339
  active = list(

@@ -71,6 +71,9 @@
Loading
71 71
                          ...) {
72 72
      params <- args_to_list()
73 73
      super$initialize(params = params, ...)
74 +
    },
75 +
    process_formula = function(task) {
76 +
      return(task)
74 77
    }
75 78
  ),
76 79
  private = list(

Everything is accounted for!

No changes detected that need to be reviewed.
What changes does Codecov check for?
Lines that were not adjusted in the diff but whose coverage data changed.
Files that introduced coverage data that had none before.
Files that were previously tracked but are now missing coverage data.
Files Coverage
R 0.04% 77.01%
Project Totals (90 files) 77.01%
Loading