tlverse / sl3
Showing 65 of 247 files from the diff.
Other files ignored by Codecov
man/Lrnr_rpart.Rd has changed.
man/Lrnr_nnls.Rd has changed.
man/Lrnr_sl.Rd has changed.
man/Lrnr_hts.Rd has changed.
NAMESPACE has changed.
man/Stack.Rd has changed.
man/Lrnr_gam.Rd has changed.
docs/index.html has changed.
docs/pkgdown.yml has changed.
man/Lrnr_solnp.Rd has changed.
man/Lrnr_tsDyn.Rd has changed.
man/Lrnr_pca.Rd has changed.
man/Lrnr_svm.Rd has changed.
man/Lrnr_arima.Rd has changed.
man/Lrnr_gts.Rd has changed.
man/Lrnr_base.Rd has changed.
man/Lrnr_grf.Rd has changed.
man/Lrnr_nnet.Rd has changed.
man/Lrnr_caret.Rd has changed.
R/Lrnr_lstm.R was deleted.
man/Lrnr_earth.Rd has changed.
man/Lrnr_glm.Rd has changed.
man/Lrnr_lstm.Rd was deleted.
README.md has changed.
man/Lrnr_optim.Rd has changed.
man/Lrnr_cv.Rd has changed.
man/Lrnr_bound.Rd has changed.
man/Lrnr_gbm.Rd has changed.
man/Lrnr_mean.Rd has changed.
man/Pipeline.Rd has changed.

@@ -42,10 +42,8 @@
42 42
      super$initialize(params = params, ...)
43 43
    }
44 44
  ),
45 -
46 45
  private = list(
47 46
    .properties = c("density"),
48 -
49 47
    .train = function(task) {
50 48
      discretized <- discretize_variable(task$Y,
51 49
        type = self$params$type,
@@ -72,7 +70,6 @@
72 70
      )
73 71
      return(fit_object)
74 72
    },
75 -
76 73
    .predict = function(task) {
77 74
      # make discretized task
78 75
      discretized <- discretize_variable(task$Y,
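
Note: the discretization step above bins a continuous outcome before density estimation. A minimal standalone sketch of the two binning strategies referenced throughout this diff (via ggplot2's cut_interval and cut_number; sl3's internal discretize_variable is not shown here, so this is illustrative only):

library(ggplot2)
# illustrative data; sl3 would use task$Y here
set.seed(1)
y <- rnorm(100)
bins_equal_range <- cut_interval(y, n = 5) # bins of equal width
bins_equal_mass <- cut_number(y, n = 5)    # bins with ~equal counts
table(bins_equal_range)
table(bins_equal_mass)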

@@ -91,10 +91,8 @@
91 91
      super$initialize(params = args_to_list(), ...)
92 92
    }
93 93
  ),
94 -
95 94
  private = list(
96 95
    .properties = c("continuous", "binomial", "categorical", "weights"),
97 -
98 96
    .train = function(task) {
99 97
      args <- self$params
100 98
      outcome_type <- self$get_outcome_type(task)
@@ -120,7 +118,6 @@
120 118
      fit_object <- call_with_args(grf::quantile_forest, args)
121 119
      return(fit_object)
122 120
    },
123 -
124 121
    .predict = function(task) {
125 122
      # quantiles for which to predict
126 123
      quantiles_pred <- private$.params$quantiles_pred

@@ -58,7 +58,6 @@
58 58
      "continuous", "binomial", "categorical", "weights",
59 59
      "offset"
60 60
    ),
61 -
62 61
    .train = function(task) {
63 62
      verbose <- getOption("sl3.verbose")
64 63
      params <- self$params
@@ -125,7 +124,6 @@
125 124
      fit_object$name <- "solnp"
126 125
      return(fit_object)
127 126
    },
128 -
129 127
    .predict = function(task = NULL) {
130 128
      verbose <- getOption("sl3.verbose")
131 129
      X <- as.matrix(task$X)

@@ -82,7 +82,6 @@
82 82
      private$.fit_object$is_error <- is_error
83 83
    }
84 84
  ),
85 -
86 85
  active = list(
87 86
    name = function() {
88 87
      # learners = self$params$learners
@@ -96,11 +95,9 @@
96 95
      return(result)
97 96
    }
98 97
  ),
99 -
100 98
  private = list(
101 99
    # modified names of learners
102 100
    .learner_names = NULL,
103 -
104 101
    .train_sublearners = function(task) {
105 102
      # generate training subtasks
106 103
      learners <- self$params$learners

@@ -1,7 +1,8 @@
1 -
#' BART Machine Learner
1 +
#' bartMachine: Bayesian Additive Regression Trees (BART)
2 2
#'
3 -
#' This learner implements Bayesian Additive Regression Trees, using the
4 -
#' \code{bartMachine} package.
3 +
#' This learner implements Bayesian Additive Regression Trees via
4 +
#' \pkg{bartMachine} (described in \insertCite{bartMachine;textual}{sl3})
5 +
#' and the function \code{\link[bartMachine]{bartMachine}}.
5 6
#'
6 7
#' @docType class
7 8
#'
@@ -12,21 +13,32 @@
12 13
#'
13 14
#' @keywords data
14 15
#'
15 -
#' @return Learner object with methods for training and prediction. See
16 -
#'   \code{\link{Lrnr_base}} for documentation on learners.
16 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
17 +
#'  methods for training and prediction. For a full list of learner
18 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
17 19
#'
18 -
#' @format \code{\link{R6Class}} object.
20 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
21 +
#'  \code{\link{Lrnr_base}}.
19 22
#'
20 23
#' @family Learners
21 24
#'
22 25
#' @section Parameters:
23 -
#' \describe{
24 -
#'   \item{\code{...}}{Parameters passed to
25 -
#'   \code{\link[bartMachine]{bartMachine}} and
26 -
#'   \code{\link[bartMachine]{build_bart_machine}}. See it's documentation for
27 -
#'   details.}
28 -
#' }
26 +
#'   - \code{...}: Parameters passed to \code{\link[bartMachine]{bartMachine}}.
27 +
#'       See its documentation for details.
29 28
#'
29 +
#' @references
30 +
#'  \insertAllCited{}
31 +
#'
32 +
#' @examples
33 +
#' # set up ML task
34 +
#' data(cpp_imputed)
35 +
#' covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs")
36 +
#' task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz")
37 +
#'
38 +
#' # fit a bartMachine model and predict from it
39 +
#' bartMachine_learner <- make_learner(Lrnr_bartMachine)
40 +
#' bartMachine_fit <- bartMachine_learner$train(task)
41 +
#' preds <- bartMachine_fit$predict()
30 42
Lrnr_bartMachine <- R6Class(
31 43
  classname = "Lrnr_bartMachine",
32 44
  inherit = Lrnr_base, portable = TRUE, class = TRUE,
@@ -44,14 +56,11 @@
44 56
        )
45 57
        options(java.parameters = "-Xmx2500m")
46 58
      }
47 -
48 59
      super$initialize(params = args_to_list(), ...)
49 60
    }
50 61
  ),
51 -
52 62
  private = list(
53 63
    .properties = c("continuous", "binomial"),
54 -
55 64
    .train = function(task) {
56 65
      args <- self$params
57 66
@@ -72,7 +81,6 @@
72 81
73 82
      return(fit_object)
74 83
    },
75 -
76 84
    .predict = function(task) {
77 85
      predictions <- stats::predict(
78 86
        private$.fit_object,

@@ -39,7 +39,6 @@
39 39
  ),
40 40
  private = list(
41 41
    .properties = c("screener"),
42 -
43 42
    .train = function(task) {
44 43
      args <- self$params
45 44
@@ -102,15 +101,12 @@
102 101
      fit_object <- list(selected = selected)
103 102
      return(fit_object)
104 103
    },
105 -
106 104
    .predict = function(task) {
107 105
      task$X[, private$.fit_object$selected, with = FALSE, drop = FALSE]
108 106
    },
109 -
110 107
    .chain = function(task) {
111 108
      return(task$next_in_chain(covariates = private$.fit_object$selected))
112 109
    },
113 -
114 110
    .required_packages = c()
115 111
  )
116 112
)
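
Note: because .chain above passes only the selected covariates forward, a screener is typically composed with a downstream learner in a Pipeline. A hedged usage sketch (the screener class name is illustrative and may differ in this version of sl3):

# chain a screener into a GLM so only selected covariates reach the GLM
screener <- Lrnr_screener_corP$new() # illustrative screener choice
glm_lrnr <- Lrnr_glm$new()
screen_then_glm <- make_learner(Pipeline, screener, glm_lrnr)
# training screen_then_glm on a task invokes .chain between the two steps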

@@ -1,8 +1,11 @@
1 -
#' GBM - generalized boosted regression models
1 +
#' GBM: Generalized Boosted Regression Models
2 2
#'
3 -
#' This learner provides fitting procedures for building generalized boosted
4 -
#' regression models, using the function \code{\link[gbm]{gbm}} from the
5 -
#' \code{gbm} package.
3 +
#' This learner provides fitting procedures for generalized boosted regression
4 +
#' trees, using the routines from \pkg{gbm}, through a call to the function
5 +
#' \code{\link[gbm]{gbm.fit}}. Though a variety of gradient boosting strategies
6 +
#' have seen popularity in machine learning, a few of the early methodological
7 +
#' descriptions were given by \insertCite{friedman-gbm1;textual}{sl3} and
8 +
#' \insertCite{friedman-gbm2;textual}{sl3}.
6 9
#'
7 10
#' @docType class
8 11
#'
@@ -13,48 +16,63 @@
13 16
#'
14 17
#' @keywords data
15 18
#'
16 -
#' @return Learner object with methods for training and prediction. See
17 -
#'  \code{\link{Lrnr_base}} for documentation on learners.
19 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
20 +
#'  methods for training and prediction. For a full list of learner
21 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
18 22
#'
19 -
#' @format \code{\link{R6Class}} object.
23 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
24 +
#'  \code{\link{Lrnr_base}}.
20 25
#'
21 26
#' @family Learners
22 27
#'
28 +
#' @seealso [Lrnr_xgboost] for the extreme gradient boosted tree models from
29 +
#'  the Xgboost framework (via the \pkg{xgboost} package) and [Lrnr_lightgbm]
30 +
#'  for the faster and more efficient gradient boosted trees from the LightGBM
31 +
#'  framework (via the \pkg{lightgbm} package).
32 +
#'
23 33
#' @section Parameters:
24 -
#' \describe{
25 -
#'   \item{\code{n.trees}}{Integer specifying the total number of trees to fit.
26 -
#'     This is equivalent to the number of iterations and the number of basis
27 -
#'     functions in the additive expansion. Default is 10000.
28 -
#'   }
29 -
#'   \item{\code{interaction.depth}}{Integer specifying the maximum depth of
30 -
#'     each tree (i.e.,  the highest level ofvariable interactions allowed).
31 -
#'     A value of 1 implies an additive model, a valueof 2 implies a model with
32 -
#'     up to 2-way interactions, etc. Default is 2.
33 -
#'   }
34 -
#'   \item{\code{shrinkage}}{A shrinkage parameter applied to each tree in the
35 -
#'     expansion. Also known asthe learning rate or step-size reduction; 0.001
36 -
#'     to 0.1 usually work, but a smallerlearning rate typically requires more
37 -
#'     trees. Default is 0.001.
38 -
#'   }
39 -
#'   \item{\code{...}}{Other parameters passed to \code{\link[gbm]{gbm}}.
40 -
#'     See its documentation for details.
41 -
#'   }
42 -
#' }
43 -
#
34 +
#'   - \code{n.trees}: An integer specifying the total number of trees to fit.
35 +
#'       This is equivalent to the number of iterations and the number of basis
36 +
#'       functions in the additive expansion. The default is 10000.
37 +
#'   - \code{interaction.depth}: An integer specifying the maximum depth of
38 +
#'       each tree (i.e., the highest level of allowed variable interactions).
39 +
#'       A value of 1 implies an additive model, while a value of 2 implies a
40 +
#'       model with up to 2-way interactions, etc. The default is 2.
41 +
#'   - \code{shrinkage}: A shrinkage parameter applied to each tree in the
42 +
#'       expansion. Also known as the learning rate or step-size reduction;
43 +
#'       values of 0.001 to 0.1 have been found to usually work, but a smaller
44 +
#'       learning rate typically requires more trees. The default is 0.001.
45 +
#'   - \code{...}: Other parameters passed to \code{\link[gbm]{gbm}}. See its
46 +
#'       documentation for details.
47 +
#'
48 +
#' @references
49 +
#'  \insertAllCited{}
50 +
#'
51 +
#' @examples
52 +
#' data(cpp_imputed)
53 +
#' # create task for prediction
54 +
#' cpp_task <- sl3_Task$new(
55 +
#'   data = cpp_imputed,
56 +
#'   covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "sexn"),
57 +
#'   outcome = "haz"
58 +
#' )
59 +
#'
60 +
#' # initialization, training, and prediction with the defaults
61 +
#' gbm_lrnr <- Lrnr_gbm$new()
62 +
#' gbm_fit <- gbm_lrnr$train(cpp_task)
63 +
#' gbm_preds <- gbm_fit$predict()
44 64
Lrnr_gbm <- R6Class(
45 65
  classname = "Lrnr_gbm", inherit = Lrnr_base,
46 66
  portable = TRUE, class = TRUE,
47 67
  public = list(
48 -
    initialize = function(n.trees = 10000, interaction.depth = 2,
68 +
    initialize = function(n.trees = 10000L, interaction.depth = 2,
49 69
                          shrinkage = 0.001, ...) {
50 70
      params <- args_to_list()
51 71
      super$initialize(params = params, ...)
52 72
    }
53 73
  ),
54 -
55 74
  private = list(
56 75
    .properties = c("continuous", "binomial"),
57 -
58 76
    .train = function(task) {
59 77
      args <- self$params
60 78
      outcome_type <- self$get_outcome_type(task)
@@ -86,7 +104,6 @@
86 104
      fit_object <- call_with_args(gbm::gbm.fit, args)
87 105
      return(fit_object)
88 106
    },
89 -
90 107
    .predict = function(task) {
91 108
      preds <- stats::predict(
92 109
        object = private$.fit_object, newdata = task$X,
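
Note: a minimal standalone sketch of the gbm.fit/predict pair wrapped by .train and .predict above (data and tuning values are illustrative; sl3 passes task$X and the formatted outcome instead):

library(gbm)
x <- mtcars[, c("cyl", "disp", "wt")]
y <- mtcars$mpg
fit <- gbm.fit(
  x = x, y = y, distribution = "gaussian",
  n.trees = 100, interaction.depth = 2, shrinkage = 0.1, verbose = FALSE
)
# predict.gbm requires specifying how many trees to use
preds <- predict(fit, newdata = x, n.trees = 100)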

@@ -101,7 +101,6 @@
101 101
102 102
      return(fit_object)
103 103
    },
104 -
105 104
    .predict = function(task = NULL) {
106 105
      args <- self$params
107 106

@@ -41,12 +41,10 @@
41 41
    .properties = c(
42 42
      "continuous", "binomial", "categorical", "weights", "wrapper"
43 43
    ),
44 -
45 44
    .train = function(task) {
46 45
      fit_object <- list()
47 46
      return(fit_object)
48 47
    },
49 -
50 48
    .predict = function(task = NULL) {
51 49
      verbose <- getOption("sl3.verbose")
52 50
      X <- as.matrix(task$X)

@@ -1,138 +1,119 @@
1 1
#' Harmonic Regression
2 2
#'
3 3
#' @description This learner fits first harmonics in a Fourier expansion to one
4 -
#' or more time series. Fourier decomposition relies on
5 -
#' \code{\link[forecast]{fourier}}, and the time series is fit using
6 -
#' \code{\link[forecast]{tslm}}
4 +
#'  or more time series. Fourier decomposition relies on
5 +
#'  \code{\link[forecast]{fourier}}, and the time series is fit using
6 +
#'  \code{\link[forecast]{tslm}}. For further details on working with harmonic
7 +
#'  regression for time-series with package \pkg{forecast}, consider consulting
8 +
#'  \insertCite{forecast;textual}{sl3} and
9 +
#'  \insertCite{hyndman2008forecast-jss;textual}{sl3}.
7 10
#'
8 11
#' @docType class
9 12
#'
10 13
#' @importFrom R6 R6Class
11 -
#' @importFrom assertthat assert_that is.count is.flag
12 14
#' @importFrom stats arima
13 15
#'
14 16
#' @export
15 17
#'
16 18
#' @keywords data
17 19
#'
18 -
#' @return Learner object with methods for training and prediction. See
19 -
#'  \code{\link{Lrnr_base}} for documentation on learners.
20 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
21 +
#'  methods for training and prediction. For a full list of learner
22 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
20 23
#'
21 -
#' @format \code{\link{R6Class}} object.
24 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
25 +
#'  \code{\link{Lrnr_base}}.
22 26
#'
23 27
#' @family Learners
24 28
#'
25 29
#' @section Parameters:
26 -
#' \describe{
27 -
#'   \item{\code{Kparam}}{Maximum order of the fourier terms. Passed to
28 -
#'     \code{\link[forecast]{fourier}}.}
29 -
#'   \item{\code{n.ahead=NULL}}{ The forecast horizon. If not specified, returns
30 -
#'     forecast of size \code{task$Y}.}
31 -
#'   \item{\code{freq}}{The frequency of the time series.}
32 -
#'   \item{\code{...}}{Not used.}
33 -
#' }
34 -
#
30 +
#'  - \code{K}: Maximum order of the Fourier terms. Passed to
31 +
#'      \code{\link[forecast]{fourier}}.
32 +
#'  - \code{freq}: The frequency of the time series.
33 +
#'  - \code{...}: Other parameters passed to \code{\link[forecast]{fourier}}.
34 +
#'
35 +
#' @references
36 +
#'  \insertAllCited{}
37 +
#'
38 +
#' @examples
39 +
#' library(origami)
40 +
#' library(data.table)
41 +
#' data(bsds)
42 +
#'
43 +
#' # make folds appropriate for time-series cross-validation
44 +
#' folds <- make_folds(bsds,
45 +
#'   fold_fun = folds_rolling_window, window_size = 500,
46 +
#'   validation_size = 100, gap = 0, batch = 50
47 +
#' )
48 +
#'
49 +
#' # build task by passing in external folds structure
50 +
#' task <- sl3_Task$new(
51 +
#'   data = bsds,
52 +
#'   folds = folds,
53 +
#'   covariates = c(
54 +
#'     "weekday", "temp"
55 +
#'   ),
56 +
#'   outcome = "cnt"
57 +
#' )
58 +
#'
59 +
#' # create tasks for training and validation
60 +
#' train_task <- training(task, fold = task$folds[[1]])
61 +
#' valid_task <- validation(task, fold = task$folds[[1]])
62 +
#'
63 +
#' # instantiate learner, then fit and predict
64 +
#' HarReg_learner <- Lrnr_HarmonicReg$new(K = 7, freq = 105)
65 +
#' HarReg_fit <- HarReg_learner$train(train_task)
66 +
#' HarReg_preds <- HarReg_fit$predict(valid_task)
35 67
Lrnr_HarmonicReg <- R6Class(
36 68
  classname = "Lrnr_HarmonicReg",
37 69
  inherit = Lrnr_base,
38 70
  portable = TRUE,
39 71
  class = TRUE,
40 72
  public = list(
41 -
    initialize = function(Kparam, n.ahead = NULL, freq, ...) {
73 +
    initialize = function(K, freq, ...) {
42 74
      params <- args_to_list()
43 75
      super$initialize(params = params, ...)
44 -
      if (!is.null(n.ahead)) {
45 -
        warning("n.ahead paramater is specified- obtaining an ensemble will fail. 
46 -
                Please only use for obtaining individual learner forcasts.")
47 -
      }
48 76
    }
49 77
  ),
50 -
51 78
  private = list(
52 79
    .properties = c("timeseries", "continuous"),
53 -
54 80
    .train = function(task) {
55 -
      params <- self$params
56 -
      Kparam <- params[["Kparam"]]
57 -
      freq <- params[["freq"]]
58 -
      task_ts <- ts(task$Y, frequency = freq)
81 +
      args <- self$params
82 +
      args$x <- ts(task$Y, frequency = args$freq)
59 83
60 -
      if (length(freq) != length(Kparam)) {
84 +
      # Checks
85 +
      if (length(args$freq) != length(args$K)) {
61 86
        stop("Number of periods does not match number of orders")
62 -
      } else if (any(2 * Kparam > freq)) {
87 +
      } else if (any(2 * args$K > args$freq)) {
63 88
        stop("K must be not be greater than period/2")
64 89
      }
65 90
66 -
      fourier_fit <- forecast::fourier(task_ts, K = Kparam)
67 -
      fit_object <- forecast::tslm(task_ts ~ fourier_fit)
91 +
      # Passes a warning for an extra argument: that's ok
92 +
      # forecast::fourier doesn't take freq as an argument anymore
93 +
      fourier_fit <- call_with_args(
94 +
        forecast::fourier, args,
95 +
        ignore = "freq"
96 +
      )
97 +
      fit_object <- forecast::tslm(args$x ~ fourier_fit)
68 98
      return(fit_object)
69 99
    },
70 -
71 100
    .predict = function(task = NULL) {
72 -
      params <- self$params
73 -
      n.ahead <- params[["n.ahead"]]
74 -
      freq <- params[["freq"]]
75 -
      Kparam <- params[["Kparam"]]
76 -
77 -
      # See if there is gap between training and validation:
78 -
      gap <- min(task$folds[[1]]$validation_set) - max(task$folds[[1]]$training_set)
79 -
80 -
      if (gap > 1) {
81 -
        if (is.null(n.ahead)) {
82 -
          n.ahead <- task$nrow + gap
83 -
        } else {
84 -
          n.ahead <- n.ahead + gap
85 -
        }
86 -
        task_ts <- ts(task$Y, frequency = freq)
87 -
        fourier_fit <- data.frame(forecast::fourier(
88 -
          task_ts,
89 -
          K = Kparam,
90 -
          h = n.ahead
91 -
        ))
92 -
        predictions <- forecast::forecast(private$.fit_object, fourier_fit)
93 -
94 -
        # Create output as in glm
95 -
        predictions <- as.numeric(predictions$mean)
96 -
        predictions <- structure(predictions, names = seq_len(length(predictions)))
97 -
        return(predictions)
98 -
      } else if (gap == 1) {
99 -
        if (is.null(n.ahead)) {
100 -
          n.ahead <- task$nrow
101 -
        }
102 -
        task_ts <- ts(task$Y, frequency = freq)
103 -
        fourier_fit <- data.frame(forecast::fourier(
104 -
          task_ts,
105 -
          K = Kparam,
106 -
          h = n.ahead
107 -
        ))
108 -
        predictions <- forecast::forecast(private$.fit_object, fourier_fit)
109 -
110 -
        # Create output as in glm
111 -
        predictions <- as.numeric(predictions$mean)
112 -
        predictions <- structure(predictions, names = seq_len(length(predictions)))
113 -
        return(predictions)
114 -
      } else if (gap < 1) {
115 -
        warning("Validation samples come before Training samples; 
116 -
                please specify one of the time-series fold structures.")
117 -
118 -
        if (is.null(n.ahead)) {
119 -
          n.ahead <- task$nrow
120 -
        }
121 -
        task_ts <- ts(task$Y, frequency = freq)
122 -
        fourier_fit <- data.frame(forecast::fourier(
123 -
          task_ts,
124 -
          K = Kparam,
125 -
          h = n.ahead
126 -
        ))
127 -
        predictions <- forecast::forecast(private$.fit_object, fourier_fit)
128 -
129 -
        # Create output as in glm
130 -
        predictions <- as.numeric(predictions$mean)
131 -
        predictions <- structure(predictions, names = seq_len(length(predictions)))
132 -
        return(predictions)
133 -
      }
101 +
      h <- ts_get_pred_horizon(self$training_task, task)
102 +
      x <- ts(task$Y, frequency = self$params$freq)
103 +
      fourier_fit <- data.frame(
104 +
        forecast::fourier(
105 +
          x,
106 +
          K = self$params$K,
107 +
          h = h
108 +
        )
109 +
      )
110 +
      raw_preds <- forecast::forecast(private$.fit_object, fourier_fit)
111 +
      preds <- as.numeric(raw_preds$mean)
112 +
      requested_preds <- ts_get_requested_preds(
113 +
        self$training_task, task, preds
114 +
      )
115 +
      return(requested_preds)
134 116
    },
135 -
136 117
    .required_packages = c("forecast")
137 118
  )
138 119
)
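
Note: the rewritten .predict above collapses the old three-way gap branching into ts_get_pred_horizon and ts_get_requested_preds. For intuition, a rough sketch of the horizon logic the removed branches shared (the helper name is hypothetical; sl3's actual internals may differ):

# hypothetical mirror of the removed branching, for intuition only
get_pred_horizon_sketch <- function(pred_task) {
  fold <- pred_task$folds[[1]]
  gap <- min(fold$validation_set) - max(fold$training_set)
  if (gap > 1) pred_task$nrow + gap else pred_task$nrow
}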

@@ -54,10 +54,8 @@
54 54
      super$initialize(params = params, ...)
55 55
    }
56 56
  ),
57 -
58 57
  private = list(
59 58
    .properties = c("preprocessing"),
60 -
61 59
    .train = function(task) {
62 60
      verbose <- getOption("sl3.verbose")
63 61
      fit_args <- self$params[names(self$params) != "n_comp"]

@@ -50,7 +50,6 @@
50 50
  private = list(
51 51
    .covariates = NULL,
52 52
    .properties = "density",
53 -
54 53
    .train = function(task) {
55 54
      verbose <- getOption("sl3.verbose")
56 55
      params <- self$params
@@ -74,7 +73,6 @@
74 73
      fit_object$name <- "solnp"
75 74
      return(fit_object)
76 75
    },
77 -
78 76
    .predict = function(task = NULL) {
79 77
      verbose <- getOption("sl3.verbose")
80 78
      X <- task$X

@@ -44,7 +44,6 @@
44 44
      super$initialize(params = params, ...)
45 45
    }
46 46
  ),
47 -
48 47
  private = list(
49 48
    .properties = c("binomial", "continuous", "weights", "ids", "wrapper"),
50 49
    .train = function(task) {
@@ -65,7 +64,6 @@
65 64
      )$fit
66 65
      return(fit_object)
67 66
    },
68 -
69 67
    .predict = function(task) {
70 68
      args <- self$params
71 69
      outcome_type <- private$.training_outcome_type

@@ -58,15 +58,13 @@
58 58
      params <- args_to_list()
59 59
      super$initialize(params = params, ...)
60 60
      if (!is.null(n.ahead)) {
61 -
        warning("n.ahead paramater is specified- obtaining an ensemble will fail. 
61 +
        warning("n.ahead paramater is specified- obtaining an ensemble will fail.
62 62
                Please only use for obtaining individual learner forcasts.")
63 63
      }
64 64
    }
65 65
  ),
66 -
67 66
  private = list(
68 67
    .properties = c("timeseries", "continuous"),
69 -
70 68
    .train = function(task) {
71 69
      args <- self$params
72 70
      # Support for a single time-series
@@ -115,7 +113,7 @@
115 113
        predictions <- structure(predictions, names = seq_len(length(predictions)))
116 114
        return(predictions)
117 115
      } else if (gap < 1) {
118 -
        warning("Validation samples come before Training samples; 
116 +
        warning("Validation samples come before Training samples;
119 117
                please specify one of the time-series fold structures.")
120 118
121 119
        if (is.null(n.ahead)) {

@@ -1,11 +1,13 @@
1 1
#' LightGBM: Light Gradient Boosting Machine
2 2
#'
3 3
#' This learner provides fitting procedures for \code{lightgbm} models, using
4 -
#' \pkg{lightgbm}, via \code{\link[lightgbm]{lgb.train}}. These gradient
5 -
#' boosted classification and regression tree models feature faster training
6 -
#' speed and higher efficiency, Lower memory usage, better accuracy, and
7 -
#' improved handling large-scale data. For details on the fitting procedure,
8 -
#' consult the documentation of the \pkg{lightgbm} package.
4 +
#' the \pkg{lightgbm} package, via \code{\link[lightgbm]{lgb.train}}. These
5 +
#' gradient boosted decision tree models feature faster training speed and
6 +
#' efficiency, lower memory usage than competing frameworks (e.g., from the
7 +
#' \pkg{xgboost} package), better prediction accuracy, and improved handling of
8 +
#' large-scale data. For details on the fitting procedure and its tuning
9 +
#' parameters, consult the documentation of the \pkg{lightgbm} package. The
10 +
#' LightGBM framework was introduced in \insertCite{lightgbm;textual}{sl3}.
9 11
#'
10 12
#' @docType class
11 13
#'
@@ -16,18 +18,43 @@
16 18
#'
17 19
#' @keywords data
18 20
#'
19 -
#' @return Learner object with methods for training and prediction. See
20 -
#'  \code{\link{Lrnr_base}} for documentation on learners.
21 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
22 +
#'  methods for training and prediction. For a full list of learner
23 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
21 24
#'
22 -
#' @format \code{\link{R6Class}} object.
25 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
26 +
#'  \code{\link{Lrnr_base}}.
23 27
#'
24 28
#' @family Learners
25 29
#'
30 +
#' @seealso [Lrnr_gbm] for standard gradient boosting models (via the \pkg{gbm}
31 +
#'  package) and [Lrnr_xgboost] for the extreme gradient boosted tree models
32 +
#'  from the Xgboost framework (via the \pkg{xgboost} package).
33 +
#'
26 34
#' @section Parameters:
27 35
#'   - \code{num_threads = 1L}: Number of threads for hyperthreading.
28 36
#'   - \code{...}: Other arguments passed to \code{\link[lightgbm]{lgb.train}}.
29 37
#'       See its documentation for further details.
30 38
#'
39 +
#' @references
40 +
#'  \insertAllCited{}
41 +
#'
42 +
#' @examples
43 +
#' data(cpp_imputed)
44 +
#' # create task for prediction
45 +
#' cpp_task <- sl3_Task$new(
46 +
#'   data = cpp_imputed,
47 +
#'   covariates = c("bmi", "parity", "mage", "sexn"),
48 +
#'   outcome = "haz"
49 +
#' )
50 +
#'
51 +
#' # initialization, training, and prediction with the defaults
52 +
#' lgb_lrnr <- Lrnr_lightgbm$new()
53 +
#' lgb_fit <- lgb_lrnr$train(cpp_task)
54 +
#' lgb_preds <- lgb_fit$predict()
55 +
#'
56 +
#' # get feature importance from fitted model
57 +
#' lgb_varimp <- lgb_fit$importance()
31 58
Lrnr_lightgbm <- R6Class(
32 59
  classname = "Lrnr_lightgbm", inherit = Lrnr_base,
33 60
  portable = TRUE, class = TRUE,
@@ -54,7 +81,6 @@
54 81
      "continuous", "binomial", "categorical", "weights", "offset",
55 82
      "importance"
56 83
    ),
57 -
58 84
    .train = function(task) {
59 85
      args <- self$params
60 86
@@ -81,7 +107,7 @@
81 107
      # add observation-level weights if detected
82 108
      if (task$has_node("weights")) {
83 109
        try(lightgbm::setinfo(args$data, "weight", as.numeric(task$weights)),
84 -
            silent = TRUE
110 +
          silent = TRUE
85 111
        )
86 112
      }
87 113
@@ -117,7 +143,6 @@
117 143
      fit_object <- call_with_args(lightgbm::lgb.train, args, keep_all = TRUE)
118 144
      return(fit_object)
119 145
    },
120 -
121 146
    .predict = function(task = NULL) {
122 147
      fit_object <- private$.fit_object
123 148

@@ -12,37 +12,38 @@
12 12
#'
13 13
#' @keywords data
14 14
#'
15 -
#' @return Learner object with methods for training and prediction. See
16 -
#'  \code{\link{Lrnr_base}} for documentation on learners.
15 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
16 +
#'  methods for training and prediction. For a full list of learner
17 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
17 18
#'
18 -
#' @format \code{\link{R6Class}} object.
19 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
20 +
#'  \code{\link{Lrnr_base}}.
19 21
#'
20 22
#' @family Learners
21 23
#'
22 24
#' @section Parameters:
23 -
#' \describe{
24 -
#'   \item{\code{...}}{Not used.}
25 -
#' }
25 +
#'   - \code{...}: Not used.
26 26
#'
27 -
#' @template common_parameters
28 -
#
27 +
#' @examples
28 +
#' data(cpp_imputed)
29 +
#' covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs")
30 +
#' task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz")
31 +
#'
32 +
#' # fit the mean learner and predict from it
33 +
#' lrnr_mean <- make_learner(Lrnr_mean)
34 +
#' mean_fit <- lrnr_mean$train(task)
35 +
#' mean_preds <- mean_fit$predict()
29 36
Lrnr_mean <- R6Class(
30 -
  classname = "Lrnr_mean", inherit = Lrnr_base,
31 -
  portable = TRUE, class = TRUE,
37 +
  classname = "Lrnr_mean",
38 +
  inherit = Lrnr_base, portable = TRUE, class = TRUE,
32 39
  public = list(
33 40
    initialize = function(...) {
34 41
      params <- list(...)
35 42
      super$initialize(params = params, ...)
36 -
    },
37 -
38 -
    print = function() {
39 -
      print(self$name)
40 43
    }
41 44
  ),
42 -
43 45
  private = list(
44 46
    .properties = c("continuous", "binomial", "categorical", "weights", "offset"),
45 -
46 47
    .train = function(task) {
47 48
      outcome_type <- self$get_outcome_type(task)
48 49
      y <- outcome_type$format(task$Y)
@@ -75,7 +76,6 @@
75 76
76 77
      return(fit_object)
77 78
    },
78 -
79 79
    .predict = function(task = NULL) {
80 80
      predictions <- rep(private$.fit_object$mean, task$nrow)
81 81
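
Note: the entire learner reduces to storing a (possibly weighted) training mean and recycling it at prediction time, as in this standalone sketch (illustrative numbers; sl3 uses task$Y and task$weights):

y <- c(1.2, 0.7, 2.4, 1.9)
w <- c(1, 1, 2, 1)
fit_mean <- weighted.mean(y, w)   # .train: store the weighted mean
preds <- rep(fit_mean, length(y)) # .predict: same value for every row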

@@ -39,10 +39,8 @@
39 39
      super$initialize(params = params, ...)
40 40
    }
41 41
  ),
42 -
43 42
  private = list(
44 43
    .properties = c("density"),
45 -
46 44
    .train = function(task) {
47 45
      mean_learner <- self$params$mean_learner
48 46
      mean_fit <- mean_learner$train(task)
@@ -54,7 +52,6 @@
54 52
      fit_object <- list(mean_fit = mean_fit, dens_fit = dens_fit)
55 53
      return(fit_object)
56 54
    },
57 -
58 55
    .predict = function(task) {
59 56
      mean_fit <- self$fit_object$mean_fit
60 57
      dens_fit <- self$fit_object$dens_fit

@@ -4,7 +4,7 @@
4 4
#' the \pkg{xgboost} package, via \code{\link[xgboost]{xgb.train}}. Such
5 5
#' models are classification and regression trees with extreme gradient
6 6
#' boosting. For details on the fitting procedure, consult the documentation of
7 -
#' the \pkg{xgboost}.
7 +
#' the \pkg{xgboost} package and \insertCite{xgboost;textual}{sl3}.
8 8
#'
9 9
#' @docType class
10 10
#'
@@ -15,22 +15,44 @@
15 15
#'
16 16
#' @keywords data
17 17
#'
18 -
#' @return Learner object with methods for training and prediction. See
19 -
#'  \code{\link{Lrnr_base}} for documentation on learners.
18 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
19 +
#'  methods for training and prediction. For a full list of learner
20 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
20 21
#'
21 -
#' @format \code{\link{R6Class}} object.
22 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
23 +
#'  \code{\link{Lrnr_base}}.
22 24
#'
23 25
#' @family Learners
24 26
#'
27 +
#' @seealso [Lrnr_gbm] for standard gradient boosting models (via the \pkg{gbm}
28 +
#'  package) and [Lrnr_lightgbm] for the faster and more efficient gradient
29 +
#'  boosted trees from the LightGBM framework (via the \pkg{lightgbm} package).
30 +
#'
25 31
#' @section Parameters:
26 -
#' \describe{
27 -
#'   \item{\code{nrounds=20}}{Number of fitting iterations.}
28 -
#'   \item{\code{...}}{Other parameters passed to
29 -
#'     \code{\link[xgboost]{xgb.train}}.}
30 -
#' }
32 +
#'   - \code{nrounds=20}: Number of fitting iterations.
33 +
#'   - \code{...}: Other parameters passed to \code{\link[xgboost]{xgb.train}}.
34 +
#'
35 +
#' @references
36 +
#'  \insertAllCited{}
37 +
#'
38 +
#' @examples
39 +
#' data(mtcars)
40 +
#' mtcars_task <- sl3_Task$new(
41 +
#'   data = mtcars,
42 +
#'   covariates = c(
43 +
#'     "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am",
44 +
#'     "gear", "carb"
45 +
#'   ),
46 +
#'   outcome = "mpg"
47 +
#' )
48 +
#'
49 +
#' # initialization, training, and prediction with the defaults
50 +
#' xgb_lrnr <- Lrnr_xgboost$new()
51 +
#' xgb_fit <- xgb_lrnr$train(mtcars_task)
52 +
#' xgb_preds <- xgb_fit$predict()
31 53
#'
32 -
#' @template common_parameters
33 -
#
54 +
#' # get feature importance from fitted model
55 +
#' xgb_varimp <- xgb_fit$importance()
34 56
Lrnr_xgboost <- R6Class(
35 57
  classname = "Lrnr_xgboost", inherit = Lrnr_base,
36 58
  portable = TRUE, class = TRUE,
@@ -52,13 +74,11 @@
52 74
      return(importance_result)
53 75
    }
54 76
  ),
55 -
56 77
  private = list(
57 78
    .properties = c(
58 79
      "continuous", "binomial", "categorical", "weights",
59 80
      "offset", "importance"
60 81
    ),
61 -
62 82
    .train = function(task) {
63 83
      args <- self$params
64 84
@@ -125,7 +145,6 @@
125 145
126 146
      return(fit_object)
127 147
    },
128 -
129 148
    .predict = function(task = NULL) {
130 149
      fit_object <- private$.fit_object
131 150
@@ -175,7 +194,6 @@
175 194
176 195
      return(predictions)
177 196
    },
178 -
179 197
    .required_packages = c("xgboost")
180 198
  )
181 199
)
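
Note: a minimal standalone sketch of the xgb.train call this learner wraps (objective and data are illustrative; sl3 derives these from the task's outcome type):

library(xgboost)
x <- as.matrix(mtcars[, c("cyl", "disp", "wt")])
dtrain <- xgb.DMatrix(data = x, label = mtcars$mpg)
fit <- xgb.train(
  params = list(objective = "reg:squarederror"),
  data = dtrain, nrounds = 20
)
preds <- predict(fit, newdata = x)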

@@ -18,10 +18,8 @@
18 18
      super$initialize(params = params, ...)
19 19
    }
20 20
  ),
21 -
22 21
  private = list(
23 22
    .properties = c("binomial", "continuous", "weights", "wrapper"),
24 -
25 23
    .train = function(task) {
26 24
      method <- self$params$SL_wrapper
27 25
      X <- as.matrix(task$X)
@@ -34,7 +32,6 @@
34 32
      )
35 33
      return(fit_object)
36 34
    },
37 -
38 35
    .predict = function(task) {
39 36
      coef <- private$.fit_object$coef
40 37
      X <- as.matrix(task$X)

@@ -50,13 +50,11 @@
50 50
      super$initialize(params = params, ...)
51 51
    }
52 52
  ),
53 -
54 53
  private = list(
55 54
    .properties = c(
56 55
      "continuous", "binomial", "categorical", "weights",
57 56
      "offset"
58 57
    ),
59 -
60 58
    .train = function(task) {
61 59
      verbose <- getOption("sl3.verbose")
62 60
      params <- self$params
@@ -102,7 +100,6 @@
102 100
      fit_object$name <- "optim"
103 101
      return(fit_object)
104 102
    },
105 -
106 103
    .predict = function(task = NULL) {
107 104
      verbose <- getOption("sl3.verbose")
108 105

@@ -35,18 +35,15 @@
35 35
      super$initialize(...)
36 36
    }
37 37
  ),
38 -
39 38
  private = list(
40 39
    .train = function(task) {
41 40
      fit_object <- list()
42 41
      return(fit_object)
43 42
    },
44 -
45 43
    .predict = function(task = NULL) {
46 44
      # nothing to do here: we're relying on Lrnr_base to subset covariates
47 45
      return(task$X)
48 46
    },
49 -
50 47
    .chain = function(task = NULL) {
51 48
      # nothing to do here: we're relying on Lrnr_base to subset covariates
52 49
      return(task)

@@ -39,10 +39,8 @@
39 39
      super$initialize(params = params, ...)
40 40
    }
41 41
  ),
42 -
43 42
  private = list(
44 43
    .properties = c("categorical"),
45 -
46 44
    .train = function(task) {
47 45
      outcome_type <- self$get_outcome_type(task)
48 46
@@ -62,7 +60,6 @@
62 60
      )
63 61
      return(fit_object)
64 62
    },
65 -
66 63
    .predict = function(task) {
67 64
      pred_hazards_task <- pooled_hazard_task(task, trim = FALSE)
68 65
      raw_preds <- self$fit_object$hazards_fit$predict(pred_hazards_task)

@@ -17,33 +17,65 @@
17 17
#'
18 18
#' @family Learners
19 19
#'
20 -
#' @return \code{\link{Lrnr_base}} object with methods for training and prediction.
20 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
21 +
#'  methods for training and prediction. For a full list of learner
22 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
21 23
#'
22 -
#' @format \code{\link{R6Class}} object.
24 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
25 +
#'  \code{\link{Lrnr_base}}.
23 26
#'
24 27
#' @section Parameters:
25 -
#' \describe{
26 -
#'  \item{\code{batch_size}}{How many times should the training data be used to
27 -
#'  train the neural network?}
28 -
#'  \item{\code{units}}{Positive integer, dimensionality of the output space.}
29 -
#'  \item{\code{dropout}}{Float between 0 and 1. Fraction of the input units to
30 -
#'  drop.}
31 -
#'  \item{\code{recurrent_dropout}}{Float between 0 and 1. Fraction of the
32 -
#'  units to drop for the linear transformation of the recurrent state.}
33 -
#'  \item{\code{activation}}{Activation function to use. If you pass NULL, no
34 -
#'  activation is applied (e.g., "linear" activation: \code{a(x) = x}).}
35 -
#'  \item{\code{recurrent_activation}}{Activation function to use for the
36 -
#'  recurrent step.}
37 -
#'  \item{\code{recurrent_out}}{Activation function to use for the output step.}
38 -
#'  \item{\code{epochs}}{Number of epochs to train the model.}
39 -
#'  \item{\code{lr}}{Learning rate.}
40 -
#'  \item{\code{layers}}{How many lstm layers. Only allows for 1 or 2.}
41 -
#'  \item{\code{callbacks}}{List of callbacks, which is a set of functions to
42 -
#'  be applied at given stages of the training procedure. Default callback
43 -
#'  function \code{callback_early_stopping} stops training if the validation
44 -
#'  loss does not improve across \code{patience} number of epochs.}
45 -
#'  }
28 +
#'   - \code{batch_size}: Number of training samples used per gradient
29 +
#'       update.
30 +
#'   - \code{units}: Positive integer, dimensionality of the output space.
31 +
#'   - \code{dropout}: Float between 0 and 1. Fraction of the input units to
32 +
#'       drop.
33 +
#'   - \code{recurrent_dropout}: Float between 0 and 1. Fraction of the units
34 +
#'       to drop for the linear transformation of the recurrent state.
35 +
#'   - \code{activation}: Activation function to use. If you pass NULL, no
36 +
#'       activation is applied (e.g., "linear" activation: \code{a(x) = x}).
37 +
#'   - \code{recurrent_activation}: Activation function to use for the
38 +
#'       recurrent step.
39 +
#'   - \code{recurrent_out}: Activation function to use for the output step.
40 +
#'   - \code{epochs}: Number of epochs to train the model.
41 +
#'   - \code{lr}: Learning rate.
42 +
#'   - \code{layers}: How many LSTM layers. Only allows for 1 or 2.
43 +
#'   - \code{callbacks}: List of callbacks, which is a set of functions to
44 +
#'       be applied at given stages of the training procedure. Default callback
45 +
#'       function \code{callback_early_stopping} stops training if the validation
46 +
#'       loss does not improve across \code{patience} number of epochs.
47 +
#'   - \code{...}: Other parameters passed to \code{\link[keras]{keras}}.
48 +
#'   
49 +
#' @examples
50 +
#' \dontrun{
51 +
#' library(origami)
52 +
#' data(bsds)
46 53
#'
54 +
#' # make folds appropriate for time-series cross-validation
55 +
#' folds <- make_folds(bsds,
56 +
#'   fold_fun = folds_rolling_window, window_size = 500,
57 +
#'   validation_size = 100, gap = 0, batch = 50
58 +
#' )
59 +
#'
60 +
#' # build task by passing in external folds structure
61 +
#' task <- sl3_Task$new(
62 +
#'   data = bsds,
63 +
#'   folds = folds,
64 +
#'   covariates = c(
65 +
#'     "weekday", "temp"
66 +
#'   ),
67 +
#'   outcome = "cnt"
68 +
#' )
69 +
#' 
70 +
#' # create tasks for training and validation (simplified example)
71 +
#' train_task <- training(task, fold = task$folds[[1]])
72 +
#' valid_task <- validation(task, fold = task$folds[[1]])
73 +
#'
74 +
#' # instantiate learner, then fit and predict (simplified example)
75 +
#' lstm_lrnr <- Lrnr_lstm_keras$new(batch_size = 1, epochs = 200)
76 +
#' lstm_fit <- lstm_lrnr$train(train_task)
77 +
#' lstm_preds <- lstm_fit$predict(valid_task)
78 +
#' } 
47 79
Lrnr_lstm_keras <- R6Class(
48 80
  classname = "Lrnr_lstm_keras",
49 81
  inherit = Lrnr_base,
@@ -159,7 +191,6 @@
159 191
160 192
      return(fit_object)
161 193
    },
162 -
163 194
    .predict = function(task = NULL) {
164 195
      args <- self$params
165 196

@@ -91,11 +91,9 @@
91 91
      params <- list(learner = learner, folds = folds, full_fit = full_fit, ...)
92 92
      super$initialize(params = params, ...)
93 93
    },
94 -
95 94
    cv_risk = function(loss_fun) {
96 95
      return(cv_risk(self, loss_fun))
97 96
    },
98 -
99 97
    print = function() {
100 98
      print("Lrnr_cv")
101 99
      print(self$params$learner)
@@ -175,7 +173,6 @@
175 173
        column_names = new_col_names
176 174
      ))
177 175
    },
178 -
179 176
    update = function(task, drop_old = FALSE) {
180 177
      if (!self$is_trained) {
181 178
        return(self$base_train(task))
@@ -228,16 +225,13 @@
228 225
      return(new_object)
229 226
    }
230 227
  ),
231 -
232 228
  active = list(
233 229
    name = function() {
234 230
      name <- paste("CV", self$params$learner$name, sep = "_")
235 231
    }
236 232
  ),
237 -
238 233
  private = list(
239 234
    .properties = c("wrapper", "cv"),
240 -
241 235
    .train_sublearners = function(task) {
242 236
      verbose <- getOption("sl3.verbose")
243 237
@@ -281,7 +275,6 @@
281 275
      results <- list(full_fit = full_fit, fold_fits = bundle_delayed(cv_results))
282 276
      return(bundle_delayed(results))
283 277
    },
284 -
285 278
    .train = function(task, trained_sublearners) {
286 279
      # prefer folds from params, but default to folds from task
287 280
      folds <- self$params$folds
@@ -340,7 +333,6 @@
340 333
      )
341 334
      return(fit_object)
342 335
    },
343 -
344 336
    .predict = function(task) {
345 337
      folds <- task$folds
346 338
      fold_fits <- private$.fit_object$fold_fits
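
Note: a hedged usage sketch of Lrnr_cv with the cv_risk method shown above (loss_squared_error is sl3's squared-error loss function):

data(cpp_imputed)
covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs")
task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz")
cv_glm <- Lrnr_cv$new(Lrnr_glm$new(), full_fit = TRUE)
cv_glm_fit <- cv_glm$train(task)
cv_preds <- cv_glm_fit$predict()       # cross-validated predictions
cv_glm_fit$cv_risk(loss_squared_error) # CV risk, via the method above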

@@ -39,7 +39,6 @@
39 39
      super$initialize(params = params, ...)
40 40
    }
41 41
  ),
42 -
43 42
  active = list(
44 43
    name = function() {
45 44
      learner_name <- self$params$learner$name
@@ -75,7 +74,6 @@
75 74
      )
76 75
      return(fit_object)
77 76
    },
78 -
79 77
    .predict = function(task) {
80 78
      predict_univariate_learner <- function(outcome_col, outcome_fits, task) {
81 79
        univariate_task <- task$next_in_chain(outcome = outcome_col)

@@ -1,35 +1,51 @@
1 1
#' Non-negative Linear Least Squares
2 2
#'
3 -
#' This learner provides fitting procedures for models with non-negative linear
4 -
#' least squares, internally using the \code{nnls} package and
3 +
#' This learner provides fitting procedures for models via non-negative linear
4 +
#' least squares regression, using the \pkg{nnls} package's
5 5
#' \code{\link[nnls]{nnls}} function.
6 6
#'
7 7
#' @docType class
8 8
#'
9 9
#' @importFrom R6 R6Class
10 -
#' @importFrom assertthat assert_that is.count is.flag
10 +
#' @importFrom data.table data.table
11 11
#'
12 12
#' @export
13 13
#'
14 14
#' @keywords data
15 15
#'
16 -
#' @return Learner object with methods for both training and prediction. See
17 -
#'  \code{\link{Lrnr_base}} for documentation on learners.
16 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
17 +
#'  methods for training and prediction. For a full list of learner
18 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
18 19
#'
19 -
#' @format \code{\link{R6Class}} object.
20 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
21 +
#'  \code{\link{Lrnr_base}}.
20 22
#'
21 23
#' @family Learners
22 24
#'
23 25
#' @section Parameters:
24 -
#' \describe{
25 -
#'   \item{\code{convex = FALSE}}{Normalize the coefficients to be a convex
26 -
#'     combination}
27 -
#'   \item{\code{...}}{Other parameters passed to
28 -
#'     \code{\link[nnls]{nnls}}.}
29 -
#' }
26 +
#'   - \code{convex = FALSE}: Normalize the coefficients to be a convex
27 +
#'       combination.
28 +
#'   - \code{...}: Other parameters passed to \code{\link[nnls]{nnls}}.
30 29
#'
31 -
#' @template common_parameters
32 -
#
30 +
#' @examples
31 +
#' data(cpp_imputed)
32 +
#' covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs")
33 +
#' task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz")
34 +
#'
35 +
#' lrnr_nnls <- make_learner(Lrnr_nnls)
36 +
#' nnls_fit <- lrnr_nnls$train(task)
37 +
#' nnls_preds <- nnls_fit$predict()
38 +
#'
39 +
#' # NNLS is commonly used as a metalearner in a super learner (i.e., Lrnr_sl)
40 +
#' lrnr_glm <- make_learner(Lrnr_glm)
41 +
#' lrnr_glmnet <- Lrnr_glmnet$new()
42 +
#' lrnr_mean <- Lrnr_mean$new()
43 +
#' learners <- c(lrnr_glm, lrnr_glmnet, lrnr_mean)
44 +
#' names(learners) <- c("glm", "lasso", "mean") # optional, renaming learners
45 +
#' simple_learner_stack <- make_learner(Stack, learners)
46 +
#' sl <- Lrnr_sl$new(learners = simple_learner_stack, metalearner = lrnr_nnls)
47 +
#' sl_fit <- sl$train(task)
48 +
#' sl_preds <- sl_fit$predict()
33 49
Lrnr_nnls <- R6Class(
34 50
  classname = "Lrnr_nnls", inherit = Lrnr_base,
35 51
  portable = TRUE, class = TRUE,
@@ -43,7 +59,6 @@
43 59
      print(self$fits)
44 60
    }
45 61
  ),
46 -
47 62
  active = list(
48 63
    fits = function() {
49 64
      fit_object <- private$.fit_object
@@ -54,7 +69,6 @@
54 69
      }
55 70
    }
56 71
  ),
57 -
58 72
  private = list(
59 73
    .properties = c("continuous"),
60 74
    .train = function(task) {
@@ -77,7 +91,6 @@
77 91
      }
78 92
      return(fit_object)
79 93
    },
80 -
81 94
    .predict = function(task = NULL) {
82 95
      predictions <- as.matrix(task$X) %*% coef(private$.fit_object)
83 96
      return(predictions)
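
Note: a standalone sketch of the underlying nnls fit and the matrix-multiply prediction used in .predict above (simulated data; normalizing the coefficients is what the learner's convex = TRUE option does):

library(nnls)
set.seed(1)
X <- matrix(rnorm(100 * 3), ncol = 3)
y <- as.vector(X %*% c(0.5, 0, 0.8) + rnorm(100, sd = 0.1))
fit <- nnls::nnls(A = X, b = y)
beta <- fit$x                   # non-negative coefficients
preds <- X %*% beta
beta_convex <- beta / sum(beta) # convex-combination version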

@@ -23,10 +23,8 @@
23 23
      super$initialize(params = params, ...)
24 24
    }
25 25
  ),
26 -
27 26
  private = list(
28 27
    .properties = c("binomial", "continuous", "weights", "ids", "wrapper"),
29 -
30 28
    .train = function(task) {
31 29
      args <- self$params
32 30
      outcome_type <- self$get_outcome_type(task)
@@ -54,11 +52,9 @@
54 52
      fit_object <- list(selected = covariates[covariate_selected])
55 53
      return(fit_object)
56 54
    },
57 -
58 55
    .predict = function(task) {
59 56
      task$X[, private$.fit_object$selected, with = FALSE, drop = FALSE]
60 57
    },
61 -
62 58
    .chain = function(task) {
63 59
      return(task$next_in_chain(covariates = private$.fit_object$selected))
64 60
    },

@@ -1,4 +1,4 @@
1 -
#' Exponential Smoothing
1 +
#' Exponential Smoothing State Space Model
2 2
#'
3 3
#' This learner supports exponential smoothing models using
4 4
#' \code{\link[forecast]{ets}}.
@@ -20,52 +20,75 @@
20 20
#' @family Learners
21 21
#'
22 22
#' @section Parameters:
23 -
#' \describe{
24 -
#'   \item{\code{model="ZZZ"}}{Three-character string identifying method. In all
23 +
#'  - \code{model="ZZZ"}: Three-character string identifying method. In all
25 24
#'     cases, "N"=none, "A"=additive, "M"=multiplicative, and "Z"=automatically
26 25
#'     selected. The first letter denotes the error type, second letter denotes
27 26
#'     the trend type, third letter denotes the season type. For example, "ANN"
28 27
#'     is simple exponential smoothing with additive errors, "MAM" is
29 -
#'     multiplicative Holt-Winters' methods with multiplicative errors, etc.}
30 -
#'   \item{\code{damped=NULL}}{If TRUE, use a damped trend (either additive or
28 +
#'     multiplicative Holt-Winters' methods with multiplicative errors, etc.
29 +
#'  - \code{damped=NULL}: If TRUE, use a damped trend (either additive or
31 30
#'     multiplicative). If NULL, both damped and non-damped trends will be tried
32 -
#'     and the best model (according to the information criterion ic) returned.}
33 -
#'   \item{\code{alpha=NULL}}{Value of alpha. If NULL, it is estimated.}
34 -
#'   \item{\code{beta=NULL}}{Value of beta. If NULL, it is estimated.}
35 -
#'   \item{\code{gamma=NULL}}{Value of gamma. If NULL, it is estimated.}
36 -
#'   \item{\code{phi=NULL}}{Value of phi. If NULL, it is estimated.}
37 -
#'   \item{\code{lambda=NULL}}{Box-Cox transformation parameter. Ignored if
38 -
#'     \code{NULL}. When lambda is specified, \code{additive.only} is set to
39 -
#'     \code{TRUE}.}
40 -
#'   \item{\code{additive.only=FALSE}}{If \code{TRUE}, will only consider
41 -
#'     additive models.}
42 -
#'   \item{\code{biasadj=FALSE}}{Use adjusted back-transformed mean for Box-Cox
43 -
#'     transformations.}
44 -
#'   \item{\code{lower=c(rep(1e-04, 3), 0.8)}}{Lower bounds for the parameters
45 -
#'     (alpha, beta, gamma, phi).}
46 -
#'   \item{\code{upper=c(rep(0.9999,3), 0.98)}}{Upper bounds for the parameters
47 -
#'     (alpha, beta, gamma, phi)}
48 -
#'   \item{\code{opt.crit="lik"}}{Optimization criterion.}
49 -
#'   \item{\code{nmse=3}}{Number of steps for average multistep MSE (1 <= nmse
50 -
#'     <= 30).}
51 -
#'   \item{\code{bounds="both"}}{Type of parameter space to impose: "usual"
31 +
#'     and the best model (according to the information criterion ic) returned.
32 +
#'  - \code{alpha=NULL}: Value of alpha. If NULL, it is estimated.
33 +
#'  - \code{beta=NULL}: Value of beta. If NULL, it is estimated.
34 +
#'  - \code{gamma=NULL}: Value of gamma. If NULL, it is estimated.
35 +
#'  - \code{phi=NULL}: Value of phi. If NULL, it is estimated.
36 +
#'  - \code{lambda=NULL}: Box-Cox transformation parameter. Ignored if
37 +
#'    \code{NULL}. When lambda is specified, \code{additive.only} is set to
38 +
#'    \code{TRUE}.
39 +
#'  - \code{additive.only=FALSE}: If \code{TRUE}, will only consider
40 +
#'     additive models.
41 +
#'  - \code{biasadj=FALSE}: Use adjusted back-transformed mean for Box-Cox
42 +
#'     transformations.
43 +
#'  - \code{lower=c(rep(1e-04, 3), 0.8)}: Lower bounds for the parameters
44 +
#'     (alpha, beta, gamma, phi).
45 +
#'  - \code{upper=c(rep(0.9999,3), 0.98)}: Upper bounds for the parameters
46 +
#'     (alpha, beta, gamma, phi).
47 +
#'  - \code{opt.crit="lik"}: Optimization criterion.
48 +
#'  - \code{nmse=3}: Number of steps for average multistep MSE (1 <= nmse
49 +
#'     <= 30).
50 +
#'  - \code{bounds="both"}" Type of parameter space to impose: "usual"
52 51
#'     indicates all parameters must lie between specified lower and upper
53 52
#'     bounds; "admissible" indicates parameters must lie in the admissible
54 -
#'     space; "both" (default) takes the intersection of these regions.}
55 -
#'   \item{\code{ic="aic"}}{Information criterion to be used in model
56 -
#'     selection.}
57 -
#'   \item{\code{restrict=TRUE}}{If TRUE, models with infinite variance will not
58 -
#'     be allowed.}
59 -
#'   \item{\code{allow.multiplicative.trend=FALSE}}{If TRUE, models with
60 -
#'     multiplicative trend are allowed when searching for a model.}
61 -
#'   \item{\code{use.initial.values=FALSE}}{If \code{TRUE} and model is of class
62 -
#'     "ets", then the initial values in the model are also not re-estimated.}
63 -
#'   \item{\code{n.ahead}}{The forecast horizon. If not specified, returns
64 -
#'     forecast of size \code{task$X}.}
65 -
#'   \item{\code{freq=1}}{the number of observations per unit of time.}
66 -
#'   \item{\code{...}}{Other parameters passed to \code{\link[forecast]{ets}.}}
67 -
#' }
68 -
#
53 +
#'     space; "both" (default) takes the intersection of these regions.
54 +
#'  - \code{ic="aic"}: Information criterion to be used in model
55 +
#'     selection.
56 +
#'  - \code{restrict=TRUE}: If TRUE, models with infinite variance will not
57 +
#'     be allowed.
58 +
#'  - \code{allow.multiplicative.trend=FALSE}: If TRUE, models with
59 +
#'     multiplicative trend are allowed when searching for a model.
60 +
#'  - \code{use.initial.values=FALSE}: If \code{TRUE} and model is of class
61 +
#'     "ets", then the initial values in the model are also not re-estimated.
62 +
#'  - \code{n.ahead}: The forecast horizon. If not specified, returns
63 +
#'     forecast of size \code{task$X}.
64 +
#'  - \code{freq=1}: The number of observations per unit of time.
65 +
#'  - \code{...}: Other parameters passed to \code{\link[forecast]{ets}}.
66 +
#'
67 +
#' @examples
68 +
#' library(origami)
69 +
#' data(bsds)
70 +
#'
71 +
#' folds <- make_folds(bsds,
72 +
#'   fold_fun = folds_rolling_window, window_size = 500,
73 +
#'   validation_size = 100, gap = 0, batch = 50
74 +
#' )
75 +
#'
76 +
#' task <- sl3_Task$new(
77 +
#'   data = bsds,
78 +
#'   folds = folds,
79 +
#'   covariates = c(
80 +
#'     "weekday", "temp"
81 +
#'   ),
82 +
#'   outcome = "cnt"
83 +
#' )
84 +
#'
85 +
#' expSmooth_lrnr <- make_learner(Lrnr_expSmooth)
86 +
#'
87 +
#' train_task <- training(task, fold = task$folds[[1]])
88 +
#' valid_task <- validation(task, fold = task$folds[[1]])
89 +
#'
90 +
#' expSmooth_fit <- expSmooth_lrnr$train(train_task)
91 +
#' expSmooth_preds <- expSmooth_fit$predict(valid_task)
69 92
Lrnr_expSmooth <- R6Class(
70 93
  classname = "Lrnr_expSmooth",
71 94
  inherit = Lrnr_base,
@@ -84,17 +107,14 @@
84 107
      super$initialize(params = args_to_list(), ...)
85 108
    }
86 109
  ),
87 -
88 110
  private = list(
89 111
    .properties = c("timeseries", "continuous"),
90 -
91 112
    .train = function(task) {
92 113
      args <- self$params
93 114
      args$y <- ts(task$Y, frequency = args$freq)
94 115
      fit_object <- call_with_args(forecast::ets, args)
95 116
      return(fit_object)
96 117
    },
97 -
98 118
    .predict = function(task = NULL) {
99 119
      h <- ts_get_pred_horizon(self$training_task, task)
100 120
      raw_preds <- forecast::forecast(private$.fit_object, h = h)
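
Note: a standalone sketch of the ets/forecast pair used by .train and .predict above (AirPassengers stands in for a real series; sl3 builds the ts from task$Y and freq):

library(forecast)
fit <- ets(AirPassengers, model = "ZZZ") # components selected automatically
fc <- forecast(fit, h = 12)              # 12-step-ahead forecast
as.numeric(fc$mean)                      # point predictions, as .predict returns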

@@ -43,10 +43,8 @@
43 43
      super$initialize(params = params, ...)
44 44
    }
45 45
  ),
46 -
47 46
  private = list(
48 47
    .properties = c("continuous", "binomial", "categorical", "weights"),
49 -
50 48
    .train = function(task) {
51 49
      args <- self$params
52 50
      outcome_type <- self$get_outcome_type(task)
@@ -70,7 +68,6 @@
70 68
      }
71 69
      return(fit_object)
72 70
    },
73 -
74 71
    .predict = function(task) {
75 72
      outcome_type <- self$get_outcome_type(task)
76 73
      if (outcome_type$type == "continuous") {

@@ -50,9 +50,12 @@
50 50
    },
51 51
    glm_family = function(return_object = FALSE) {
52 52
      type <- self$type
53 -
      family <- switch(type, continuous = "gaussian", binomial = "binomial",
53 +
      family <- switch(type,
54 +
        continuous = "gaussian",
55 +
        binomial = "binomial",
54 56
        quasibinomial = "quasibinomial",
55 -
        categorical = "multinomial", constant = "binomial",
57 +
        categorical = "multinomial",
58 +
        constant = "binomial",
56 59
        "unknown"
57 60
      )
58 61
      if (family == "unknown") {

@@ -112,10 +112,8 @@
112 112
      super$initialize(params = args_to_list(), ...)
113 113
    }
114 114
  ),
115 -
116 115
  private = list(
117 116
    .properties = c("continuous", "binomial", "weights"),
118 -
119 117
    .train = function(task) {
120 118
      args <- self$params
121 119
      outcome_type <- self$get_outcome_type(task)
@@ -146,7 +144,6 @@
146 144
147 145
      return(fit_object)
148 146
    },
149 -
150 147
    .predict = function(task) {
151 148
      outcome_type <- private$.training_outcome_type
152 149

@@ -40,7 +40,6 @@
40 40
41 41
      super$initialize(params = params, ...)
42 42
    },
43 -
44 43
    sample = function(task, n_samples = 30, fold_number = "full") {
45 44
      # TODO: fold
46 45
      # method: inverse cdf
@@ -67,7 +66,6 @@
67 66
68 67
      return(t(obs_samples))
69 68
    },
70 -
71 69
    get_outcome_range = function(task = NULL, fold_number = "full") {
72 70
      # TODO: fold
73 71
      mean_fit <- self$fit_object$mean_fit
@@ -87,10 +85,8 @@
87 85
      return(range)
88 86
    }
89 87
  ),
90 -
91 88
  private = list(
92 89
    .properties = c("density", "sampling"),
93 -
94 90
    .train = function(task) {
95 91
      mean_learner <- self$params$mean_learner
96 92
      var_learner <- self$params$var_learner
@@ -131,7 +127,6 @@
131 127
132 128
      return(fit_object)
133 129
    },
134 -
135 130
    .predict = function(task) {
136 131
      mean_fit <- self$fit_object$mean_fit
137 132
      var_fit <- self$fit_object$var_fit

@@ -79,7 +79,6 @@
79 79
      super$initialize(params = args_to_list(), ...)
80 80
    }
81 81
  ),
82 -
83 82
  private = list(
84 83
    .properties = c(
85 84
      "continuous", "binomial", "categorical", "weights",
@@ -139,7 +138,6 @@
139 138
      # fit_object@model$coefficients
140 139
      return(fit_object)
141 140
    },
142 -
143 141
    .predict = function(task = NULL) {
144 142
      verbose <- getOption("sl3.verbose")
145 143
      if (verbose) {

@@ -1,51 +1,80 @@
1 1
#' Conditional Density Estimation with the Highly Adaptive LASSO
2 2
#'
3 3
#' @docType class
4 +
#'
4 5
#' @importFrom R6 R6Class
5 6
#'
6 7
#' @export
7 8
#'
8 9
#' @keywords data
9 10
#'
10 -
#' @return Learner object with methods for training and prediction. See
11 -
#'  \code{\link{Lrnr_base}} for documentation on learners.
11 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
12 +
#'  methods for training and prediction. For a full list of learner
13 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
12 14
#'
13 -
#' @format \code{\link{R6Class}} object.
15 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
16 +
#'  \code{\link{Lrnr_base}}.
14 17
#'
15 18
#' @family Learners
16 19
#'
17 20
#' @section Parameters:
18 -
#' \describe{
19 -
#'   \item{\code{grid_type = c("equal_range", "equal_mass")}}{\code{character}
20 -
#'    indicating the strategy to be used in creating bins along the observed
21 -
#'    support of the outcome variable. For bins of equal range, use
22 -
#'    "equal_range" (based on \code{\link[ggplot2]{cut_interval}}). To ensure
23 -
#'    each bin has the same number of observations, use "equal_mass" (based on
24 -
#'    \code{\link[ggplot2]{cut_number}}).
25 -
#'   }
26 -
#'   \item{\code{n_bins = c(3, 5)}}{Only used if \code{type} is set to
27 -
#'    \code{"equal_range"} or \code{"equal_mass"}. This \code{numeric} value
28 -
#'    indicates the number of bins that the support of the outcome variable is
29 -
#'    to be divided into.
30 -
#'   }
31 -
#'   \item{\code{lambda_seq = exp(seq(-1, -13, length = 1000L))}}{\code{numeric}
32 -
#'    sequence of values of the regulariztion parameter of the Lasso regression,
33 -
#'    to be passed to to \code{\link[hal9001]{fit_hal}}.
34 -
#'   }
35 -
#'   \item{\code{trim_dens = 1/sqrt(n)}}{A \code{numeric} giving the minimum
36 -
#'     allowed value of the resultant density predictions. Any predicted
37 -
#'     density values below this tolerance threshold are set to the indicated
38 -
#'     minimum. The default is to use the inverse of the square root of the
39 -
#'     sample size of the prediction set, i.e., 1/sqrt(n); another notable
40 -
#'     choice is 1/sqrt(n)/log(n). If there are observations in the prediction
41 -
#'     set with values of \code{new_A} outside of the support of the training
42 -
#'     set, their predictions are similarly truncated.
43 -
#'   }
44 -
#'   \item{\code{...}}{ Other parameters passed directly to
45 -
#'    \code{\link[haldensify]{haldensify}}. See its documentation for details.
46 -
#'   }
47 -
#' }
48 -
#
21 +
#'   - \code{grid_type = "equal_range"}: A \code{character} indicating the
22 +
#'       strategy to be used in creating bins along the observed support of
23 +
#'       \code{A}. For bins of equal range, use \code{"equal_range"}; consult
24 +
#'       the documentation of \code{\link[ggplot2]{cut_interval}} for further
25 +
#'       information. To ensure each bin has the same number of observations,
26 +
#'       use \code{"equal_mass"}; consult the documentation of
27 +
#'       \code{\link[ggplot2]{cut_number}} for details. The default is
28 +
#'       \code{"equal_range"} since this has been found to provide better
29 +
#'       performance in simulation experiments; however, both types may be
30 +
#'       specified (i.e., \code{c("equal_range", "equal_mass")}) together, in
31 +
#'       which case cross-validation will be used to select the optimal binning
32 +
#'       strategy.
33 +
#'   - \code{n_bins = c(3, 5)}: This \code{numeric} value indicates the number
34 +
#'       of bins into which the support of \code{A} is to be divided. As with
35 +
#'       \code{grid_type}, multiple values may be specified, in which case
36 +
#'       cross-validation will be used to select the optimal number of bins.
37 +
#'   - \code{lambda_seq = exp(seq(-1, -13, length = 1000L))}: A \code{numeric}
38 +
#'       sequence of regularization parameter values of Lasso regression, which
39 +
#'       are passed to \code{\link[hal9001]{fit_hal}} via its argument
40 +
#'       \code{lambda}, itself passed to \code{\link[glmnet]{glmnet}}.
41 +
#'   - \code{trim_dens = 1/sqrt(n)}: A \code{numeric} giving the minimum
42 +
#'       allowed value of the resultant density predictions. Any predicted
43 +
#'       density values below this tolerance threshold are set to the indicated
44 +
#'       minimum. The default is to use the inverse of the square root of the
45 +
#'       sample size of the prediction set, i.e., 1/sqrt(n); another notable
46 +
#'       choice is 1/sqrt(n)/log(n). If there are observations in the
47 +
#'       prediction set with values of \code{new_A} outside of the support of
48 +
#'       the training set, their predictions are similarly truncated.
49 +
#'   - \code{...}: Other arguments to be passed directly to
50 +
#'       \code{\link[haldensify]{haldensify}}. See its documentation for
51 +
#'       details.
52 +
#'
53 +
#' @examples
54 +
#' library(dplyr)
55 +
#' data(cpp_imputed)
56 +
#' covars <- c("parity", "sexn")
57 +
#' outcome <- "haz"
58 +
#'
59 +
#' # create task
60 +
#' task <- cpp_imputed %>%
61 +
#'   slice(seq(1, nrow(.), by = 3)) %>%
62 +
#'   filter(agedays == 1) %>%
63 +
#'   sl3_Task$new(
64 +
#'     covariates = covars,
65 +
#'     outcome = outcome
66 +
#'   )
67 +
#'
68 +
#' # instantiate the learner
69 +
#' hal_dens <- Lrnr_haldensify$new(
70 +
#'   grid_type = "equal_range",
71 +
#'   n_bins = c(3, 5),
72 +
#'   lambda_seq = exp(seq(-1, -13, length = 100))
73 +
#' )
74 +
#'
75 +
#' # fit and predict densities
76 +
#' hal_dens_fit <- hal_dens$train(task)
77 +
#' hal_dens_preds <- hal_dens_fit$predict()
49 78
Lrnr_haldensify <- R6Class(
50 79
  classname = "Lrnr_haldensify", inherit = Lrnr_base,
51 80
  portable = TRUE, class = TRUE,
@@ -77,7 +106,6 @@
77 106
  ),
78 107
  private = list(
79 108
    .properties = c("density"),
80 -
81 109
    .train = function(task) {
82 110
      args <- self$params
83 111

@@ -22,20 +22,45 @@
22 22
#' @format \code{\link{R6Class}} object.
23 23
#'
24 24
#' @section Parameters:
25 -
#' \describe{
26 -
#'   \item{\code{order=NULL}}{An optional specification of the non-seasonal
27 -
#'    part of the ARIMA model: the three integer components (p, d, q) are the
25 +
#'  - \code{order}: An optional specification of the non-seasonal
26 +
#'    part of the ARIMA model; the three integer components (p, d, q) are the
28 27
#'    AR order, the degree of differencing, and the MA order. If order is
29 28
#'    specified, then \code{\link[stats]{arima}} will be called; otherwise,
30 29
#'    \code{\link[forecast]{auto.arima}} will be used to fit the "best" ARIMA
31 -
#'    model according to AIC (default), AICc or BIC. The information criterion
30 +
#'    model according to AIC (default), AICc or BIC. The information criterion
32 31
#'    to be used in \code{\link[forecast]{auto.arima}} model selection can be
33 -
#'    modified by specifying \code{ic} argument.}
34 -
#'  \item{\code{...}}{Other parameters passed to \code{\link[stats]{arima}} or
32 +
#'    modified by specifying the \code{ic} argument.
33 +
#'  - \code{num_screen = 5}: The top n number of "most important" variables to
34 +
#'      retain.
35 +
#'  - \code{...}: Other parameters passed to \code{\link[stats]{arima}} or
35 36
#'    \code{\link[forecast]{auto.arima}} function, depending on whether or not
36 -
#'    \code{order} argument is provided.}
37 -
#' }
38 -
#
37 +
#'    \code{order} argument is provided.
38 +
#'
39 +
#' @examples
40 +
#' library(origami)
41 +
#' data(bsds)
42 +
#'
43 +
#' folds <- make_folds(bsds,
44 +
#'   fold_fun = folds_rolling_window, window_size = 500,
45 +
#'   validation_size = 100, gap = 0, batch = 50
46 +
#' )
47 +
#'
48 +
#' task <- sl3_Task$new(
49 +
#'   data = bsds,
50 +
#'   folds = folds,
51 +
#'   covariates = c(
52 +
#'     "weekday", "temp"
53 +
#'   ),
54 +
#'   outcome = "cnt"
55 +
#' )
56 +
#'
57 +
#' arima_lrnr <- make_learner(Lrnr_arima)
58 +
#'
59 +
#' train_task <- training(task, fold = task$folds[[1]])
60 +
#' valid_task <- validation(task, fold = task$folds[[1]])
61 +
#'
62 +
#' arima_fit <- arima_lrnr$train(train_task)
63 +
#' arima_preds <- arima_fit$predict(valid_task)
39 64
Lrnr_arima <- R6Class(
40 65
  classname = "Lrnr_arima",
41 66
  inherit = Lrnr_base,
@@ -49,7 +74,6 @@
49 74
  ),
50 75
  private = list(
51 76
    .properties = c("timeseries", "continuous"),
52 -
53 77
    .train = function(task) {
54 78
      params <- self$params
55 79
@@ -85,7 +109,6 @@
85 109
86 110
      return(fit_object)
87 111
    },
88 -
89 112
    .predict = function(task = NULL) {
90 113
      fit_object <- private$.fit_object
91 114
      h <- ts_get_pred_horizon(self$training_task, task)

@@ -50,10 +50,8 @@
50 50
      super$initialize(params = params)
51 51
    }
52 52
  ),
53 -
54 53
  private = list(
55 54
    .properties = c("binomial", "continuous", "categorical", "screener"),
56 -
57 55
    .train = function(task) {
58 56
      outcome_type <- self$get_outcome_type(task)
59 57
      X <- task$X
@@ -82,11 +80,9 @@
82 80
      fit_object <- list(selected = covs[selected_covs])
83 81
      return(fit_object)
84 82
    },
85 -
86 83
    .predict = function(task) {
87 84
      task$X[, private$.fit_object$selected, with = FALSE, drop = FALSE]
88 85
    },
89 -
90 86
    .chain = function(task) {
91 87
      return(task$next_in_chain(covariates = private$.fit_object$selected))
92 88
    }

@@ -82,7 +82,6 @@
82 82
      super$initialize(params = params, ...)
83 83
    }
84 84
  ),
85 -
86 85
  private = list(
87 86
    .properties = c("timeseries", "continuous"),
88 87
    .train = function(task) {
@@ -94,7 +93,6 @@
94 93
      fit_object <- call_with_args(hts, args, silent = TRUE)
95 94
      return(fit_object)
96 95
    },
97 -
98 96
    .predict = function(task = NULL) {
99 97
      args <- self$params
100 98
      # get horizon based on training and testing tasks

@@ -138,7 +138,6 @@
138 138
139 139
      invisible(self)
140 140
    },
141 -
142 141
    add_interactions = function(interactions, warn_on_existing = TRUE) {
143 142
      ## ----------------------------------------------------------------------
144 143
      ## Add columns with interactions (by reference) to input design matrix
@@ -210,7 +209,6 @@
210 209
        column_names = interaction_cols
211 210
      ))
212 211
    },
213 -
214 212
    add_columns = function(new_data, column_uuid = uuid::UUIDgenerate()) {
215 213
      if (is.numeric(private$.row_index)) {
216 214
        new_col_map <- private$.shared_data$add_columns(
@@ -230,7 +228,6 @@
230 228
      # return an updated column_names map
231 229
      return(column_names)
232 230
    },
233 -
234 231
    next_in_chain = function(covariates = NULL, outcome = NULL, id = NULL,
235 232
                             weights = NULL, offset = NULL, time = NULL,
236 233
                             folds = NULL, column_names = NULL,
@@ -344,7 +341,6 @@
344 341
      )
345 342
      return(new_task)
346 343
    },
347 -
348 344
    get_data = function(rows = NULL, columns, expand_factors = FALSE) {
349 345
      if (missing(rows)) {
350 346
        rows <- private$.row_index
@@ -363,12 +359,10 @@
363 359
      }
364 360
      return(subset)
365 361
    },
366 -
367 362
    has_node = function(node_name) {
368 363
      node_var <- private$.nodes[[node_name]]
369 364
      return(!is.null(node_var))
370 365
    },
371 -
372 366
    get_node = function(node_name, generator_fun = NULL,
373 367
                        expand_factors = FALSE) {
374 368
      if (missing(generator_fun)) {
@@ -390,7 +384,6 @@
390 384
        }
391 385
      }
392 386
    },
393 -
394 387
    offset_transformed = function(link_fun = NULL, for_prediction = FALSE) {
395 388
      if (self$has_node("offset")) {
396 389
        offset <- self$offset
@@ -405,27 +398,22 @@
405 398
      }
406 399
      return(offset)
407 400
    },
408 -
409 401
    print = function() {
410 402
      cat(sprintf("A sl3 Task with %d obs and these nodes:\n", self$nrow))
411 403
      print(self$nodes)
412 404
    },
413 -
414 405
    revere_fold_task = function(fold_number) {
415 406
      return(self)
416 407
    }
417 408
  ),
418 -
419 409
  active = list(
420 410
    internal_data = function() {
421 411
      return(private$.shared_data)
422 412
    },
423 -
424 413
    data = function() {
425 414
      all_nodes <- unique(unlist(private$.nodes))
426 415
      return(self$get_data(, all_nodes))
427 416
    },
428 -
429 417
    nrow = function() {
430 418
      if (is.null(private$.row_index)) {
431 419
        return(private$.shared_data$nrow)
@@ -433,17 +421,14 @@
433 421
        return(length(private$.row_index))
434 422
      }
435 423
    },
436 -
437 424
    nodes = function() {
438 425
      return(private$.nodes)
439 426
    },
440 -
441 427
    X = function() {
442 428
      covariates <- private$.nodes$covariates
443 429
      X_dt <- self$get_data(, covariates, expand_factors = TRUE)
444 430
      return(X_dt)
445 431
    },
446 -
447 432
    X_intercept = function() {
448 433
      # returns X matrix with manually generated intercept column
449 434
      X_dt <- self$X
@@ -461,27 +446,22 @@
461 446
462 447
      return(X_dt)
463 448
    },
464 -
465 449
    Y = function() {
466 450
      return(self$get_node("outcome"))
467 451
    },
468 -
469 452
    offset = function() {
470 453
      return(self$get_node("offset"))
471 454
    },
472 -
473 455
    weights = function() {
474 456
      return(self$get_node("weights", function(node_var, n) {
475 457
        rep(1, n)
476 458
      }))
477 459
    },
478 -
479 460
    id = function() {
480 461
      return(self$get_node("id", function(node_var, n) {
481 462
        seq_len(n)
482 463
      }))
483 464
    },
484 -
485 465
    time = function() {
486 466
      return(self$get_node("time", function(node_var, n) {
487 467
        if (self$has_node("id")) {
@@ -491,7 +471,6 @@
491 471
        }
492 472
      }))
493 473
    },
494 -
495 474
    folds = function(new_folds) {
496 475
      if (!missing(new_folds)) {
497 476
        private$.folds <- new_folds
@@ -517,24 +496,19 @@
517 496
      }
518 497
      return(private$.folds)
519 498
    },
520 -
521 499
    uuid = function() {
522 500
      return(private$.uuid)
523 501
    },
524 -
525 502
    column_names = function() {
526 503
      return(private$.column_names)
527 504
    },
528 -
529 505
    outcome_type = function() {
530 506
      return(private$.outcome_type)
531 507
    },
532 -
533 508
    row_index = function() {
534 509
      return(private$.row_index)
535 510
    }
536 511
  ),
537 -
538 512
  private = list(
539 513
    .shared_data = NULL,
540 514
    .nodes = NULL,

@@ -82,7 +82,6 @@
82 82
      super$initialize(params = params, ...)
83 83
    }
84 84
  ),
85 -
86 85
  private = list(
87 86
    .properties = c("timeseries", "continuous"),
88 87
    .train = function(task) {
@@ -94,7 +93,6 @@
94 93
      fit_object <- call_with_args(gts, args, silent = TRUE)
95 94
      return(fit_object)
96 95
    },
97 -
98 96
    .predict = function(task = NULL) {
99 97
      args <- self$params
100 98
      # get horizon based on training and testing tasks

@@ -89,7 +89,6 @@
89 89
      super$initialize(params = params, ...)
90 90
    }
91 91
  ),
92 -
93 92
  active = list(
94 93
    name = function() {
95 94
      name <- paste(self$params$learner$name,
@@ -110,11 +109,8 @@
110 109
      }
111 110
    }
112 111
  ),
113 -
114 112
  private = list(
115 113
    .properties = c("wrapper", "cv"),
116 -
117 -
118 114
    .train = function(task) {
119 115
120 116
      # ensure task contains node for time
@@ -153,7 +149,6 @@
153 149
154 150
      return(fit_object)
155 151
    },
156 -
157 152
    .predict = function(task) {
158 153
      self$fit_object$predict(task)
159 154
    }

@@ -43,7 +43,6 @@
43 43
    .properties = c(
44 44
      "continuous", "binomial", "categorical", "weights", "wrapper"
45 45
    ),
46 -
47 46
    .train = function(task) {
48 47
      verbose <- getOption("sl3.verbose")
49 48
      params <- self$params
@@ -71,7 +70,6 @@
71 70
72 71
      return(fit_object)
73 72
    },
74 -
75 73
    .predict = function(task = NULL) {
76 74
      verbose <- getOption("sl3.verbose")
77 75
      X <- as.matrix(task$X)

@@ -40,10 +40,8 @@
40 40
      super$initialize(params = params, ...)
41 41
    }
42 42
  ),
43 -
44 43
  private = list(
45 44
    .properties = c("categorical"),
46 -
47 45
    .train = function(task) {
48 46
      outcome_type <- self$get_outcome_type(task)
49 47
@@ -80,7 +78,6 @@
80 78
      }
81 79
      return(fit_object)
82 80
    },
83 -
84 81
    .predict = function(task) {
85 82
      raw_preds <- lapply(self$fit_object, learner_fit_predict, task)
86 83
      raw_preds <- as.matrix(do.call(cbind, raw_preds))
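The two lines above collect each stacked learner's predictions and column-bind them into one matrix. The lapply/cbind pattern in isolation, with toy vectors standing in for the learner predictions:

preds_by_learner <- list(lrnr_glm = rnorm(5), lrnr_mean = rep(0, 5))
pred_matrix <- as.matrix(do.call(cbind, preds_by_learner))
dim(pred_matrix) # 5 rows (observations) by 2 columns (learners)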

@@ -55,7 +55,6 @@
55 55
      super$initialize(params = args_to_list(), ...)
56 56
    }
57 57
  ),
58 -
59 58
  private = list(
60 59
    .classify = FALSE,
61 60
    .return_prediction_as_vector = TRUE,
@@ -63,7 +62,6 @@
63 62
      "continuous", "binomial", "categorical", "weights",
64 63
      "offset", "h2o"
65 64
    ),
66 -
67 65
    .train = function(task) {
68 66
      verbose <- getOption("sl3.verbose")
69 67
      args <- self$params

@@ -59,7 +59,6 @@
59 59
      }
60 60
      super$initialize(params = params)
61 61
    },
62 -
63 62
    print = function() {
64 63
      if (is.null(private$.fit_object)) {
65 64
        lapply(self$params$learners, print)
@@ -67,7 +66,6 @@
67 66
        lapply(private$.fit_object, print)
68 67
      }
69 68
    },
70 -
71 69
    predict_fold = function(task, fold_number) {
72 70
73 71
      # prediction is just chaining until you get to the last fit, and then
@@ -87,7 +85,6 @@
87 85
      return(predictions)
88 86
    }
89 87
  ),
90 -
91 88
  active = list(
92 89
    name = function() {
93 90
      learners <- self$params$learners
@@ -100,7 +97,6 @@
100 97
      return(result)
101 98
    }
102 99
  ),
103 -
104 100
  private = list(
105 101
    .train_sublearners = function(task) {
106 102
      learners <- self$params$learners
@@ -116,13 +112,11 @@
116 112
      }
117 113
      return(bundle_delayed(learner_fits))
118 114
    },
119 -
120 115
    .train = function(task, trained_sublearners) {
121 116
      names(trained_sublearners) <- private$.learner_names
122 117
      fit_object <- list(learner_fits = trained_sublearners)
123 118
      return(fit_object)
124 119
    },
125 -
126 120
    .predict = function(task) {
127 121
      # prediction is just chaining until you get to the last fit, and then
128 122
      # calling predict
@@ -140,7 +134,6 @@
140 134
      predictions <- current_fit$base_predict(current_task)
141 135
      return(predictions)
142 136
    },
143 -
144 137
    .learner_names = NULL
145 138
  )
146 139
)
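The .predict comments above describe Pipeline prediction as chaining every fit but the last and calling predict on the final one. A schematic of that control flow (fits and task are hypothetical stand-ins for a trained pipeline's learner fits and an sl3 task; only the chain-then-predict loop is taken from the diff):

pipeline_predict_sketch <- function(fits, task) {
  current_task <- task
  # every fit except the last transforms the task for its successor
  for (i in seq_len(length(fits) - 1)) {
    current_task <- fits[[i]]$chain(current_task)
  }
  # only the final fit produces predictions
  fits[[length(fits)]]$predict(current_task)
}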

@@ -1,9 +1,10 @@
1 -
#' Earth - multivariate adaptive regression splines
1 +
#' Earth: Multivariate Adaptive Regression Splines
2 2
#'
3 -
#' This learner provides fitting procedures for building regression models using
4 -
#' the techniques in Friedman’s papers "Multivariate Adaptive Regres-sion
5 -
#' Splines" and "Fast MARS", using the function \code{\link[earth]{earth}} from
6 -
#' the \code{earth} package.
3 +
#' This learner provides fitting procedures for building regression models
4 +
#' through the spline regression techniques described in
5 +
#' \insertCite{friedman1991multivariate;textual}{sl3} and
6 +
#' \insertCite{friedman1993fast;textual}{sl3}, via \pkg{earth} and the function
7 +
#' \code{\link[earth]{earth}}.
7 8
#'
8 9
#' @docType class
9 10
#'
@@ -13,49 +14,56 @@
13 14
#'
14 15
#' @export
15 16
#'
16 -
#' @keywords data
17 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
18 +
#'  methods for training and prediction. For a full list of learner
19 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
17 20
#'
18 -
#' @return Learner object with methods for training and prediction. See
19 -
#'  \code{\link{Lrnr_base}} for documentation on learners.
20 -
#'
21 -
#' @format \code{\link{R6Class}} object.
21 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
22 +
#'  \code{\link{Lrnr_base}}.
22 23
#'
23 24
#' @family Learners
24 25
#'
26 +
#' @references
27 +
#'  \insertAllCited{}
28 +
#'
25 29
#' @section Parameters:
26 -
#' \describe{
27 -
#'   \item{\code{degree}}{A \code{numeric} specifying the maximum degree of
28 -
#'     interactions to be used in the model. This defaults to 2, specifying up
29 -
#'     through one-way interaction terms. Note that this differs from the
30 -
#'     default of \code{earth::earth}.
31 -
#'   }
32 -
#'   \item{\code{penalty}}{Generalized Cross Validation (GCV) penalty per knot.
33 -
#'     Defaults to 3 as recommended for \code{degree} > 1 in the documentation
34 -
#'     of \code{earth::earth}. Special values (for use by knowledgeable users):
35 -
#'     The value 0 penalizes only terms, not knots. The value -1 means no
36 -
#'     penalty.
37 -
#'   }
38 -
#'   \item{\code{pmethod}}{Pruning method, defaulting to \code{"backward"}.
39 -
#'     Other options include \code{"none"}, \code{"exhaustive"},
40 -
#      \code{"forward"}, \code{"seqrep"}, \code{"cv"}.
41 -
#'   }
42 -
#'   \item{\code{nfold}}{Number of cross-validation folds. Default is0, no
43 -
#'     cross validation.
44 -
#'   }
45 -
#'   \item{\code{ncross}}{Only applies if \code{nfold} > 1. Number of
46 -
#'     cross-validations. Each cross-validation has \code{nfold} folds.
47 -
#'     Defaults to 1.
48 -
#'   }
49 -
#'   \item{\code{minspan}}{Minimum number of observations between knots.
50 -
#'   }
51 -
#'   \item{\code{endspan}}{Minimum number of observations before the first and
52 -
#'     after the final knot.
53 -
#'   }
54 -
#'   \item{\code{...}}{Other parameters passed to \code{\link[earth]{earth}}.
55 -
#'     See its documentation for details.
56 -
#'   }
57 -
#' }
58 -
#
30 +
#'   - \code{degree}: A \code{numeric} specifying the maximum degree of
31 +
#'       interactions to be used in the model. This defaults to 2, specifying
32 +
#'       up through one-way interaction terms. Note that this differs from the
33 +
#'       default of \code{\link[earth]{earth}}.
34 +
#'   - \code{penalty}: Generalized Cross Validation (GCV) penalty per knot.
35 +
#'       Defaults to 3 as per the recommendation for \code{degree} > 1 in the
36 +
#'       documentation of \code{\link[earth]{earth}}. Special values (for use
37 +
#'       by knowledgeable users): The value 0 penalizes only terms, not knots.
38 +
#'       The value -1 translates to no penalty.
39 +
#'   - \code{pmethod}: Pruning method, defaulting to \code{"backward"}. Other
40 +
#'       options include \code{"none"}, \code{"exhaustive"}, \code{"forward"},
41 +
#'       \code{"seqrep"}, \code{"cv"}.
42 +
#'   - \code{nfold}: Number of cross-validation folds. The default is 0, for no
43 +
#'       cross-validation.
44 +
#'   - \code{ncross}: Only applies if \code{nfold} > 1, indicating the number
45 +
#'       of cross-validation rounds. Each cross-validation has \code{nfold}
46 +
#'       folds. Defaults to 1.
47 +
#'   - \code{minspan}: Minimum number of observations between knots.
48 +
#'   - \code{endspan}: Minimum number of observations before the first and
49 +
#'       after the final knot.
50 +
#'   - \code{...}: Other parameters passed to \code{\link[earth]{earth}}. See
51 +
#'       its documentation for details.
52 +
#'
53 +
#' @examples
54 +
#' data(cpp_imputed)
55 +
#' covars <- c(
56 +
#'   "apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn"
57 +
#' )
58 +
#' outcome <- "haz"
59 +
#' task <- sl3_Task$new(cpp_imputed,
60 +
#'   covariates = covars,
61 +
#'   outcome = outcome
62 +
#' )
63 +
#' # fit and predict from a MARS model
64 +
#' earth_lrnr <- make_learner(Lrnr_earth)
65 +
#' earth_fit <- earth_lrnr$train(task)
66 +
#' earth_preds <- earth_fit$predict(task)
59 67
Lrnr_earth <- R6Class(
60 68
  classname = "Lrnr_earth", inherit = Lrnr_base,
61 69
  portable = TRUE, class = TRUE,
@@ -67,10 +75,8 @@
67 75
      super$initialize(params = params, ...)
68 76
    }
69 77
  ),
70 -
71 78
  private = list(
72 79
    .properties = c("continuous", "binomial"),
73 -
74 80
    .train = function(task) {
75 81
      args <- self$params
76 82
      outcome_type <- self$get_outcome_type(task)
@@ -99,7 +105,6 @@
99 105
      fit_object <- call_with_args(earth_fun, args)
100 106
      return(fit_object)
101 107
    },
102 -
103 108
    .predict = function(task) {
104 109
      preds <- stats::predict(
105 110
        object = private$.fit_object, newdata = task$X,

@@ -37,7 +37,6 @@
37 37
  ),
38 38
  private = list(
39 39
    .properties = c("screener"),
40 -
41 40
    .train = function(task) {
42 41
      screener <- self$params$screener
43 42
      screener_fit <- screener$train(task)
@@ -51,11 +50,9 @@
51 50
      )
52 51
      return(fit_object)
53 52
    },
54 -
55 53
    .predict = function(task) {
56 54
      task$X[, private$.fit_object$selected, with = FALSE, drop = FALSE]
57 55
    },
58 -
59 56
    .chain = function(task) {
60 57
      return(task$next_in_chain(covariates = private$.fit_object$selected))
61 58
    },

@@ -71,7 +71,6 @@
71 71
      return(importance_result)
72 72
    }
73 73
  ),
74 -
75 74
  private = list(
76 75
    .properties = c("continuous", "binomial", "categorical", "importance"),
77 76
    .train = function(task) {

@@ -37,10 +37,8 @@
37 37
      super$initialize(params = params)
38 38
    }
39 39
  ),
40 -
41 40
  private = list(
42 41
    .properties = c("timeseries", "continuous"),
43 -
44 42
    .train = function(task) {
45 43
      args <- self$params
46 44
      learner <- args$learner
@@ -60,7 +58,6 @@
60 58
      fit_object <- call_with_args(learner_fun, args)
61 59
      return(fit_object)
62 60
    },
63 -
64 61
    .predict = function(task = NULL) {
65 62
      params <- self$params
66 63
      h <- ts_get_pred_horizon(self$training_task, task)

@@ -58,7 +58,6 @@
58 58
      )
59 59
      super$initialize(params = params, ...)
60 60
    },
61 -
62 61
    print = function() {
63 62
      lrn_names <- lapply(self$params$learners, function(obj) obj$name)
64 63
      print("SuperLearner:")
@@ -94,7 +93,6 @@
94 93
        }
95 94
      }
96 95
    },
97 -
98 96
    metalearner_fit = function() {
99 97
      self$assert_trained()
100 98
      return(private$.fit_object$cv_meta_fit$fit_object)
@@ -189,26 +187,21 @@
189 187
      return(new_object)
190 188
    }
191 189
  ),
192 -
193 190
  active = list(
194 191
    name = function() {
195 192
      name <- paste("CV", self$params$learner$name, sep = "_")
196 193
    },
197 -
198 194
    coefficients = function() {
199 195
      self$assert_trained()
200 196
      return(coef(self$fit_object$cv_meta_fit))
201 197
    },
202 -
203 198
    learner_fits = function() {
204 199
      result <- self$fit_object$full_fit$learner_fits[[1]]$learner_fits
205 200
      return(result)
206 201
    }
207 202
  ),
208 -
209 203
  private = list(
210 204
    .properties = c("wrapper", "cv"),
211 -
212 205
    .cv_risk = NULL, # store risk estimates (avoid re-calculation on print)
213 206
214 207
    .train_sublearners = function(task) {
@@ -256,7 +249,6 @@
256 249
      )
257 250
      return(bundle_delayed(fit_object))
258 251
    },
259 -
260 252
    .train = function(task, trained_sublearners) {
261 253
      fit_object <- trained_sublearners
262 254
@@ -280,7 +272,6 @@
280 272
      }
281 273
      return(fit_object[keep])
282 274
    },
283 -
284 275
    .predict = function(task) {
285 276
      full_task <- task$revere_fold_task("full")
286 277
      predictions <- private$.fit_object$full_fit$base_predict(full_task)

@@ -36,19 +36,15 @@
36 36
      super$initialize(params = params, ...)
37 37
    }
38 38
  ),
39 -
40 39
  private = list(
41 40
    .properties = c("preprocessing"),
42 -
43 41
    .train = function(task) {
44 42
      fit_object <- list(interaction_names = self$params$interactions)
45 43
      return(fit_object)
46 44
    },
47 -
48 45
    .predict = function(task = NULL) {
49 46
      stop("This learner should be used for chaining only")
50 47
    },
51 -
52 48
    .chain = function(task = NULL) {
53 49
      new_task <- task$add_interactions(
54 50
        self$params$interactions,

@@ -1,7 +1,11 @@
1 -
#' Generalized Additive Models
1 +
#' GAM: Generalized Additive Models
2 2
#'
3 -
#' This learner provides fitting procedures for generalized additive models
4 -
#' using \code{\link[mgcv]{gam}}.
3 +
#' This learner provides fitting procedures for generalized additive models,
4 +
#' using the routines from \pkg{mgcv} through a call to the function
5 +
#' \code{\link[mgcv]{gam}}. The \pkg{mgcv} package and the use of GAMs are
6 +
#' described thoroughly (with examples) in \insertCite{mgcv;textual}{sl3},
7 +
#' while \insertCite{hastie-gams;textual}{sl3} also provided an earlier quite
8 +
#' thorough look at GAMs.
5 9
#'
6 10
#' @docType class
7 11
#'
@@ -12,38 +16,51 @@
12 16
#'
13 17
#' @keywords data
14 18
#'
15 -
#' @return Learner object with methods for training and prediction. See
16 -
#'  \code{\link{Lrnr_base}} for documentation on learners.
19 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
20 +
#'  methods for training and prediction. For a full list of learner
21 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
17 22
#'
18 -
#' @format \code{\link{R6Class}} object.
23 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
24 +
#'  \code{\link{Lrnr_base}}.
19 25
#'
20 26
#' @family Learners
21 27
#'
22 28
#' @section Parameters:
23 -
#' \describe{
24 -
#'   \item{\code{formula}}{An optional argument specifying the formula of GAM.
25 -
#'   Input type can be formula or string, or a list of them. If not specified,
26 -
#'   continuous covariates will be smoothen with the smooth terms represented
27 -
#'   using `penalized thin plate regression splines'. For a detailed
28 -
#'   description, please consult the documentation for \code{\link[mgcv]{gam}}.}
29 +
#'   - \code{formula}: An optional argument specifying the formula of GAM.
30 +
#'       Input type can be formula or string, or a list of them. If not
31 +
#'       specified, continuous covariates will be smoothed, with the smooth
32 +
#'       terms represented using "penalized thin plate regression splines". For
33 +
#'       a more detailed description, please consult the documentation for
34 +
#'       \code{\link[mgcv]{gam}}.
35 +
#'   - \code{family}: An optional argument specifying the family of the GAM.
36 +
#'       See \code{\link[stats]{family}} and \code{\link[mgcv]{family.mgcv}}
37 +
#'       for a list of available family functions. If left unspecified, it will
38 +
#'       be inferred depending on the detected type of the outcome. For now,
39 +
#'       GAM supports binomial and gaussian outcome types, if \code{formula} is
40 +
#'       unspecified. For a more detailed description of this argument, please
41 +
#'       consult the documentation of \code{\link[mgcv]{gam}}.
42 +
#'   - \code{method}: An optional argument specifying the method for smoothing
43 +
#'       parameter selection. The default is global cross-validation (GCV). For
44 +
#'       more detaileds on this argument, consult the documentation of
45 +
#'       \code{\link[mgcv]{gam}}.
46 +
#'   - \code{...}: Other parameters passed to \code{\link[mgcv]{gam}}. See its
47 +
#'       documentation for details.
29 48
#'
30 -
#'   \item{\code{family}}{An optional argument specifying the family of GAM. See
31 -
#'   \code{\link{family}} and \code{\link[mgcv]{family.mgcv}} for a list of
32 -
#'   available families. If not specified, it will be inferred depending on the
33 -
#'   type of the outcome. For now the GAM supports binomial and gaussian if
34 -
#'   \code{formula} is not specified. For a detailed description, please consult
35 -
#'   the documentation for \code{\link[mgcv]{gam}}.}
49 +
#' @references
50 +
#'  \insertAllCited{}
36 51
#'
37 -
#'   \item{\code{method}}{An optional argument specifying the method for
38 -
#'   smoothing parameter selection criterion. Default is set to GCV. For a
39 -
#'   detailed description, please consult the documentation for
40 -
#'   \code{\link[mgcv]{gam}}.}
41 -
#'
42 -
#'   \item{\code{...}}{Other parameters passed to \code{\link[mgcv]{gam}}.}
43 -
#' }
44 -
#'
45 -
#' @template common_parameters
46 -
#
52 +
#' @examples
53 +
#' data(cpp_imputed)
54 +
#' # create task for prediction
55 +
#' cpp_task <- sl3_Task$new(
56 +
#'   data = cpp_imputed,
57 +
#'   covariates = c("bmi", "parity", "mage", "sexn"),
58 +
#'   outcome = "haz"
59 +
#' )
60 +
#' # initialization, training, and prediction with the defaults
61 +
#' gam_lrnr <- Lrnr_gam$new()
62 +
#' gam_fit <- gam_lrnr$train(cpp_task)
63 +
#' gam_preds <- gam_fit$predict()
47 64
Lrnr_gam <- R6Class(
48 65
  classname = "Lrnr_gam", inherit = Lrnr_base,
49 66
  portable = TRUE, class = TRUE,
@@ -56,10 +73,8 @@
56 73
      super$initialize(params = params, ...)
57 74
    }
58 75
  ),
59 -
60 76
  private = list(
61 77
    .properties = c("continuous", "binomial"),
62 -
63 78
    .train = function(task) {
64 79
      # load args
65 80
      args <- self$params
@@ -72,16 +87,15 @@
72 87
      ## family
73 88
      if (is.null(args$family)) {
74 89
        if (outcome_type$type == "continuous") {
75 -
          args$family <- gaussian
90 +
          args$family <- stats::gaussian()
76 91
        } else if (outcome_type$type == "binomial") {
77 -
          args$family <- binomial
92 +
          args$family <- stats::binomial()
78 93
        } else if (outcome_type$type == "categorical") {
79 94
          # TODO: implement categorical?
80 -
          ##      (have to specify (#{categories} - 1)
81 -
          ##       linear predictors in formula)
82 -
          stop("Categorical outcome is unsupported in Lrnr_gam for now.")
95 +
          # NOTE: must specify (#{categories} - 1) linear predictors in formula
96 +
          stop("Categorical outcomes are unsupported by Lrnr_gam for now.")
83 97
        } else {
84 -
          stop("Specified outcome type is unsupported in Lrnr_gam.")
98 +
          stop("Specified outcome type is unsupported by Lrnr_gam.")
85 99
        }
86 100
      }
87 101
      ## formula
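With the stats:: namespace typo corrected above, the family-inference branch reduces to a small dispatch on the detected outcome type. A standalone sketch (infer_gam_family is a hypothetical helper mirroring the branch, not an sl3 function):

infer_gam_family <- function(outcome_type) {
  if (outcome_type == "continuous") {
    stats::gaussian()
  } else if (outcome_type == "binomial") {
    stats::binomial()
  } else {
    # categorical and other types are rejected, as in the learner
    stop("Specified outcome type is unsupported by Lrnr_gam.")
  }
}
infer_gam_family("binomial")$family # "binomial"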
@@ -146,7 +160,6 @@
146 160
      fit_object <- call_with_args(mgcv::gam, args)
147 161
      return(fit_object)
148 162
    },
149 -
150 163
    .predict = function(task) {
151 164
      # get predictions
152 165
      predictions <- stats::predict(
@@ -157,7 +170,6 @@
157 170
      predictions <- as.numeric(predictions)
158 171
      return(predictions)
159 172
    },
160 -
161 173
    .required_packages = c("mgcv")
162 174
  )
163 175
)

@@ -63,7 +63,6 @@
63 63
  ),
64 64
  private = list(
65 65
    .properties = c("continuous", "timeseries"),
66 -
67 66
    .train = function(task) {
68 67
      args <- self$params
69 68
      learners <- args$learner

@@ -1,58 +1,82 @@
1 1
#' GLMs with Elastic Net Regularization
2 2
#'
3 -
#' This learner provides fitting procedures for elastic net models, using the
4 -
#' \code{glmnet} package, using \code{\link[glmnet]{cv.glmnet}} to select an
5 -
#' appropriate value of lambda.
3 +
#' This learner provides fitting procedures for elastic net models, including
4 +
#' both lasso (L1) and ridge (L2) penalized regression, using the \pkg{glmnet}
5 +
#' package. The function \code{\link[glmnet]{cv.glmnet}} is used to select an
6 +
#' appropriate value of the regularization parameter lambda. For details on
7 +
#' these regularized regression models and \pkg{glmnet}, consider consulting
8 +
#' \insertCite{glmnet;textual}{sl3}.
6 9
#'
7 10
#' @docType class
8 11
#'
9 12
#' @importFrom R6 R6Class
10 13
#' @importFrom stats predict
11 -
#' @importFrom assertthat assert_that is.count is.flag
12 14
#'
13 15
#' @export
14 16
#'
15 17
#' @keywords data
16 18
#'
17 -
#' @return Learner object with methods for training and prediction. See
18 -
#'   \code{\link{Lrnr_base}} for documentation on learners.
19 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
20 +
#'  methods for training and prediction. For a full list of learner
21 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
19 22
#'
20 -
#' @format \code{\link{R6Class}} object.
23 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
24 +
#'  \code{\link{Lrnr_base}}.
21 25
#'
22 26
#' @family Learners
23 27
#'
24 28
#' @section Parameters:
25 -
#' \describe{
26 -
#'   \item{\code{lambda = NULL}}{An optional vector of lambda values to
27 -
#'     compare.}
28 -
#'   \item{\code{type.measure = "deviance"}}{The loss to use when selecting
29 -
#'     lambda. Options documented in \code{\link[glmnet]{cv.glmnet}}.}
30 -
#'   \item{\code{nfolds = 10}}{Number of folds to use for internal
31 -
#'     cross-validation. Smallest value allowable is 3.}
32 -
#'   \item{\code{alpha = 1}}{The elastic net parameter: \code{alpha = 0} is
33 -
#'     Ridge (L2-penalized) regression, while \code{alpha = 1} specifies Lasso
34 -
#'     (L1-penalized) regression. Values in the closed unit interval specify a
35 -
#'     weighted combination of the two penalties. This is further documented in
36 -
#'     \code{\link[glmnet]{glmnet}}.}
37 -
#'   \item{\code{nlambda = 100}}{The number of lambda values to fit. Comparing
38 -
#'     less values will speed up computation, but may decrease statistical
39 -
#'     performance. Documented in \code{\link[glmnet]{cv.glmnet}}.}
40 -
#'   \item{\code{use_min = TRUE}}{If \code{TRUE}, use
41 -
#'     \code{lambda = cv_fit$lambda.min} for prediction; otherwise, use
42 -
#'     \code{lambda = cv_fit$lambda.1se}. The distinction between these is
43 -
#'     clarified in \code{\link[glmnet]{cv.glmnet}}.}
44 -
#'   \item{\code{stratify_cv = FALSE}}{Stratify internal cross-validation
45 -
#'     folds, so that a binary outcome's prevalence for training is roughly the
46 -
#'     same in the training and validation sets of the inner cross-validation
47 -
#'     folds? This argument can only be used when the outcome type for training
48 -
#'     is binomial; and either the \code{id} node in the task is not specified,
49 -
#'     or \code{\link[glmnet]{cv.glmnet}}'s \code{foldid} argument is not
50 -
#'     specified upon initializing the learner.}
51 -
#'   \item{\code{...}}{Other parameters to be passed to
52 -
#'     \code{\link[glmnet]{cv.glmnet}} and \code{\link[glmnet]{glmnet}}.}
53 -
#' }
29 +
#'  - \code{lambda = NULL}: An optional vector of lambda values to compare.
30 +
#'  - \code{type.measure = "deviance"}: The loss to use when selecting
31 +
#'      lambda. Options documented in \code{\link[glmnet]{cv.glmnet}}.
32 +
#'  - \code{nfolds = 10}: Number of folds for internal cross-validation (3 or more).
33 +
#'  - \code{alpha = 1}: The elastic net parameter: \code{alpha = 0} is Ridge
34 +
#'      (L2-penalized) regression, while \code{alpha = 1} specifies Lasso
35 +
#'      (L1-penalized) regression. Values in the closed unit interval specify a
36 +
#'      weighted combination of the two penalties. For further details, consult
37 +
#'      the documentation of \code{\link[glmnet]{glmnet}}.
38 +
#'  - \code{nlambda = 100}: The number of lambda values to fit. Comparing
39 +
#'      fewer values will speed up computation, but may hurt the statistical
40 +
#'      performance. For further details, consult the documentation of
41 +
#'      \code{\link[glmnet]{cv.glmnet}}.
42 +
#'  - \code{use_min = TRUE}: If \code{TRUE}, the smallest value of the lambda
43 +
#'      regularization parameter is used for prediction (i.e.,
44 +
#'      \code{lambda = cv_fit$lambda.min}); otherwise, a larger value is used
45 +
#'      (i.e., \code{lambda = cv_fit$lambda.1se}). The distinction between the
46 +
#'      two variants is clarified in the documentation of
47 +
#'      \code{\link[glmnet]{cv.glmnet}}.
48 +
#'  - \code{stratify_cv = FALSE}: Stratify internal cross-validation folds, so
49 +
#'      that a binary outcome's prevalence for training is roughly the same in
50 +
#'      the training and validation sets of the internal cross-validation
51 +
#'      folds? This argument can only be used when the outcome type for
52 +
#'      training is binomial; and either the \code{id} node in the task is not
53 +
#'      specified, or \code{\link[glmnet]{cv.glmnet}}'s \code{foldid} argument
54 +
#'      is not specified upon initializing the learner.
55 +
#'  - \code{...}: Other parameters passed to \code{\link[glmnet]{cv.glmnet}}
56 +
#'      and \code{\link[glmnet]{glmnet}}.
54 57
#'
55 -
#' @template common_parameters
58 +
#' @references
59 +
#'  \insertAllCited{}
60 +
#'
61 +
#' @examples
62 +
#' data(mtcars)
63 +
#' mtcars_task <- sl3_Task$new(
64 +
#'   data = mtcars,
65 +
#'   covariates = c(
66 +
#'     "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am",
67 +
#'     "gear", "carb"
68 +
#'   ),
69 +
#'   outcome = "mpg"
70 +
#' )
71 +
#' # simple prediction with lasso penalty
72 +
#' lasso_lrnr <- Lrnr_glmnet$new()
73 +
#' lasso_fit <- lasso_lrnr$train(mtcars_task)
74 +
#' lasso_preds <- lasso_fit$predict()
75 +
#'
76 +
#' # simple prediction with ridge penalty
77 +
#' ridge_lrnr <- Lrnr_glmnet$new(alpha = 0)
78 +
#' ridge_fit <- ridge_lrnr$train(mtcars_task)
79 +
#' ridge_preds <- ridge_fit$predict()
56 80
Lrnr_glmnet <- R6Class(
57 81
  classname = "Lrnr_glmnet",
58 82
  inherit = Lrnr_base, portable = TRUE, class = TRUE,
@@ -63,13 +87,11 @@
63 87
      super$initialize(params = args_to_list(), ...)
64 88
    }
65 89
  ),
66 -
67 90
  private = list(
68 91
    .properties = c(
69 92
      "continuous", "binomial", "categorical",
70 93
      "weights", "ids"
71 94
    ),
72 -
73 95
    .train = function(task) {
74 96
      args <- self$params
75 97
@@ -80,12 +102,7 @@
80 102
      }
81 103
82 104
      if (args$family %in% "quasibinomial") {
83 -
        args$family <- "gaussian"
84 -
        warning(paste(
85 -
          "Lrnr_glmnet doesn't understand outcome_type =",
86 -
          "'quasibinomial'; fitting glmnet with family='gaussian'",
87 -
          "instead."
88 -
        ))
105 +
        args$family <- stats::quasibinomial()
89 106
      }
90 107
91 108
      # specify data
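The substitution above relies on cv.glmnet() accepting a stats family object, supported in glmnet since version 4.0; earlier versions accepted only the character family names, which is why the old code warned and fell back to "gaussian". A hedged usage sketch under that version assumption:

library(glmnet) # assumes glmnet >= 4.0 for family-object support
set.seed(1)
x <- matrix(rnorm(100 * 5), 100, 5)
y <- runif(100) # outcome bounded in (0, 1), the quasibinomial use case
cv_fit <- cv.glmnet(x, y, family = stats::quasibinomial())
head(predict(cv_fit, newx = x, s = "lambda.min", type = "response"))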
@@ -127,7 +144,6 @@
127 144
      fit_object$glmnet.fit$call <- NULL
128 145
      return(fit_object)
129 146
    },
130 -
131 147
    .predict = function(task) {
132 148
      outcome_type <- private$.training_outcome_type
133 149

@@ -1,80 +1,91 @@
1 1
#' The Scalable Highly Adaptive Lasso
2 2
#'
3 -
#' The Highly Adaptive Lasso is an estimation procedure that generates a design
4 -
#'  matrix consisting of basis functions corresponding to covariates and
5 -
#'  interactions of covariates and fits Lasso regression to this (usually) very
6 -
#'  wide matrix, recovering a nonparametric functional form that describes the
7 -
#'  target prediction function as a composition of subset functions with finite
8 -
#'  variation norm. This implementation uses \pkg{hal9001}, which provides both
9 -
#'  a custom implementation (based on \pkg{origami}) of the cross-validated
10 -
#'  lasso as well the standard call to \code{\link[glmnet]{cv.glmnet}} from the
11 -
#'  \pkg{glmnet}.
3 +
#' The Highly Adaptive Lasso (HAL) is a nonparametric regression function that
4 +
#' has been demonstrated to optimally estimate functions with bounded (finite)
5 +
#' variation norm. The algorithm proceeds by first building an adaptive basis
6 +
#' (i.e., the HAL basis) based on indicator basis functions (or higher-order
7 +
#' spline basis functions) representing covariates and interactions of the
8 +
#' covariates up to a pre-specified degree. The fitting procedures included in
9 +
#' this learner use \code{\link[hal9001]{fit_hal}} from the \pkg{hal9001}
10 +
#' package. For details on HAL regression, consider consulting
11 +
#' \insertCite{benkeser2016hal;textual}{sl3},
12 +
#' \insertCite{coyle2020hal9001-rpkg;textual}{sl3}, and
13 +
#' \insertCite{hejazi2020hal9001-joss;textual}{sl3}.
12 14
#'
13 15
#' @docType class
16 +
#'
14 17
#' @importFrom R6 R6Class
18 +
#' @importFrom stats predict quasibinomial
15 19
#'
16 20
#' @export
17 21
#'
18 22
#' @keywords data
19 23
#'
20 -
#' @return Learner object with methods for training and prediction. See
21 -
#'  \code{\link{Lrnr_base}} for documentation on learners.
24 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
25 +
#'  methods for training and prediction. For a full list of learner
26 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
22 27
#'
23 -
#' @format \code{\link{R6Class}} object.
28 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
29 +
#'  \code{\link{Lrnr_base}}.
24 30
#'
25 31
#' @family Learners
26 32
#'
27 33
#' @section Parameters:
28 -
#' \describe{
29 -
#'   \item{\code{max_degree=3}}{ The highest order of interaction
30 -
#'    terms for which the basis functions ought to be generated. The default
31 -
#'    corresponds to generating basis functions up to all 3-way interactions of
32 -
#'    covariates in the input matrix, matching the default in \pkg{hal9001}.
33 -
#'   }
34 -
#'   \item{\code{fit_type="glmnet"}}{The specific routine to be called when
35 -
#'    fitting the Lasso regression in a cross-validated manner. Choosing the
36 -
#'    \code{"glmnet"} option calls either \code{\link[glmnet]{cv.glmnet}} or
37 -
#'    \code{\link[glmnet]{glmnet}}.
38 -
#'   }
39 -
#'   \item{\code{n_folds=10}}{Integer for the number of folds to be used
40 -
#'    when splitting the data for cross-validation. This defaults to 10 as this
41 -
#'    is the convention for V-fold cross-validation.
42 -
#'   }
43 -
#'   \item{\code{use_min=TRUE}}{Determines which lambda is selected from
44 -
#'    \code{\link[glmnet]{cv.glmnet}}. \code{TRUE} corresponds to
45 -
#'    \code{"lambda.min"} and \code{FALSE} corresponds to \code{"lambda.1se"}.
46 -
#'   }
47 -
#'   \item{\code{reduce_basis=NULL}}{A \code{numeric} value bounded in the open
48 -
#'    interval (0,1) indicating the minimum proportion of ones in a basis
49 -
#'    function column needed for the basis function to be included in the
50 -
#'    procedure to fit the Lasso. Any basis functions with a lower proportion
51 -
#'    of 1's than the specified cutoff will be removed. This argument defaults
52 -
#'    to \code{NULL}, in which case all basis functions are used in the Lasso
53 -
#'    stage of HAL.
54 -
#'   }
55 -
#'   \item{\code{return_lasso=TRUE}}{A \code{logical} indicating whether or not
56 -
#'    to return the \code{\link[glmnet]{glmnet}} fit of the Lasso model.
57 -
#'   }
58 -
#'   \item{\code{return_x_basis=FALSE}}{A \code{logical} indicating whether or
59 -
#'    not to return the matrix of (possibly reduced) basis functions used in
60 -
#'    the HAL Lasso fit.
61 -
#'   }
62 -
#'   \item{\code{basis_list=NULL}}{The full set of basis functions generated
63 -
#'    from the input data (from \code{\link[hal9001]{enumerate_basis}}). The
64 -
#'    dimensionality of this structure is roughly (n * 2^(d - 1)), where n is
65 -
#'    the number of observations and d is the number of columns in the input.
66 -
#'   }
67 -
#'   \item{\code{cv_select=TRUE}}{A \code{logical} specifying whether the array
68 -
#'    of values specified should be passed to \code{\link[glmnet]{cv.glmnet}}
69 -
#'    in order to pick the optimal value (based on cross-validation) (when set
70 -
#'    to \code{TRUE}) or to fit along the sequence of values (or a single value
71 -
#'    using \code{\link[glmnet]{glmnet}} (when set to \code{FALSE}).
72 -
#'   }
73 -
#'   \item{\code{...}}{Other parameters passed directly to
74 -
#'    \code{\link[hal9001]{fit_hal}}. See its documentation for details.
75 -
#'   }
76 -
#' }
77 -
#
34 +
#'   - \code{max_degree=3}: The highest order of interaction terms for which
35 +
#'       the basis functions ought to be generated. The default corresponds to
36 +
#'       generating basis functions up to all 3-way interactions of covariates
37 +
#'       in the input matrix, matching the default in \pkg{hal9001}.
38 +
#'   - \code{fit_type="glmnet"}: The specific routine to be called when fitting
39 +
#'       the Lasso regression in a cross-validated manner. Choosing the
40 +
#'       \code{"glmnet"} option calls either \code{\link[glmnet]{cv.glmnet}} or
41 +
#'       \code{\link[glmnet]{glmnet}}.
42 +
#'   - \code{n_folds=10}: Integer for the number of folds to be used when
43 +
#'       splitting the data for cross-validation. This defaults to 10 as this
44 +
#'       is the convention for V-fold cross-validation.
45 +
#'   - \code{use_min=TRUE}: Determines which lambda is selected from
46 +
#'       \code{\link[glmnet]{cv.glmnet}}. \code{TRUE} corresponds to
47 +
#'       \code{"lambda.min"} and \code{FALSE} corresponds to
48 +
#'       \code{"lambda.1se"}.
49 +
#'   - \code{reduce_basis=NULL}: A \code{numeric} value bounded in the open
50 +
#'       interval (0,1) indicating the minimum proportion of ones in a basis
51 +
#'       function column needed for the basis function to be included in the
52 +
#'       fitting the HAL model. Any basis functions with a lower proportion of
53 +
#'       1's than the specified cutoff will be removed. This argument defaults
54 +
#'       to \code{NULL}, in which case all basis functions are used in fitting.
55 +
#'   - \code{return_lasso=TRUE}: A \code{logical} indicating whether or not to
56 +
#'       return the \code{\link[glmnet]{glmnet}} fit of the HAL model.
57 +
#'   - \code{return_x_basis=FALSE}: A \code{logical} indicating whether or not
58 +
#'       to return the matrix of (possibly reduced) basis functions used in the
59 +
#'       HAL fit.
60 +
#'   - \code{basis_list=NULL}: The full set of basis functions generated from
61 +
#'       the input data (from \code{\link[hal9001]{enumerate_basis}}). The
62 +
#'       dimensionality of this structure is roughly (n * 2^(d - 1)), where n
63 +
#'       is the number of observations and d is the number of columns.
64 +
#'   - \code{cv_select=TRUE}: A \code{logical} specifying whether the array of
65 +
#'       values specified should be passed to \code{\link[glmnet]{cv.glmnet}}
66 +
#'       in order to pick the optimal value (based on cross-validation) (when
67 +
#'       set to \code{TRUE}) or to fit along the sequence of values (or single
68 +
#'       value using \code{\link[glmnet]{glmnet}} (when set to \code{FALSE}).
69 +
#'   - \code{...}: Other parameters passed to \code{\link[hal9001]{fit_hal}}.
70 +
#'       See its documentation for details.
71 +
#'
72 +
#' @references
73 +
#'  \insertAllCited{}
74 +
#'
75 +
#' @examples
76 +
#' # load data and make task
77 +
#' data(mtcars)
78 +
#' mtcars_task <- sl3_Task$new(
79 +
#'   data = mtcars,
80 +
#'   covariates = c(
81 +
#'     "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear", "carb"
82 +
#'   ),
83 +
#'   outcome = "mpg"
84 +
#' )
85 +
#' # simple prediction with HAL
86 +
#' hal_lrnr <- Lrnr_hal9001$new()
87 +
#' hal_fit <- hal_lrnr$train(mtcars_task)
88 +
#' hal_preds <- hal_fit$predict()
78 89
Lrnr_hal9001 <- R6Class(
79 90
  classname = "Lrnr_hal9001", inherit = Lrnr_base,
80 91
  portable = TRUE, class = TRUE,
@@ -95,17 +106,22 @@
95 106
  ),
96 107
  private = list(
97 108
    .properties = c("continuous", "binomial", "weights", "ids"),
98 -
99 109
    .train = function(task) {
100 110
      args <- self$params
101 111
102 112
      outcome_type <- self$get_outcome_type(task)
103 113
104 114
      if (is.null(args$family)) {
105 -
        args$family <- args$family <- outcome_type$glm_family()
115 +
        args$family <- outcome_type$glm_family()
106 116
      }
107 117
108 -
      args$X <- as.matrix(task$X)
118 +
      if (args$family %in% "quasibinomial") {
119 +
        args$family <- stats::quasibinomial()
120 +
      }
121 +
122 +
      # NOTE: data.matrix() here instead of as.matrix() because of glmnet WTF:
123 +
      # https://stackoverflow.com/questions/8458233/r-glmnet-as-matrix-error-message
124 +
      args$X <- data.matrix(task$X)
109 125
      args$Y <- outcome_type$format(task$Y)
110 126
      args$yolo <- FALSE
111 127
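The NOTE above exists because as.matrix() on a data.frame containing any factor column coerces the whole matrix to character, which glmnet rejects, whereas data.matrix() maps factors to their integer codes. A two-line illustration:

df <- data.frame(x1 = c(1.5, 2.5), x2 = factor(c("a", "b")))
as.matrix(df)   # character matrix: glmnet would error on this
data.matrix(df) # numeric matrix: factor levels become integer codes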
@@ -123,23 +139,27 @@
123 139
124 140
      # pass in formals of glmnet versus cv.glmnet based on cv_select
125 141
      if (args$cv_select) {
126 -
        glmnet_other_valid <- union(
142 +
        glmnet_formals <- union(
127 143
          names(formals(glmnet::cv.glmnet)),
128 144
          names(formals(glmnet::glmnet))
129 145
        )
130 146
      } else {
131 -
        glmnet_other_valid <- names(formals(glmnet::glmnet))
147 +
        glmnet_formals <- names(formals(glmnet::glmnet))
132 148
      }
133 149
134 -
      # fit HAL, allowing glmnet-fitting arguments
150 +
      # fit HAL, allowing formal glmnet and cv.glmnet arguments
135 151
      fit_object <- call_with_args(
136 152
        hal9001::fit_hal, args,
137 -
        other_valid = glmnet_other_valid
153 +
        other_valid = glmnet_formals,
154 +
        ignore = c("x", "y")
138 155
      )
139 156
      return(fit_object)
140 157
    },
141 158
    .predict = function(task = NULL) {
142 -
      predictions <- predict(self$fit_object, new_data = as.matrix(task$X))
159 +
      predictions <- stats::predict(
160 +
        self$fit_object,
161 +
        new_data = data.matrix(task$X)
162 +
      )
143 163
      if (!is.na(safe_dim(predictions)[2])) {
144 164
        p <- ncol(predictions)
145 165
        colnames(predictions) <- sprintf("lambda_%0.3e", self$params$lambda)

@@ -1,21 +1,12 @@
1 -
## ------------------------------------------------------------------------
2 -
## Faster GLM with speedglm, fall back on glm.fit in case of error
3 -
## - Always use the internal fitting function (speedglm.wfit, glm.fit)
4 -
## - GLM objects are stripped of all the junk (minimal memory footprint)
5 -
## - No formula interface (design mat is the input data.table in task$X)
6 -
## - Separate interface for interactions (params[["interactions"]])
7 -
## - Can override the covariates with a subset of those in task$nodes$covariates
8 -
##   (params[["covariates"]])
9 -
## - All predictions are based on external matrix multiplication with a
10 -
##   family-based link functions
11 -
## ------------------------------------------------------------------------
12 -
13 -
#' Computationally Efficient GLMs
1 +
#' Computationally Efficient Generalized Linear Model (GLM) Fitting
14 2
#'
15 -
#' This learner provides faster fitting procedures for generalized linear models
16 -
#' using the \code{speedglm} package. Arguments are passed to
17 -
#' \code{\link[speedglm]{speedglm.wfit}}. Uses \code{\link[stats]{glm.fit}} as a
18 -
#' fallback if \code{\link[speedglm]{speedglm.wfit}} fails.
3 +
#' This learner provides faster procedures for fitting linear and generalized
4 +
#' linear models than \code{\link{Lrnr_glm}} with a minimal memory footprint.
5 +
#' This learner uses the internal fitting function of the \pkg{speedglm}
6 +
#' package, \code{\link[speedglm]{speedglm.wfit}}. See
7 +
#' \insertCite{speedglm;textual}{sl3} for more detail. The
8 +
#' \code{\link[stats]{glm.fit}} function is used as a fallback if
9 +
#' \code{\link[speedglm]{speedglm.wfit}} fails.
19 10
#'
20 11
#' @docType class
21 12
#'
@@ -26,46 +17,55 @@
26 17
#'
27 18
#' @keywords data
28 19
#'
29 -
#' @return Learner object with methods for training and prediction. See
30 -
#'  \code{\link{Lrnr_base}} for documentation on learners.
20 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
21 +
#'  methods for training and prediction. For a full list of learner
22 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
31 23
#'
32 -
#' @format \code{\link{R6Class}} object.
24 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
25 +
#'  \code{\link{Lrnr_base}}.
33 26
#'
34 27
#' @family Learners
35 28
#'
36 29
#' @section Parameters:
37 -
#' \describe{
38 -
#'   \item{\code{intercept=TRUE}}{If \code{TRUE}, an intercept term is included}
39 -
#'   \item{\code{method="Cholesky"}}{The matrix decomposition method to use}
40 -
#'   \item{\code{...}}{Other parameters to be passed to
41 -
#'     \code{\link[speedglm]{speedglm.wfit}}.}
42 -
#' }
30 +
#'   - \code{intercept = TRUE}: Should an intercept be included in the model?
31 +
#'   - \code{method = "Cholesky"}: The method to check for singularity.
32 +
#'   - \code{...}: Other parameters to be passed to
33 +
#'       \code{\link[speedglm]{speedglm.wfit}}.
34 +
#'
35 +
#' @references
36 +
#'  \insertAllCited{}
43 37
#'
44 -
#' @template common_parameters
45 -
#
38 +
#' @examples
39 +
#' data(cpp_imputed)
40 +
#' covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs")
41 +
#' task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz")
42 +
#'
43 +
#' # simple, main-terms GLM
44 +
#' lrnr_glm_fast <- Lrnr_glm_fast$new(method = "eigen")
45 +
#' glm_fast_fit <- lrnr_glm_fast$train(task)
46 +
#' glm_fast_preds <- glm_fast_fit$predict()
46 47
Lrnr_glm_fast <- R6Class(
47 -
  classname = "Lrnr_glm_fast", inherit = Lrnr_base,
48 -
  portable = TRUE, class = TRUE,
48 +
  classname = "Lrnr_glm_fast",
49 +
  inherit = Lrnr_base, portable = TRUE, class = TRUE,
49 50
  public = list(
50 51
    initialize = function(intercept = TRUE, method = "Cholesky", ...) {
51 52
      super$initialize(params = args_to_list(), ...)
52 53
    }
53 54
  ),
54 -
55 55
  private = list(
56 56
    .default_params = list(method = "Cholesky"),
57 -
58 57
    .properties = c("continuous", "binomial", "weights", "offset"),
59 -
60 58
    .train = function(task) {
61 -
      verbose <- getOption("sl3.transform.offset")
59 +
      verbose <- getOption("sl3.verbose")
60 +
62 61
      args <- self$params
62 +
63 63
      outcome_type <- self$get_outcome_type(task)
64 +
      args$y <- outcome_type$format(task$Y)
64 65
65 66
      if (is.null(args$family)) {
66 67
        args$family <- outcome_type$glm_family(return_object = TRUE)
67 68
      }
68 -
69 69
      family_name <- args$family$family
70 70
      linkinv_fun <- args$family$linkinv
71 71
      link_fun <- args$family$linkfun
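The assignments above rely on the fact that a stats family object bundles the family name, link, and inverse link in one place; a quick base-R illustration, independent of the diff:

    fam <- binomial()
    fam$family        # "binomial"
    fam$linkfun(0.5)  # link (logit): qlogis(0.5) = 0
    fam$linkinv(0)    # inverse link: plogis(0) = 0.5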
@@ -76,8 +76,6 @@
76 76
      } else {
77 77
        args$X <- as.matrix(task$X)
78 78
      }
79 -
      args$y <- outcome_type$format(task$Y)
80 -
      args$trace <- FALSE
81 79
82 80
      if (task$has_node("weights")) {
83 81
        args$weights <- task$weights
@@ -102,7 +100,7 @@
102 100
        if (verbose) {
103 101
          message(paste(
104 102
            "speedglm::speedglm.wfit failed, falling back on",
105 -
            "stats:glm.fit;", fit_object
103 +
            "stats::glm.fit;", fit_object
106 104
          ))
107 105
        }
108 106
        args$ctrl <- glm.control(trace = FALSE)
@@ -130,9 +128,9 @@
130 128
      fit_object$training_offset <- task$has_node("offset")
131 129
      return(fit_object)
132 130
    },
133 -
134 131
    .predict = function(task = NULL) {
135 132
      verbose <- getOption("sl3.verbose")
133 +
136 134
      if (self$params$intercept) {
137 135
        X <- task$X_intercept
138 136
      } else {

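As the deleted header comment noted, this learner predicts by multiplying the design matrix with the stored coefficients and applying the inverse link, rather than calling a predict() method. A self-contained sketch of that idea, with made-up coefficients beta (not taken from the package):

    set.seed(1)
    X <- cbind(1, matrix(rnorm(20), nrow = 10))  # intercept + 2 covariates
    beta <- c(-0.5, 1, 2)                        # hypothetical coefficients
    linkinv <- binomial()$linkinv
    eta <- X %*% beta     # linear predictor on the link scale
    preds <- linkinv(eta) # response-scale predictions in (0, 1)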
@@ -1,4 +1,4 @@
1 -
#' Base Class for all sl3 Learners.
1 +
#' Base Class for all sl3 Learners
2 2
#'
3 3
#' Generally, this base learner class should not be instantiated. It is intended to be
4 4
#' an abstract class, although abstract classes are not explicitly supported
@@ -8,6 +8,7 @@
8 8
#' @docType class
9 9
#'
10 10
#' @importFrom R6 R6Class
11 +
#' @importFrom Rdpack reprompt
11 12
#'
12 13
#' @export
13 14
#'
@@ -42,7 +43,6 @@
42 43
43 44
      invisible(self)
44 45
    },
45 -
46 46
    subset_covariates = function(task) {
47 47
      # learners subset task covariates based on their covariate set
48 48
      if ("covariates" %in% names(self$params) &&
@@ -95,7 +95,6 @@
95 95
        return(task)
96 96
      }
97 97
    },
98 -
99 98
    get_outcome_type = function(task) {
100 99
      outcome_type <- task$outcome_type
101 100
      if (!is.null(self$params$outcome_type)) {
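Because get_outcome_type() prefers a learner-level outcome_type over the task's detected type, the type can be overridden at construction. A hedged sketch (the chosen value is illustrative):

    # force the learner to treat the outcome as continuous, regardless of
    # what the task would infer from the data
    lrnr_cont <- Lrnr_glm_fast$new(outcome_type = "continuous")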
@@ -108,7 +107,6 @@
108 107
      }
109 108
      return(outcome_type)
110 109
    },
111 -
112 110
    get_outcome_range = function(task = NULL, fold_number = "full") {
113 111
      # return the support of the learner
114 112
      # if a task is specified, return supports based on the task's observations
@@ -136,7 +134,6 @@
136 134
      }
137 135
      return(range)
138 136
    },
139 -
140 137
    base_train = function(task, trained_sublearners = NULL) {
141 138
142 139
      # fits the learner to the task's data
@@ -154,7 +151,6 @@
154 151
      new_object$set_train(fit_object, subsetted_task)
155 152
      return(new_object)
156 153
    },
157 -
158 154
    set_train = function(fit_object, training_task) {
159 155
      private$.fit_object <- fit_object
160 156
      # for predict/chaining, subset covariates to match the training task
@@ -171,7 +167,6 @@