tlverse/sl3
Showing 69 of 364 files from the diff.
Newly tracked files:
R/Lrnr_lightgbm.R created.
R/Lrnr_bayesglm.R created.

Other files ignored by Codecov:
man/Lrnr_pca.Rd has changed.
man/Stack.Rd has changed.
man/Lrnr_rpart.Rd has changed.
R/Lrnr_lstm.R was deleted.
Makefile has changed.
man/Lrnr_solnp.Rd has changed.
man/Lrnr_nnls.Rd has changed.
man/Lrnr_arima.Rd has changed.
man/Lrnr_sl.Rd has changed.
man/Lrnr_bound.Rd has changed.
man/Lrnr_mean.Rd has changed.
README.Rmd has changed.
man/Lrnr_caret.Rd has changed.
docs/index.html has changed.
man/Lrnr_base.Rd has changed.
.travis.yml has changed.
docs/404.html has changed.
docs/authors.html has changed.
inst/CITATION has changed.
man/Lrnr_gbm.Rd has changed.
man/Pipeline.Rd has changed.
man/Lrnr_hts.Rd has changed.
man/Lrnr_gam.Rd has changed.
man/Lrnr_svm.Rd has changed.
man/Lrnr_grf.Rd has changed.
man/Lrnr_nnet.Rd has changed.
man/Lrnr_earth.Rd has changed.
man/Lrnr_optim.Rd has changed.
man/Lrnr_cv.Rd has changed.
appveyor.yml has changed.
man/Lrnr_glm.Rd has changed.
NAMESPACE has changed.
docs/sitemap.xml has changed.
docs/pkgdown.yml has changed.
man/Lrnr_tsDyn.Rd has changed.
man/Lrnr_gts.Rd has changed.
README.md has changed.
DESCRIPTION has changed.
NEWS.md has changed.

@@ -58,7 +58,6 @@
       "continuous", "binomial", "categorical", "weights",
       "offset"
     ),
-
     .train = function(task) {
       verbose <- getOption("sl3.verbose")
       params <- self$params
@@ -125,7 +124,6 @@
       fit_object$name <- "solnp"
       return(fit_object)
     },
-
     .predict = function(task = NULL) {
       verbose <- getOption("sl3.verbose")
       X <- as.matrix(task$X)

@@ -91,10 +91,8 @@
       super$initialize(params = args_to_list(), ...)
     }
   ),
-
   private = list(
     .properties = c("continuous", "binomial", "categorical", "weights"),
-
     .train = function(task) {
       args <- self$params
       outcome_type <- self$get_outcome_type(task)
@@ -120,7 +118,6 @@
       fit_object <- call_with_args(grf::quantile_forest, args)
       return(fit_object)
     },
-
     .predict = function(task) {
       # quantiles for which to predict
       quantiles_pred <- private$.params$quantiles_pred

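For context on the grf hunk above: .train() fits grf::quantile_forest and .predict() reads the quantiles back out of `quantiles_pred`. A minimal usage sketch, assuming the constructor accepts the `quantiles_pred` parameter shown in the diff (task construction follows the sl3 API used elsewhere in this changeset):

library(sl3)
data(cpp_imputed)
task <- sl3_Task$new(
  cpp_imputed,
  covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage"),
  outcome = "haz"
)
# predict the conditional median; quantiles_pred is read in .predict() above
grf_lrnr <- Lrnr_grf$new(quantiles_pred = 0.5)
grf_fit <- grf_lrnr$train(task)
grf_preds <- grf_fit$predict()
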
@@ -82,7 +82,6 @@
       private$.fit_object$is_error <- is_error
     }
   ),
-
   active = list(
     name = function() {
       # learners = self$params$learners
@@ -96,11 +95,9 @@
       return(result)
     }
   ),
-
   private = list(
     # modified names of learners
     .learner_names = NULL,
-
     .train_sublearners = function(task) {
       # generate training subtasks
       learners <- self$params$learners

@@ -1,8 +1,11 @@
-#' GBM - generalized boosted regression models
+#' GBM: Generalized Boosted Regression Models
 #'
-#' This learner provides fitting procedures for building generalized boosted
-#' regression models, using the function \code{\link[gbm]{gbm}} from the
-#' \code{gbm} package.
+#' This learner provides fitting procedures for generalized boosted regression
+#' trees, using the routines from \pkg{gbm}, through a call to the function
+#' \code{\link[gbm]{gbm.fit}}. Though a variety of gradient boosting strategies
+#' have seen popularity in machine learning, a few of the early methodological
+#' descriptions were given by \insertCite{friedman-gbm1;textual}{sl3} and
+#' \insertCite{friedman-gbm2;textual}{sl3}.
 #'
 #' @docType class
 #'
@@ -13,48 +16,63 @@
 #'
 #' @keywords data
 #'
-#' @return Learner object with methods for training and prediction. See
-#'  \code{\link{Lrnr_base}} for documentation on learners.
+#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
+#'  methods for training and prediction. For a full list of learner
+#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
 #'
-#' @format \code{\link{R6Class}} object.
+#' @format An \code{\link[R6]{R6Class}} object inheriting from
+#'  \code{\link{Lrnr_base}}.
 #'
 #' @family Learners
 #'
+#' @seealso [Lrnr_xgboost] for the extreme gradient boosted tree models from
+#'  the XGBoost framework (via the \pkg{xgboost} package) and [Lrnr_lightgbm]
+#'  for the faster and more efficient gradient boosted trees from the LightGBM
+#'  framework (via the \pkg{lightgbm} package).
+#'
 #' @section Parameters:
-#' \describe{
-#'   \item{\code{n.trees}}{Integer specifying the total number of trees to fit.
-#'     This is equivalent to the number of iterations and the number of basis
-#'     functions in the additive expansion. Default is 10000.
-#'   }
-#'   \item{\code{interaction.depth}}{Integer specifying the maximum depth of
-#'     each tree (i.e.,  the highest level ofvariable interactions allowed).
-#'     A value of 1 implies an additive model, a valueof 2 implies a model with
-#'     up to 2-way interactions, etc. Default is 2.
-#'   }
-#'   \item{\code{shrinkage}}{A shrinkage parameter applied to each tree in the
-#'     expansion. Also known asthe learning rate or step-size reduction; 0.001
-#'     to 0.1 usually work, but a smallerlearning rate typically requires more
-#'     trees. Default is 0.001.
-#'   }
-#'   \item{\code{...}}{Other parameters passed to \code{\link[gbm]{gbm}}.
-#'     See its documentation for details.
-#'   }
-#' }
-#
+#'   - \code{n.trees}: An integer specifying the total number of trees to fit.
+#'       This is equivalent to the number of iterations and the number of basis
+#'       functions in the additive expansion. The default is 10000.
+#'   - \code{interaction.depth}: An integer specifying the maximum depth of
+#'       each tree (i.e., the highest level of allowed variable interactions).
+#'       A value of 1 implies an additive model, while a value of 2 implies a
+#'       model with up to 2-way interactions, etc. The default is 2.
+#'   - \code{shrinkage}: A shrinkage parameter applied to each tree in the
+#'       expansion. Also known as the learning rate or step-size reduction;
+#'       values between 0.001 and 0.1 usually work, but a smaller learning
+#'       rate typically requires more trees. The default is 0.001.
+#'   - \code{...}: Other parameters passed to \code{\link[gbm]{gbm.fit}}. See
+#'       its documentation for details.
+#'
+#' @references
+#'  \insertAllCited{}
+#'
+#' @examples
+#' data(cpp_imputed)
+#' # create task for prediction
+#' cpp_task <- sl3_Task$new(
+#'   data = cpp_imputed,
+#'   covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "sexn"),
+#'   outcome = "haz"
+#' )
+#'
+#' # initialization, training, and prediction with the defaults
+#' gbm_lrnr <- Lrnr_gbm$new()
+#' gbm_fit <- gbm_lrnr$train(cpp_task)
+#' gbm_preds <- gbm_fit$predict()
 Lrnr_gbm <- R6Class(
   classname = "Lrnr_gbm", inherit = Lrnr_base,
   portable = TRUE, class = TRUE,
   public = list(
-    initialize = function(n.trees = 10000, interaction.depth = 2,
+    initialize = function(n.trees = 10000L, interaction.depth = 2,
                           shrinkage = 0.001, ...) {
      params <- args_to_list()
       super$initialize(params = params, ...)
     }
   ),
-
   private = list(
     .properties = c("continuous", "binomial"),
-
     .train = function(task) {
       args <- self$params
       outcome_type <- self$get_outcome_type(task)
@@ -69,20 +87,23 @@
         args$offset <- task$offset
       }

-      if (outcome_type$type == "continuous") {
-        distribution <- "gaussian"
-      } else if (outcome_type$type == "binomial") {
-        distribution <- "bernoulli"
-      } else {
-        stop("Unsupported outcome type for Lrnr_gbm.")
+      if (is.null(args$distribution)) {
+        if (outcome_type$type == "continuous") {
+          args$distribution <- "gaussian"
+        } else if (outcome_type$type == "binomial") {
+          args$distribution <- "bernoulli"
+        } else {
+          stop("Unsupported outcome type for Lrnr_gbm.")
+        }
+      }
+
+      if (is.null(args$verbose)) {
+        args$verbose <- getOption("sl3.verbose")
       }
-      args$distribution <- distribution
-      args$verbose <- FALSE

       fit_object <- call_with_args(gbm::gbm.fit, args)
       return(fit_object)
     },
-
     .predict = function(task) {
       preds <- stats::predict(
         object = private$.fit_object, newdata = task$X,

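To make the Lrnr_gbm behavior change above concrete: .train() now fills in `distribution` and `verbose` only when the user has not supplied them, instead of always overriding. A minimal sketch of the new override path, reusing the task from the @examples; the "laplace" distribution is an illustrative gbm option, not part of this diff:

library(sl3)
data(cpp_imputed)
cpp_task <- sl3_Task$new(
  data = cpp_imputed,
  covariates = c("apgar1", "apgar5", "parity", "gagebrth", "mage", "sexn"),
  outcome = "haz"
)
# previously distribution was always inferred and verbose was forced to FALSE;
# now both pass through to gbm::gbm.fit when supplied at initialization
gbm_laplace <- Lrnr_gbm$new(distribution = "laplace", verbose = TRUE)
gbm_fit <- gbm_laplace$train(cpp_task)
gbm_preds <- gbm_fit$predict()
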
@@ -39,10 +39,8 @@
       super$initialize(params = params, ...)
     }
   ),
-
   private = list(
     .properties = c("categorical"),
-
     .train = function(task) {
       outcome_type <- self$get_outcome_type(task)

@@ -62,7 +60,6 @@
       )
       return(fit_object)
     },
-
     .predict = function(task) {
       pred_hazards_task <- pooled_hazard_task(task, trim = FALSE)
       raw_preds <- self$fit_object$hazards_fit$predict(pred_hazards_task)

@@ -1,35 +1,51 @@
 #' Non-negative Linear Least Squares
 #'
-#' This learner provides fitting procedures for models with non-negative linear
-#' least squares, internally using the \code{nnls} package and
+#' This learner provides fitting procedures for models via non-negative linear
+#' least squares regression, using the \pkg{nnls} package's
 #' \code{\link[nnls]{nnls}} function.
 #'
 #' @docType class
 #'
 #' @importFrom R6 R6Class
-#' @importFrom assertthat assert_that is.count is.flag
+#' @importFrom data.table data.table
 #'
 #' @export
 #'
 #' @keywords data
 #'
-#' @return Learner object with methods for both training and prediction. See
-#'  \code{\link{Lrnr_base}} for documentation on learners.
+#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
+#'  methods for training and prediction. For a full list of learner
+#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
 #'
-#' @format \code{\link{R6Class}} object.
+#' @format An \code{\link[R6]{R6Class}} object inheriting from
+#'  \code{\link{Lrnr_base}}.
 #'
 #' @family Learners
 #'
 #' @section Parameters:
-#' \describe{
-#'   \item{\code{convex = FALSE}}{Normalize the coefficients to be a convex
-#'     combination}
-#'   \item{\code{...}}{Other parameters passed to
-#'     \code{\link[nnls]{nnls}}.}
-#' }
+#'   - \code{convex = FALSE}: Normalize the coefficients to be a convex
+#'       combination.
+#'   - \code{...}: Other parameters passed to \code{\link[nnls]{nnls}}.
 #'
-#' @template common_parameters
-#
+#' @examples
+#' data(cpp_imputed)
+#' covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs")
+#' task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz")
+#'
+#' lrnr_nnls <- make_learner(Lrnr_nnls)
+#' nnls_fit <- lrnr_nnls$train(task)
+#' nnls_preds <- nnls_fit$predict()
+#'
+#' # NNLS is commonly used as a metalearner in a super learner (i.e., Lrnr_sl)
+#' lrnr_glm <- make_learner(Lrnr_glm)
+#' lrnr_glmnet <- Lrnr_glmnet$new()
+#' lrnr_mean <- Lrnr_mean$new()
+#' learners <- c(lrnr_glm, lrnr_glmnet, lrnr_mean)
+#' names(learners) <- c("glm", "lasso", "mean") # optional, renaming learners
+#' simple_learner_stack <- make_learner(Stack, learners)
+#' sl <- Lrnr_sl$new(learners = simple_learner_stack, metalearner = lrnr_nnls)
+#' sl_fit <- sl$train(task)
+#' sl_preds <- sl_fit$predict()
 Lrnr_nnls <- R6Class(
   classname = "Lrnr_nnls", inherit = Lrnr_base,
   portable = TRUE, class = TRUE,
@@ -43,7 +59,6 @@
       print(self$fits)
     }
   ),
-
   active = list(
     fits = function() {
       fit_object <- private$.fit_object
@@ -54,7 +69,6 @@
       }
     }
   ),
-
   private = list(
     .properties = c("continuous"),
     .train = function(task) {
@@ -77,7 +91,6 @@
       }
       return(fit_object)
     },
-
     .predict = function(task = NULL) {
       predictions <- as.matrix(task$X) %*% coef(private$.fit_object)
       return(predictions)

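As a note on the `convex` option documented above: with convex = TRUE the non-negative coefficients are rescaled to sum to one. A hedged sketch of that normalization using plain nnls, for illustration only; the exact sl3 internals may differ:

library(nnls)
set.seed(34729)
x <- matrix(rnorm(300), ncol = 3)
y <- as.vector(x %*% c(0.2, 0.5, 0.3) + rnorm(100, sd = 0.1))
fit <- nnls(x, y)
beta <- coef(fit)
beta_convex <- beta / sum(beta) # non-negative weights that sum to one
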
@@ -112,14 +112,16 @@
       super$initialize(params = args_to_list(), ...)
     }
   ),
-
   private = list(
-    .properties = c("continuous", "binomial", "categorical", "weights"),
-
+    .properties = c("continuous", "binomial", "weights"),
     .train = function(task) {
       args <- self$params
       outcome_type <- self$get_outcome_type(task)

+      if (outcome_type$type == "categorical") {
+        stop("Unsupported outcome type for Lrnr_dbarts")
+      }
+
       # specify data
       args$x.train <- as.data.frame(task$X)
       args$y.train <- outcome_type$format(task$Y)
@@ -142,7 +144,6 @@

       return(fit_object)
     },
-
     .predict = function(task) {
       outcome_type <- private$.training_outcome_type


@@ -1,97 +1,91 @@
-#' BART Machine Learner
+#' bartMachine: Bayesian Additive Regression Trees (BART)
 #'
-#' This learner implements Bayesian Additive Regression Trees, using the
-#' \code{bartMachine} package.
+#' This learner implements Bayesian Additive Regression Trees via
+#' \pkg{bartMachine} (described in \insertCite{bartMachine;textual}{sl3})
+#' and the function \code{\link[bartMachine]{bartMachine}}.
 #'
 #' @docType class
 #'
 #' @importFrom R6 R6Class
 #' @importFrom stats predict
-#' @importFrom assertthat assert_that is.count is.flag
 #'
 #' @export
 #'
 #' @keywords data
 #'
-#' @return Learner object with methods for training and prediction. See
-#'   \code{\link{Lrnr_base}} for documentation on learners.
+#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
+#'  methods for training and prediction. For a full list of learner
+#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
 #'
-#' @format \code{\link{R6Class}} object.
+#' @format An \code{\link[R6]{R6Class}} object inheriting from
+#'  \code{\link{Lrnr_base}}.
 #'
 #' @family Learners
 #'
 #' @section Parameters:
-#' \describe{
-#'   \item{\code{Y}}{Outcome variable.}
-#'   \item{\code{X}}{Covariate dataframe.}
-#'   \item{\code{newX}}{Optional dataframe to predict the outcome.}
-#'   \item{\code{obsWeights}}{Optional observation-level weights (supported but not tested).}
-#'   \item{\code{id}}{Optional id to group observations from the same unit (not used
-#'   currently).}
-#'   \item{\code{family}}{"gaussian" for regression, "binomial" for binary classification.}
-#'   \item{\code{num_trees }}{The number of trees to be grown in the sum-of-trees model.}
-#'   \item{\code{num_burn_in}}{Number of MCMC samples to be discarded as "burn-in".}
-#'   \item{\code{num_iterations_after_burn_in}}{Number of MCMC samples to draw from the
-#'   posterior distribution of f(x).}
-#'   \item{\code{alpha}}{Base hyperparameter in tree prior for whether a node is
-#'   nonterminal or not.}
-#'   \item{\code{beta}}{Power hyperparameter in tree prior for whether a node is
-#'   nonterminal or not.}
-#'   \item{\code{k}}{For regression, k determines the prior probability that E(Y|X) is
-#'   contained in the interval (y_{min}, y_{max}), based on a normal
-#'   distribution. For example, when k=2, the prior probability is 95\%. For
-#'   classification, k determines the prior probability that E(Y|X) is between
-#'   (-3,3). Note that a larger value of k results in more shrinkage and a more
-#'   conservative fit.}
-#'   \item{\code{q}}{Quantile of the prior on the error variance at which the data-based
-#'   estimate is placed. Note that the larger the value of q, the more
-#'   aggressive the fit as you are placing more prior weight on values lower
-#'   than the data-based estimate. Not used for classification.}
-#'   \item{\code{nu}}{Degrees of freedom for the inverse chi^2 prior. Not used for
-#'   classification.}
-#'   \item{\code{verbose }}{Prints information about progress of the algorithm to the
-#'   screen.}
+#'   - \code{...}: Parameters passed to \code{\link[bartMachine]{bartMachine}}.
+#'       See its documentation for details.
 #'
-#' }
+#' @references
+#'  \insertAllCited{}
 #'
-#' @template common_parameters
-#
-
+#' @examples
+#' # set up ML task
+#' data(cpp_imputed)
+#' covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs")
+#' task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz")
+#'
+#' # fit a bartMachine model and predict from it
+#' bartMachine_learner <- make_learner(Lrnr_bartMachine)
+#' bartMachine_fit <- bartMachine_learner$train(task)
+#' preds <- bartMachine_fit$predict()
 Lrnr_bartMachine <- R6Class(
   classname = "Lrnr_bartMachine",
   inherit = Lrnr_base, portable = TRUE, class = TRUE,
   public = list(
-    initialize = function(num_trees = 50, num_burn_in = 250, verbose = F,
-                          alpha = 0.95, beta = 2, k = 2, q = 0.9, nu = 3,
-                          num_iterations_after_burn_in = 1000,
-                          prob_rule_class = 0.5, ...) {
+    initialize = function(...) {
+      if (is.null(getOption("java.parameters"))) {
+        warning(
+          "User did not specify Java RAM option, and this learner often fails",
+          " with the default RAM of 500MB,\n",
+          "so setting that now as `options(java.parameters = '-Xmx2500m')`.\n\n",
+          "Note that Xmx parameter's upper limit is system dependent \n",
+          "(e.g., 32bit Windows will fail to work with anything much larger ",
+          "than 1500m), \n",
+          "so ideally this option should be specified by the user."
+        )
+        options(java.parameters = "-Xmx2500m")
+      }
       super$initialize(params = args_to_list(), ...)
     }
   ),
-
   private = list(
-    .properties = c("continuous", "binomial", "categorical"),
-
+    .properties = c("continuous", "binomial"),
     .train = function(task) {
       args <- self$params
-      outcome_type <- self$get_outcome_type(task)
+
+      # get verbosity
+      if (is.null(args$verbose)) {
+        args$verbose <- getOption("sl3.verbose")
+      }

       # specify data
       args$X <- as.data.frame(task$X)
+      outcome_type <- self$get_outcome_type(task)
+      if (outcome_type$type == "categorical") {
+        stop("Unsupported outcome type for Lrnr_bartMachine")
+      }
       args$y <- outcome_type$format(task$Y)

       fit_object <- call_with_args(bartMachine::bartMachine, args)

       return(fit_object)
     },
-
     .predict = function(task) {
-      # outcome_type <- private$.training_outcome_type
       predictions <- stats::predict(
         private$.fit_object,
         new_data = data.frame(task$X)
       )
-
       return(predictions)
     },
     .required_packages = c("rJava", "bartMachine")

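The warning added to Lrnr_bartMachine's initialize() above concerns JVM memory. A sketch of the intended usage, taken directly from the option the diff itself sets: configure the Java heap before rJava/bartMachine are loaded, so the learner never needs to fall back to its own default.

# must run before library(bartMachine) or any other rJava-dependent package
options(java.parameters = "-Xmx2500m")
library(sl3)
data(cpp_imputed)
covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs")
task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz")
# extra arguments are passed straight to bartMachine::bartMachine
bart_lrnr <- Lrnr_bartMachine$new(num_trees = 50)
bart_fit <- bart_lrnr$train(task)
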
@@ -40,10 +40,8 @@
       super$initialize(params = params, ...)
     }
   ),
-
   private = list(
     .properties = c("categorical"),
-
     .train = function(task) {
       outcome_type <- self$get_outcome_type(task)

@@ -80,7 +78,6 @@
       }
       return(fit_object)
     },
-
     .predict = function(task) {
       raw_preds <- lapply(self$fit_object, learner_fit_predict, task)
       raw_preds <- as.matrix(do.call(cbind, raw_preds))
@@ -93,6 +90,7 @@
       # compute other categories relative to baseline
       predictions <- baseline * transformed
       predictions <- cbind(baseline, predictions)
+      colnames(predictions) <- levels(task$Y)
       predictions <- pack_predictions(predictions)
       return(predictions)
     },

@@ -55,15 +55,13 @@
       super$initialize(params = args_to_list(), ...)
     }
   ),
-
   private = list(
     .classify = FALSE,
     .return_prediction_as_vector = TRUE,
     .properties = c(
       "continuous", "binomial", "categorical", "weights",
-      "offset"
+      "offset", "h2o"
     ),
-
     .train = function(task) {
       verbose <- getOption("sl3.verbose")
       args <- self$params

@@ -1,9 +1,10 @@
-#' Earth - multivariate adaptive regression splines
+#' Earth: Multivariate Adaptive Regression Splines
 #'
-#' This learner provides fitting procedures for building regression models using
-#' the techniques in Friedman’s papers "Multivariate Adaptive Regres-sion
-#' Splines" and "Fast MARS", using the function \code{\link[earth]{earth}} from
-#' the \code{earth} package.
+#' This learner provides fitting procedures for building regression models
+#' through the spline regression techniques described in
+#' \insertCite{friedman1991multivariate;textual}{sl3} and
+#' \insertCite{friedman1993fast;textual}{sl3}, via \pkg{earth} and the function
+#' \code{\link[earth]{earth}}.
 #'
 #' @docType class
 #'
@@ -13,49 +14,56 @@
 #'
 #' @export
 #'
-#' @keywords data
+#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
+#'  methods for training and prediction. For a full list of learner
+#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
 #'
-#' @return Learner object with methods for training and prediction. See
-#'  \code{\link{Lrnr_base}} for documentation on learners.
-#'
-#' @format \code{\link{R6Class}} object.
+#' @format An \code{\link[R6]{R6Class}} object inheriting from
+#'  \code{\link{Lrnr_base}}.
 #'
 #' @family Learners
 #'
+#' @references
+#'  \insertAllCited{}
+#'
 #' @section Parameters:
-#' \describe{
-#'   \item{\code{degree}}{A \code{numeric} specifying the maximum degree of
-#'     interactions to be used in the model. This defaults to 2, specifying up
-#'     through one-way interaction terms. Note that this differs from the
-#'     default of \code{earth::earth}.
-#'   }
-#'   \item{\code{penalty}}{Generalized Cross Validation (GCV) penalty per knot.
-#'     Defaults to 3 as recommended for \code{degree} > 1 in the documentation
-#'     of \code{earth::earth}. Special values (for use by knowledgeable users):
-#'     The value 0 penalizes only terms, not knots. The value -1 means no
-#'     penalty.
-#'   }
-#'   \item{\code{pmethod}}{Pruning method, defaulting to \code{"backward"}.
-#'     Other options include \code{"none"}, \code{"exhaustive"},
-#      \code{"forward"}, \code{"seqrep"}, \code{"cv"}.
-#'   }
-#'   \item{\code{nfold}}{Number of cross-validation folds. Default is0, no
-#'     cross validation.
-#'   }
-#'   \item{\code{ncross}}{Only applies if \code{nfold} > 1. Number of
-#'     cross-validations. Each cross-validation has \code{nfold} folds.
-#'     Defaults to 1.
-#'   }
-#'   \item{\code{minspan}}{Minimum number of observations between knots.
-#'   }
-#'   \item{\code{endspan}}{Minimum number of observations before the first and
-#'     after the final knot.
-#'   }
-#'   \item{\code{...}}{Other parameters passed to \code{\link[earth]{earth}}.
-#'     See its documentation for details.
-#'   }
-#' }
-#
+#'   - \code{degree}: A \code{numeric} specifying the maximum degree of
+#'       interactions to be used in the model. This defaults to 2, specifying
+#'       up through one-way interaction terms. Note that this differs from the
+#'       default of \code{\link[earth]{earth}}.
+#'   - \code{penalty}: Generalized Cross Validation (GCV) penalty per knot.
+#'       Defaults to 3 as per the recommendation for \code{degree} > 1 in the
+#'       documentation of \code{\link[earth]{earth}}. Special values (for use
+#'       by knowledgeable users): The value 0 penalizes only terms, not knots.
+#'       The value -1 translates to no penalty.
+#'   - \code{pmethod}: Pruning method, defaulting to \code{"backward"}. Other
+#'       options include \code{"none"}, \code{"exhaustive"}, \code{"forward"},
+#'       \code{"seqrep"}, \code{"cv"}.
+#'   - \code{nfold}: Number of cross-validation folds. The default is 0, for no
+#'       cross-validation.
+#'   - \code{ncross}: Only applies if \code{nfold} > 1, indicating the number
+#'       of cross-validation rounds. Each cross-validation has \code{nfold}
+#'       folds. Defaults to 1.
+#'   - \code{minspan}: Minimum number of observations between knots.
+#'   - \code{endspan}: Minimum number of observations before the first and
+#'       after the final knot.
+#'   - \code{...}: Other parameters passed to \code{\link[earth]{earth}}. See
+#'       its documentation for details.
+#'
+#' @examples
+#' data(cpp_imputed)
+#' covars <- c(
+#'   "apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn"
+#' )
+#' outcome <- "haz"
+#' task <- sl3_Task$new(cpp_imputed,
+#'   covariates = covars,
+#'   outcome = outcome
+#' )
+#' # fit and predict from a MARS model
+#' earth_lrnr <- make_learner(Lrnr_earth)
+#' earth_fit <- earth_lrnr$train(task)
+#' earth_preds <- earth_fit$predict(task)
 Lrnr_earth <- R6Class(
   classname = "Lrnr_earth", inherit = Lrnr_base,
   portable = TRUE, class = TRUE,
@@ -67,10 +75,8 @@
       super$initialize(params = params, ...)
     }
   ),
-
   private = list(
     .properties = c("continuous", "binomial"),
-
     .train = function(task) {
       args <- self$params
       outcome_type <- self$get_outcome_type(task)
@@ -99,7 +105,6 @@
       fit_object <- call_with_args(earth_fun, args)
       return(fit_object)
     },
-
     .predict = function(task) {
       preds <- stats::predict(
         object = private$.fit_object, newdata = task$X,

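One detail worth illustrating from the Lrnr_earth documentation above: the learner's `degree` default (2) differs from earth::earth's own default (1), so an additive MARS fit must be requested explicitly. A short sketch:

library(sl3)
data(cpp_imputed)
covars <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs", "sexn")
task <- sl3_Task$new(cpp_imputed, covariates = covars, outcome = "haz")
# degree = 1 disables interaction terms, matching earth::earth's default
earth_additive <- Lrnr_earth$new(degree = 1)
earth_additive_fit <- earth_additive$train(task)
earth_additive_preds <- earth_additive_fit$predict(task)
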
@@ -1,7 +1,11 @@
-#' Generalized Additive Models
+#' GAM: Generalized Additive Models
 #'
-#' This learner provides fitting procedures for generalized additive models
-#' using \code{\link[mgcv]{gam}}.
+#' This learner provides fitting procedures for generalized additive models,
+#' using the routines from \pkg{mgcv} through a call to the function
+#' \code{\link[mgcv]{gam}}. The \pkg{mgcv} package and the use of GAMs are
+#' described thoroughly (with examples) in \insertCite{mgcv;textual}{sl3},
+#' while \insertCite{hastie-gams;textual}{sl3} provides an earlier, similarly
+#' thorough treatment of GAMs.
 #'
 #' @docType class
 #'
@@ -12,38 +16,51 @@
 #'
 #' @keywords data
 #'
-#' @return Learner object with methods for training and prediction. See
-#'  \code{\link{Lrnr_base}} for documentation on learners.
+#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
+#'  methods for training and prediction. For a full list of learner
+#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
 #'
-#' @format \code{\link{R6Class}} object.
+#' @format An \code{\link[R6]{R6Class}} object inheriting from
+#'  \code{\link{Lrnr_base}}.
 #'
 #' @family Learners
 #'
 #' @section Parameters:
-#' \describe{
-#'   \item{\code{formula}}{An optional argument specifying the formula of GAM.
-#'   Input type can be formula or string, or a list of them. If not specified,
-#'   continuous covariates will be smoothen with the smooth terms represented
-#'   using `penalized thin plate regression splines'. For a detailed
-#'   description, please consult the documentation for \code{\link[mgcv]{gam}}.}
+#'   - \code{formula}: An optional argument specifying the formula of the GAM.
+#'       Input type can be formula or string, or a list of them. If not
+#'       specified, continuous covariates will be smoothed with the smooth
+#'       terms represented using "penalized thin plate regression splines". For
+#'       a more detailed description, please consult the documentation for
+#'       \code{\link[mgcv]{gam}}.
+#'   - \code{family}: An optional argument specifying the family of the GAM.
+#'       See \code{\link[stats]{family}} and \code{\link[mgcv]{family.mgcv}}
+#'       for a list of available family functions. If left unspecified, it will
+#'       be inferred depending on the detected type of the outcome. For now,
+#'       GAM supports binomial and gaussian outcome types, if \code{formula} is
+#'       unspecified. For a more detailed description of this argument, please
+#'       consult the documentation of \code{\link[mgcv]{gam}}.
+#'   - \code{method}: An optional argument specifying the method for smoothing
+#'       parameter selection. The default is generalized cross-validation
+#'       (GCV). For more details on this argument, consult the documentation of
+#'       \code{\link[mgcv]{gam}}.
+#'   - \code{...}: Other parameters passed to \code{\link[mgcv]{gam}}. See its
+#'       documentation for details.
 #'
-#'   \item{\code{family}}{An optional argument specifying the family of GAM. See
-#'   \code{\link{family}} and \code{\link[mgcv]{family.mgcv}} for a list of
-#'   available families. If not specified, it will be inferred depending on the
-#'   type of the outcome. For now the GAM supports binomial and gaussian if
-#'   \code{formula} is not specified. For a detailed description, please consult
-#'   the documentation for \code{\link[mgcv]{gam}}.}
+#' @references
+#'  \insertAllCited{}
 #'
-#'   \item{\code{method}}{An optional argument specifying the method for
-#'   smoothing parameter selection criterion. Default is set to GCV. For a
-#'   detailed description, please consult the documentation for
-#'   \code{\link[mgcv]{gam}}.}
-#'
-#'   \item{\code{...}}{Other parameters passed to \code{\link[mgcv]{gam}}.}
-#' }
-#'
-#' @template common_parameters
-#
+#' @examples
+#' data(cpp_imputed)
+#' # create task for prediction
+#' cpp_task <- sl3_Task$new(
+#'   data = cpp_imputed,
+#'   covariates = c("bmi", "parity", "mage", "sexn"),
+#'   outcome = "haz"
+#' )
+#' # initialization, training, and prediction with the defaults
+#' gam_lrnr <- Lrnr_gam$new()
+#' gam_fit <- gam_lrnr$train(cpp_task)
+#' gam_preds <- gam_fit$predict()
 Lrnr_gam <- R6Class(
   classname = "Lrnr_gam", inherit = Lrnr_base,
   portable = TRUE, class = TRUE,
@@ -56,10 +73,8 @@
       super$initialize(params = params, ...)
     }
   ),
-
   private = list(
-    .properties = c("continuous", "binomial", "categorical"),
-
+    .properties = c("continuous", "binomial"),
    .train = function(task) {
       # load args
       args <- self$params
@@ -72,16 +87,15 @@
       ## family
       if (is.null(args$family)) {
         if (outcome_type$type == "continuous") {
-          args$family <- gaussian
+          args$family <- stats::gaussian()
         } else if (outcome_type$type == "binomial") {
-          args$family <- binomial
+          args$family <- stats::binomial()
         } else if (outcome_type$type == "categorical") {
           # TODO: implement categorical?
-          ##      (have to specify (#{categories} - 1)
-          ##       linear predictors in formula)
-          stop("Categorical outcome is unsupported in Lrnr_gam for now.")
+          # NOTE: must specify (#{categories} - 1) linear predictors in formula
+          stop("Categorical outcomes are unsupported by Lrnr_gam for now.")
         } else {
-          stop("Specified outcome type is unsupported in Lrnr_gam.")
+          stop("Specified outcome type is unsupported by Lrnr_gam.")
         }
       }
       ## formula
@@ -95,10 +109,14 @@
         i_continuous <- covars_type == "continuous"
         X_continuous <- task$X[, ..i_continuous]
         "y ~ s(x1) + s(x2) + x3"
-        X_smooth <- sapply(
-          colnames(X_continuous),
-          function(iter) paste0("s(", iter, ")")
-        )
+        X_smooth <- sapply(colnames(X_continuous), function(x) {
+          unique_x <- unlist(unique(task$X[, x, with = F]))
+          if (length(unique_x) < 10) {
+            paste0("s(", x, ", k=", length(unique_x), ")")
+          } else {
+            paste0("s(", x, ")")
+          }
+        })
         if (length(X_continuous) > 0 & length(X_discrete) > 0) {
           args$formula <- as.formula(paste(c(
             colnames(Y),
@@ -137,21 +155,21 @@
       } else {
         args$formula <- as.formula(args$formula)
       }
+
       # fit
       fit_object <- call_with_args(mgcv::gam, args)
       return(fit_object)
     },
-
     .predict = function(task) {
       # get predictions
       predictions <- stats::predict(
         private$.fit_object,
-        newdata = task$X
+        newdata = task$X,
+        type = "response"
       )
       predictions <- as.numeric(predictions)
       return(predictions)
     },
-
     .required_packages = c("mgcv")
   )
 )

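To connect the formula-related pieces of the Lrnr_gam diff above: when no formula is given, .train() now builds smooth terms with k set to the number of unique values for low-cardinality continuous covariates, while a user-supplied string or formula bypasses that logic entirely. A brief sketch of both paths, using the task from the @examples:

library(sl3)
data(cpp_imputed)
cpp_task <- sl3_Task$new(
  data = cpp_imputed,
  covariates = c("bmi", "parity", "mage", "sexn"),
  outcome = "haz"
)
# default: smooths are generated automatically, with k = #unique values when a
# continuous covariate has fewer than 10 unique values (per the change above)
gam_auto <- Lrnr_gam$new()
# explicit formula: passed through as.formula() in .train()
gam_custom <- Lrnr_gam$new(formula = "haz ~ s(bmi) + s(mage) + parity + sexn")
gam_fit <- gam_custom$train(cpp_task)
gam_preds <- gam_fit$predict()
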
@@ -37,10 +37,8 @@
       super$initialize(params = params)
     }
   ),
-
   private = list(
     .properties = c("timeseries", "continuous"),
-
     .train = function(task) {
       args <- self$params
       learner <- args$learner
@@ -60,7 +58,6 @@
       fit_object <- call_with_args(learner_fun, args)
       return(fit_object)
     },
-
     .predict = function(task = NULL) {
       params <- self$params
       h <- ts_get_pred_horizon(self$training_task, task)

@@ -42,10 +42,8 @@
       super$initialize(params = args_to_list(), ...)
     }
   ),
-
   private = list(
     .properties = c("continuous", "binomial", "categorical", "weights"),
-
     .train = function(task) {
       args <- self$params
       outcome_type <- self$get_outcome_type(task)
@@ -70,7 +68,6 @@

       return(fit_object)
     },
-
     .predict = function(task) {
       outcome_type <- private$.training_outcome_type


@@ -1,8 +1,12 @@
 #' Variable Importance Screener
 #'
-#' This learner provides screening of covariates based on the variables sorted
-#' in decreasing order of importance, where the importance metric is based on
-#' a learner that supports importance.
+#' This learner screens covariates based on their variable importance, where the
+#' importance values are obtained from the \code{learner}. Any learner with an
+#' \code{importance} method can be used. The set of learners with support for
+#' \code{importance} can be found with \code{sl3_list_learners("importance")}.
+#' Like all other screeners, this learner is intended for use in a
+#' \code{\link{Pipeline}}, so the output from this learner (i.e., the selected
+#' covariates) can be used as input for the next learner in the pipeline.
 #'
 #' @docType class
 #'
@@ -12,22 +16,59 @@
 #'
 #' @keywords data
 #'
-#' @return Learner object with methods for training and prediction. See
-#'  \code{\link{Lrnr_base}} for documentation on learners.
+#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
+#'  methods for training and prediction. For a full list of learner
+#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
 #'
-#' @format \code{\link{R6Class}} object.
+#' @format An \code{\link[R6]{R6Class}} object inheriting from
+#'  \code{\link{Lrnr_base}}.
 #'
 #' @family Learners
 #'
 #' @section Parameters:
-#' \describe{
-#'   \item{\code{learner}}{An instantiated learner that supports variable
-#'   importance.}
-#'   \item{\code{num_screen = 5}}{The top number of most important variables
-#'   to retain.}
-#'   \item{\code{...}}{Other parameters passed to \code{learner}'s
-#'   \code{importance} function.}
-#' }
+#'  - \code{learner}: An instantiated learner that supports variable importance.
+#'      The set of learners with this support can be obtained via
+#'      \code{sl3_list_learners("importance")}.
+#'  - \code{num_screen = 5}: The number of top "most important" variables to
+#'      retain.
+#'  - \code{...}: Other parameters passed to the \code{learner}'s
+#'      \code{importance} function.
+#'
+#' @examples
+#' data(mtcars)
+#' mtcars_task <- sl3_Task$new(
+#'   data = mtcars,
+#'   covariates = c(
+#'     "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am",
+#'     "gear", "carb"
+#'   ),
+#'   outcome = "mpg"
+#' )
+#' glm_lrnr <- make_learner(Lrnr_glm)
+#'
+#' # screening based on \code{\link{Lrnr_ranger}} variable importance
+#' ranger_lrnr_importance <- Lrnr_ranger$new(importance = "impurity_corrected")
+#' ranger_importance_screener <- Lrnr_screener_importance$new(
+#'   learner = ranger_lrnr_importance, num_screen = 3
+#' )
+#' ranger_screen_glm_pipe <- Pipeline$new(ranger_importance_screener, glm_lrnr)
+#' ranger_screen_glm_pipe_fit <- ranger_screen_glm_pipe$train(mtcars_task)
+#'
+#' # screening based on \code{\link{Lrnr_randomForest}} variable importance
+#' rf_lrnr <- Lrnr_randomForest$new()
+#' rf_importance_screener <- Lrnr_screener_importance$new(
+#'   learner = rf_lrnr, num_screen = 3
+#' )
+#' rf_screen_glm_pipe <- Pipeline$new(rf_importance_screener, glm_lrnr)
+#' rf_screen_glm_pipe_fit <- rf_screen_glm_pipe$train(mtcars_task)
+#'
+#' # screening based on \code{\link{Lrnr_xgboost}} variable importance
+#' xgb_lrnr <- Lrnr_xgboost$new()
+#' xgb_importance_screener <- Lrnr_screener_importance$new(
+#'   learner = xgb_lrnr, num_screen = 3
+#' )
+#' xgb_screen_glm_pipe <- Pipeline$new(xgb_importance_screener, glm_lrnr)
+#' xgb_screen_glm_pipe_fit <- xgb_screen_glm_pipe$train(mtcars_task)
 Lrnr_screener_importance <- R6Class(
   classname = "Lrnr_screener_importance",
   inherit = Lrnr_base, portable = TRUE, class = TRUE,
@@ -46,7 +87,6 @@
   ),
   private = list(
     .properties = c("screener"),
-
     .train = function(task) {
       params <- self$params

@@ -61,7 +101,7 @@
       # extract variable names from importance result object
       if (is.null(rownames(importance_result))) {
         if (is.null(names(importance_result))) {
-          stop("Importance result missing variable names. Cannot subset covs.")
+          stop("Cannot find covariate names in importance result.")
         } else {
           importance_names_sorted <- names(importance_result)
         }
@@ -73,16 +113,15 @@
       # e.g., cov "color" was one-hot encoded and renamed as "color_blue",
       # "color_green", "color_red", so we change all three back to "color"
       covs <- task$nodes$covariates
-      no_match_covs <- is.na(pmatch(covs, importance_names_sorted))
-      if (any(no_match_covs)) {
+      matched_covs <- match(covs, importance_names_sorted)
+      if (any(is.na(matched_covs))) {
         # which cov names do not exist in the importance_names_sorted?
-        no_match_covs_idx <- which(no_match_covs)
-        for (i in 1:length(no_match_covs_idx)) {
-          cov_idx <- no_match_covs_idx[i]
+        unmatched_covs <- covs[is.na(matched_covs)]
+        for (i in 1:length(unmatched_covs)) {
          # which importance_names_sorted correspond to one cov
-          idx <- grep(covs[cov_idx], importance_names_sorted)
+          idx <- grep(unmatched_covs[i], importance_names_sorted)
           # rename importance_names_sorted according to true cov name
-          importance_names_sorted[idx] <- rep(covs[cov_idx], length(idx))
+          importance_names_sorted[idx] <- rep(unmatched_covs[i], length(idx))
         }
         importance_names_sorted <- unique(importance_names_sorted)
       }
@@ -93,7 +132,7 @@
       return(fit_object)
     },
     .predict = function(task) {
-      task$X[, private$.fit_object$selected, with = FALSE, drop = FALSE]
+      task$data[, private$.fit_object$selected, with = FALSE, drop = FALSE]
     },
     .chain = function(task) {
       return(task$next_in_chain(covariates = private$.fit_object$selected))

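A quick companion sketch to the documentation above, using the discovery call the new docs themselves point to:

library(sl3)
# learners providing an importance method, usable as the screener's `learner`
sl3_list_learners("importance")
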
@@ -71,8 +71,7 @@
     }
   ),
   private = list(
-    .properties = c("continuous", "binomial", "categorical"),
-
+    .properties = c("continuous", "binomial", "categorical", "wrapper"),
     .train = function(task) {
       # set type
       outcome_type <- self$get_outcome_type(task)
@@ -101,7 +100,6 @@
       fit_object <- call_with_args(caret::train, args, keep_all = TRUE)
       return(fit_object)
     },
-
     .predict = function(task) {
       outcome_type <- self$training_outcome_type
       if (outcome_type$type == "continuous") {
@@ -123,7 +121,6 @@
       predictions <- as.numeric(predictions)
       return(predictions)
     },
-
     .required_packages = c("caret")
   )
 )

@@ -1,138 +1,119 @@
 #' Harmonic Regression
 #'
 #' @description This learner fits first harmonics in a Fourier expansion to one
-#' or more time series. Fourier decomposition relies on
-#' \code{\link[forecast]{fourier}}, and the time series is fit using
-#' \code{\link[forecast]{tslm}}
+#'  or more time series. Fourier decomposition relies on
+#'  \code{\link[forecast]{fourier}}, and the time series is fit using
+#'  \code{\link[forecast]{tslm}}. For further details on working with harmonic
+#'  regression for time-series with package \pkg{forecast}, consider consulting
+#'  \insertCite{forecast;textual}{sl3} and
+#'  \insertCite{hyndman2008forecast-jss;textual}{sl3}.
 #'
 #' @docType class
 #'
 #' @importFrom R6 R6Class
-#' @importFrom assertthat assert_that is.count is.flag
 #' @importFrom stats arima
 #'
 #' @export
 #'
 #' @keywords data
 #'
-#' @return Learner object with methods for training and prediction. See
-#'  \code{\link{Lrnr_base}} for documentation on learners.
+#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
+#'  methods for training and prediction. For a full list of learner
+#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
 #'
-#' @format \code{\link{R6Class}} object.
+#' @format An \code{\link[R6]{R6Class}} object inheriting from
+#'  \code{\link{Lrnr_base}}.
 #'
 #' @family Learners
 #'
 #' @section Parameters:
-#' \describe{
-#'   \item{\code{Kparam}}{Maximum order of the fourier terms. Passed to
-#'     \code{\link[forecast]{fourier}}.}
-#'   \item{\code{n.ahead=NULL}}{ The forecast horizon. If not specified, returns
-#'     forecast of size \code{task$Y}.}
-#'   \item{\code{freq}}{The frequency of the time series.}
-#'   \item{\code{...}}{Not used.}
-#' }
-#
+#'  - \code{K}: Maximum order of the fourier terms. Passed to
+#'      \code{\link[forecast]{fourier}}.
+#'  - \code{freq}: The frequency of the time series.
+#'  - \code{...}: Other parameters passed to \code{\link[forecast]{fourier}}.
+#'
+#' @references
+#'  \insertAllCited{}
+#'
+#' @examples
+#' library(origami)
+#' library(data.table)
+#' data(bsds)
+#'
+#' # make folds appropriate for time-series cross-validation
+#' folds <- make_folds(bsds,
+#'   fold_fun = folds_rolling_window, window_size = 500,
+#'   validation_size = 100, gap = 0, batch = 50
+#' )
+#'
+#' # build task by passing in external folds structure
+#' task <- sl3_Task$new(
+#'   data = bsds,
+#'   folds = folds,
+#'   covariates = c(
+#'     "weekday", "temp"
+#'   ),
+#'   outcome = "cnt"
+#' )
+#'
+#' # create tasks for training and validation
+#' train_task <- training(task, fold = task$folds[[1]])
+#' valid_task <- validation(task, fold = task$folds[[1]])
+#'
+#' # instantiate learner, then fit and predict
+#' HarReg_learner <- Lrnr_HarmonicReg$new(K = 7, freq = 105)
+#' HarReg_fit <- HarReg_learner$train(train_task)
+#' HarReg_preds <- HarReg_fit$predict(valid_task)
 Lrnr_HarmonicReg <- R6Class(
   classname = "Lrnr_HarmonicReg",
   inherit = Lrnr_base,
   portable = TRUE,
   class = TRUE,
   public = list(
-    initialize = function(Kparam, n.ahead = NULL, freq, ...) {
+    initialize = function(K, freq, ...) {
       params <- args_to_list()
       super$initialize(params = params, ...)
-      if (!is.null(n.ahead)) {
-        warning("n.ahead paramater is specified- obtaining an ensemble will fail. 
-                Please only use for obtaining individual learner forcasts.")
-      }
     }
   ),
-
   private = list(
     .properties = c("timeseries", "continuous"),
-
     .train = function(task) {
-      params <- self$params
-      Kparam <- params[["Kparam"]]
-      freq <- params[["freq"]]
-      task_ts <- ts(task$Y, frequency = freq)
+      args <- self$params
+      args$x <- ts(task$Y, frequency = args$freq)

-      if (length(freq) != length(Kparam)) {
+      # Checks
+      if (length(args$freq) != length(args$K)) {
         stop("Number of periods does not match number of orders")
-      } else if (any(2 * Kparam > freq)) {
+      } else if (any(2 * args$K > args$freq)) {
         stop("K must not be greater than period/2")
       }

-      fourier_fit <- forecast::fourier(task_ts, K = Kparam)
-      fit_object <- forecast::tslm(task_ts ~ fourier_fit)
+      # Passes a warning for an extra argument: that's ok
+      # forecast::fourier doesn't take freq as an argument anymore
+      fourier_fit <- call_with_args(
+        forecast::fourier, args,
+        ignore = "freq"
+      )
+      fit_object <- forecast::tslm(args$x ~ fourier_fit)
       return(fit_object)
     },
-
     .predict = function(task = NULL) {
-      params <- self$params
-      n.ahead <- params[["n.ahead"]]
-      freq <- params[["freq"]]
-      Kparam <- params[["Kparam"]]
-
-      # See if there is gap between training and validation:
-      gap <- min(task$folds[[1]]$validation_set) - max(task$folds[[1]]$training_set)
-
-      if (gap > 1) {
-        if (is.null(n.ahead)) {
-          n.ahead <- task$nrow + gap
-        } else {
-          n.ahead <- n.ahead + gap
-        }
-        task_ts <- ts(task$Y, frequency = freq)
-        fourier_fit <- data.frame(forecast::fourier(
-          task_ts,
-          K = Kparam,
-          h = n.ahead
-        ))
-        predictions <- forecast::forecast(private$.fit_object, fourier_fit)
-
-        # Create output as in glm
-        predictions <- as.numeric(predictions$mean)
-        predictions <- structure(predictions, names = seq_len(length(predictions)))
-        return(predictions)
-      } else if (gap == 1) {
-        if (is.null(n.ahead)) {
-          n.ahead <- task$nrow
-        }
-        task_ts <- ts(task$Y, frequency = freq)
-        fourier_fit <- data.frame(forecast::fourier(
-          task_ts,
-          K = Kparam,
-          h = n.ahead
-        ))
-        predictions <- forecast::forecast(private$.fit_object, fourier_fit)
-
-        # Create output as in glm
-        predictions <- as.numeric(predictions$mean)
-        predictions <- structure(predictions, names = seq_len(length(predictions)))
-        return(predictions)
-      } else if (gap < 1) {
-        warning("Validation samples come before Training samples; 
-                please specify one of the time-series fold structures.")
-
-        if (is.null(n.ahead)) {
-          n.ahead <- task$nrow
-        }
-        task_ts <- ts(task$Y, frequency = freq)
-        fourier_fit <- data.frame(forecast::fourier(
-          task_ts,
-          K = Kparam,
-          h = n.ahead
-        ))
-        predictions <- forecast::forecast(private$.fit_object, fourier_fit)
-
-        # Create output as in glm
-        predictions <- as.numeric(predictions$mean)
-        predictions <- structure(predictions, names = seq_len(length(predictions)))
-        return(predictions)
-      }
+      h <- ts_get_pred_horizon(self$training_task, task)
+      x <- ts(task$Y, frequency = self$params$freq)
+      fourier_fit <- data.frame(
+        forecast::fourier(
+          x,
+          K = self$params$K,
+          h = h
+        )
+      )
+      raw_preds <- forecast::forecast(private$.fit_object, fourier_fit)
+      preds <- as.numeric(raw_preds$mean)
+      requested_preds <- ts_get_requested_preds(
+        self$training_task, task, preds
+      )
+      return(requested_preds)
     },
-
     .required_packages = c("forecast")
   )
 )

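A small sketch of the argument checks now in Lrnr_HarmonicReg's .train() above: K and freq must have equal length, and each K must satisfy 2 * K <= freq.

library(sl3)
ok_lrnr <- Lrnr_HarmonicReg$new(K = 7, freq = 105) # fine: 2 * 7 <= 105
# Lrnr_HarmonicReg$new(K = 60, freq = 105) # would stop(): 2 * 60 > 105
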
@@ -44,9 +44,8 @@
       super$initialize(params = params, ...)
     }
   ),
-
   private = list(
-    .properties = c("binomial", "continuous", "weights", "ids"),
+    .properties = c("binomial", "continuous", "weights", "ids", "wrapper"),
     .train = function(task) {
       args <- self$params
       wrapper <- args$wrapper_fun
@@ -65,7 +64,6 @@
       )$fit
       return(fit_object)
     },
-
     .predict = function(task) {
       args <- self$params
       outcome_type <- private$.training_outcome_type

@@ -34,7 +34,6 @@
       params <- list(revere_function = revere_function, ...)
       super$initialize(params = params, ...)
     },
-
     predict_fold = function(task, fold_number = "validation") {
       stop("this learner is meant for chaining only")
     },
@@ -48,24 +47,19 @@
       return(revere_task$revere_fold_task(fold_number))
     }
   ),
-
   active = list(
     name = function() {
       name <- paste("CV", self$params$learner$name, sep = "_")
     }
   ),
-
   private = list(
     .properties = c("wrapper", "cv"),
-
-
     .train = function(task, trained_sublearners) {
       fit_object <- list(
         revere_task = sl3_revere_Task$new(self$params$revere_function, task)
       )
       return(fit_object)
     },
-
     .predict = function(task) {
       stop("this learner is meant for chaining only")
       return(predictions)

@@ -1,48 +1,68 @@
Loading
1 1
#' Support Vector Machines
2 2
#'
3 -
#' This learner uses \code{\link[e1071]{svm}} from \code{e1071} to fit a support
4 -
#' vector machine (SVM).
3 +
#' This learner provides fitting procedures for support vector machines, using
4 +
#' the routines from \pkg{e1071} (described in \insertCite{e1071;textual}{sl3}
5 +
#' and \insertCite{libsvm;textual}{sl3}, the core library to which \pkg{e1071}
6 +
#' is an interface) through a call to the function \code{\link[e1071]{svm}}.
5 7
#'
6 8
#' @docType class
7 9
#'
8 10
#' @importFrom R6 R6Class
11 +
#' @importFrom stats predict
9 12
#'
10 13
#' @export
11 14
#'
12 15
#' @keywords data
13 16
#'
14 -
#' @return Learner object with methods for training and prediction. See
15 -
#'   \code{\link{Lrnr_base}} for documentation on learners.
17 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
18 +
#'  methods for training and prediction. For a full list of learner
19 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
16 20
#'
17 -
#' @format \code{\link{R6Class}} object.
21 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
22 +
#'  \code{\link{Lrnr_base}}.
18 23
#'
19 24
#' @family Learners
20 25
#'
21 26
#' @section Parameters:
22 -
#' \describe{
23 -
#'   \item{\code{scale = TRUE}}{A logical vector indicating the variables to be
24 -
#'     scaled. For a detailed description, please consult the documentation for
25 -
#'     \code{\link[e1071]{svm}}.}
26 -
#'   \item{\code{type = NULL}}{SVMs can be used as a classification machine, as
27 -
#'     a regression machine, or for novelty detection. Depending of whether the
28 -
#'     outcome is a factor or not, the default setting for this argument is
29 -
#'     "C-classification" or "eps-regression", respectively. This may be
30 -
#'     overwritten by setting an explicit value. For a full set of options,
31 -
#'    please consult the documentation for \code{\link[e1071]{svm}}.}
32 -
#'   \item{\code{kernel = "radial"}}{The kernel used in training and predicting.
33 -
#'     You might consider changing some of the optional parameters, depending on
34 -
#'     the kernel type. Options for kernels include: "linear", "polynomial",
35 -
#'     "radial" (the default), "sigmoid". For a detailed description, please
36 -
#'     consult the documentation for \code{\link[e1071]{svm}}.}
37 -
#'   \item{\code{fitted = TRUE}}{Logical indicating whether the fitted values
38 -
#'     should be computed and included in the model fit object or not
39 -
#'     (default: \code{TRUE}).}
40 -
#'   \item{\code{probability = FALSE}}{Logical indicating whether the model
41 -
#'     should allow for probability predictions (default: \code{FALSE}).}
42 -
#'   \item{\code{...}}{Other parameters passed to \code{\link[e1071]{svm}}.
43 -
#'     See its documentation for details.}
44 -
#' }
45 -
#
27 +
#'   - \code{scale = TRUE}: A logical vector indicating the variables to be
28 +
#'       scaled. For a detailed description, please consult the documentation
29 +
#'       for \code{\link[e1071]{svm}}.
30 +
#'   - \code{type = NULL}: SVMs can be used as a classification machine, as a
31 +
#'       regression machine, or for novelty detection. Depending on whether
32 +
#'       the outcome is a factor or not, the default setting for this argument
33 +
#'       is "C-classification" or "eps-regression", respectively. This may be
34 +
#'       overwritten by setting an explicit value. For a full set of options,
35 +
#'       please consult the documentation for \code{\link[e1071]{svm}}.
36 +
#'   - \code{kernel = "radial"}: The kernel used in training and predicting.
37 +
#'       You may consider changing some of the optional parameters, depending
38 +
#'       on the kernel type. Kernel options include: "linear", "polynomial",
39 +
#'       "radial" (the default), "sigmoid". For a detailed description, consult
40 +
#'       the documentation for \code{\link[e1071]{svm}}.
41 +
#'   - \code{fitted = TRUE}: Logical indicating whether the fitted values
42 +
#'       should be computed and included in the model fit object or not.
43 +
#'   - \code{probability = FALSE}: Logical indicating whether the model should
44 +
#'       allow for probability predictions.
45 +
#'   - \code{...}: Other parameters passed to \code{\link[e1071]{svm}}. See its
46 +
#'       documentation for details.
47 +
#'
48 +
#' @references
49 +
#'  \insertAllCited{}
50 +
#'
51 +
#' @examples
52 +
#' data(mtcars)
53 +
#' # create task for prediction
54 +
#' mtcars_task <- sl3_Task$new(
55 +
#'   data = mtcars,
56 +
#'   covariates = c(
57 +
#'     "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am",
58 +
#'     "gear", "carb"
59 +
#'   ),
60 +
#'   outcome = "mpg"
61 +
#' )
62 +
#' # initialization, training, and prediction with the defaults
63 +
#' svm_lrnr <- Lrnr_svm$new()
64 +
#' svm_fit <- svm_lrnr$train(mtcars_task)
65 +
#' svm_preds <- svm_fit$predict()
46 66
Lrnr_svm <- R6Class(
47 67
  classname = "Lrnr_svm", inherit = Lrnr_base,
48 68
  portable = TRUE, class = TRUE,
@@ -53,28 +73,17 @@
53 73
                          fitted = TRUE,
54 74
                          probability = FALSE,
55 75
                          ...) {
56 -
      # this captures all parameters to initialize and saves them as self$params
57 76
      params <- args_to_list()
58 77
      super$initialize(params = params, ...)
59 78
    }
60 79
  ),
61 80
  private = list(
62 81
    .properties = c("continuous", "binomial", "categorical"),
63 -
64 -
    # .train takes task data and returns fit object used to generate predictions
65 82
    .train = function(task) {
66 -
      verbose <- getOption("sl3.verbose")
67 -
      # generate an argument list from the parameters that were
68 -
      # captured when your learner was initialized.
69 -
      # this allows users to pass arguments directly to your ml function
70 83
      args <- self$params
71 84
72 -
      # get outcome variable type
73 -
      # preferring learner$params$outcome_type first, then task$outcome_type
85 +
      # set SVM type based on detected outcome family
74 86
      outcome_type <- self$get_outcome_type(task)
75 -
76 -
      # should pass something on to your learner indicating outcome_type
77 -
      # e.g. family or objective
78 87
      if (is.null(args$type)) {
79 88
        if (outcome_type$type == "continuous") {
80 89
          args$type <- "eps-regression"
@@ -82,16 +91,17 @@
82 91
          args$type <- "C-classification"
83 92
          args$probability <- TRUE
84 93
        } else {
85 -
          stop("Specified outcome type is unsupported in Lrnr_svm.")
94 +
          stop("Detected outcome type is incompatible with Lrnr_svm.")
86 95
        }
87 96
      }
88 97
89 98
      # add task data to the argument list
90 -
      # what these arguments are called depends on the learner you are wrapping
91 99
      args$x <- as.matrix(task$X)
92 100
      args$y <- outcome_type$format(task$Y)
93 101
94 102
      if (task$has_node("weights")) {
103 +
        # e1071's SVM implementation does not support observation-level weights
104 +
        # NOTE: see, e.g., https://cran.r-project.org/web/packages/WeightSVM/
95 105
        args$weights <- task$weights
96 106
      }
97 107
@@ -99,27 +109,19 @@
99 109
        args$offset <- task$offset
100 110
      }
101 111
102 -
      # call a function that fits your algorithm
103 -
      # with the argument list you constructed
112 +
      # NOTE: svm()'s formals list is essentially empty, hence use of keep_all
104 113
      fit_object <- call_with_args(e1071::svm, args, keep_all = TRUE)
105 114
      return(fit_object)
106 115
    },
107 -
108 -
    # .predict takes a task and returns predictions from that task
109 116
    .predict = function(task) {
110 -
      # get predictions
111 -
112 117
      predictions <- stats::predict(
113 118
        private$.fit_object,
114 119
        newdata = task$X,
115 -
        probability = (task$outcome_type$type %in% c("binomial", "categorical"))
120 +
        probability = task$outcome_type$type %in% c("binomial", "categorical")
116 121
      )
117 122
118 -
119 -
120 123
      if (task$outcome_type$type %in% c("binomial", "categorical")) {
121 124
        predictions <- attr(predictions, "probabilities")
122 -
        # pack predictions in a single column
123 125
        predictions <- pack_predictions(predictions)
124 126
      } else {
125 127
        predictions <- as.numeric(predictions)
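Since binomial and categorical predictions are packed into a single column here via pack_predictions(), downstream code must unpack them. A minimal sketch, assuming sl3's exported unpack_predictions() helper and a binary outcome; the task construction below is illustrative, not part of this diff:

library(sl3)
data(mtcars)
# binary outcome ("am" is 0/1), so per the branch above Lrnr_svm packs
# class probabilities into one column
task_bin <- sl3_Task$new(
  data = mtcars,
  covariates = c("mpg", "cyl", "disp", "hp", "wt"),
  outcome = "am"
)
svm_fit <- Lrnr_svm$new()$train(task_bin)
preds_packed <- svm_fit$predict() # single packed column
preds_matrix <- unpack_predictions(preds_packed) # per-class probabilities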

@@ -1,35 +1,58 @@
1 1
#' xgboost: eXtreme Gradient Boosting
2 2
#'
3 -
#' This learner provides fitting procedures for \code{xgboost} models, using the
4 -
#' \code{xgboost} package, using the \code{\link[xgboost]{xgb.train}} function.
5 -
#' Such models are classification and regression trees with extreme gradient
3 +
#' This learner provides fitting procedures for \code{xgboost} models, using
4 +
#' the \pkg{xgboost} package, via \code{\link[xgboost]{xgb.train}}. Such
5 +
#' models are classification and regression trees with extreme gradient
6 6
#' boosting. For details on the fitting procedure, consult the documentation of
7 -
#' the \code{xgboost} package.
7 +
#' the \pkg{xgboost} package and \insertCite{xgboost;textual}{sl3}.
8 8
#'
9 9
#' @docType class
10 10
#'
11 11
#' @importFrom R6 R6Class
12 +
#' @importFrom stats predict
12 13
#'
13 14
#' @export
14 15
#'
15 16
#' @keywords data
16 17
#'
17 -
#' @return Learner object with methods for training and prediction. See
18 -
#'  \code{\link{Lrnr_base}} for documentation on learners.
18 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
19 +
#'  methods for training and prediction. For a full list of learner
20 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
19 21
#'
20 -
#' @format \code{\link{R6Class}} object.
22 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
23 +
#'  \code{\link{Lrnr_base}}.
21 24
#'
22 25
#' @family Learners
23 26
#'
27 +
#' @seealso [Lrnr_gbm] for standard gradient boosting models (via the \pkg{gbm}
28 +
#'  package) and [Lrnr_lightgbm] for the faster and more efficient gradient
29 +
#'  boosted trees from the LightGBM framework (via the \pkg{lightgbm} package).
30 +
#'
24 31
#' @section Parameters:
25 -
#' \describe{
26 -
#'   \item{\code{nrounds=20}}{Number of fitting iterations.}
27 -
#'   \item{\code{...}}{Other parameters passed to
28 -
#'     \code{\link[xgboost]{xgb.train}}.}
29 -
#' }
32 +
#'   - \code{nrounds=20}: Number of fitting iterations.
33 +
#'   - \code{...}: Other parameters passed to \code{\link[xgboost]{xgb.train}}.
34 +
#'
35 +
#' @references
36 +
#'  \insertAllCited{}
37 +
#'
38 +
#' @examples
39 +
#' data(mtcars)
40 +
#' mtcars_task <- sl3_Task$new(
41 +
#'   data = mtcars,
42 +
#'   covariates = c(
43 +
#'     "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am",
44 +
#'     "gear", "carb"
45 +
#'   ),
46 +
#'   outcome = "mpg"
47 +
#' )
30 48
#'
31 -
#' @template common_parameters
32 -
#
49 +
#' # initialization, training, and prediction with the defaults
50 +
#' xgb_lrnr <- Lrnr_xgboost$new()
51 +
#' xgb_fit <- xgb_lrnr$train(mtcars_task)
52 +
#' xgb_preds <- xgb_fit$predict()
53 +
#'
54 +
#' # get feature importance from fitted model
55 +
#' xgb_varimp <- xgb_fit$importance()
33 56
Lrnr_xgboost <- R6Class(
34 57
  classname = "Lrnr_xgboost", inherit = Lrnr_base,
35 58
  portable = TRUE, class = TRUE,
@@ -48,116 +71,127 @@
48 71
      # calculate importance metrics, already sorted by decreasing importance
49 72
      importance_result <- call_with_args(xgboost::xgb.importance, args)
50 73
      rownames(importance_result) <- importance_result[["Feature"]]
51 -
52 74
      return(importance_result)
53 75
    }
54 76
  ),
55 -
56 77
  private = list(
57 78
    .properties = c(
58 79
      "continuous", "binomial", "categorical", "weights",
59 80
      "offset", "importance"
60 81
    ),
61 -
62 82
    .train = function(task) {
63 83
      args <- self$params
64 84
65 85
      verbose <- args$verbose
66 -
      if (is.null(verbose)) verbose <- getOption("sl3.verbose")
86 +
      if (is.null(verbose)) {
87 +
        verbose <- getOption("sl3.verbose")
88 +
      }
89 +
      args$verbose <- as.integer(verbose)
67 90
91 +
      # set up outcome
68 92
      outcome_type <- self$get_outcome_type(task)
93 +
      Y <- outcome_type$format(task$Y)
94 +
      if (outcome_type$type == "categorical") {
95 +
        Y <- as.numeric(Y) - 1
96 +
      }
69 97
98 +
      # set up predictor data
70 99
      Xmat <- as.matrix(task$X)
71 100
      if (is.integer(Xmat)) {
72 101
        Xmat[, 1] <- as.numeric(Xmat[, 1])
73 102
      }
74 -
      Y <- outcome_type$format(task$Y)
75 -
      if (outcome_type$type == "categorical") {
76 -
        Y <- as.numeric(Y) - 1
103 +
      if (nrow(Xmat) != nrow(task$X) && ncol(Xmat) == nrow(task$X)) {
104 +
        Xmat <- t(Xmat)
77 105
      }
78 -
      args$data <- try(xgboost::xgb.DMatrix(Xmat, label = Y))
106 +
      args$data <- try(xgboost::xgb.DMatrix(Xmat, label = Y), silent = TRUE)
79 107
108 +
      # specify weights
80 109
      if (task$has_node("weights")) {
81 -
        try(xgboost::setinfo(args$data, "weight", task$weights))
110 +
        try(xgboost::setinfo(args$data, "weight", task$weights), silent = TRUE)
82 111
      }
112 +
113 +
      # specify offset
83 114
      if (task$has_node("offset")) {
84 115
        if (outcome_type$type == "categorical") {
85 -
          # todo: fix
116 +
          # TODO: fix
86 117
          stop("offsets not yet supported for outcome_type='categorical'")
87 118
        }
88 -
89 119
        family <- outcome_type$glm_family(return_object = TRUE)
90 120
        link_fun <- args$family$linkfun
91 121
        offset <- task$offset_transformed(link_fun)
92 -
        try(xgboost::setinfo(args$data, "base_margin", offset))
122 +
        try(xgboost::setinfo(args$data, "base_margin", offset), silent = TRUE)
93 123
      } else {
94 124
        link_fun <- NULL
95 125
      }
96 -
      args$verbose <- as.integer(verbose)
97 -
      args$watchlist <- list(train = args$data)
98 126
127 +
      # specify objective if it's NULL to avoid xgb warnings
99 128
      if (is.null(args$objective)) {
100 129
        if (outcome_type$type == "binomial") {
101 130
          args$objective <- "binary:logistic"
131 +
          args$eval_metric <- "logloss"
102 132
        } else if (outcome_type$type == "quasibinomial") {
103 133
          args$objective <- "reg:logistic"
104 134
        } else if (outcome_type$type == "categorical") {
105 135
          args$objective <- "multi:softprob"
106 -
          args$num_class <- length(outcome_type$levels)
136 +
          args$eval_metric <- "mlogloss"
137 +
          args$num_class <- as.integer(length(outcome_type$levels))
107 138
        }
108 139
      }
109 -
      fit_object <- call_with_args(xgboost::xgb.train, args, keep_all = TRUE)
110 140
141 +
      args$watchlist <- list(train = args$data)
142 +
      fit_object <- call_with_args(xgboost::xgb.train, args, keep_all = TRUE)
111 143
      fit_object$training_offset <- task$has_node("offset")
112 144
      fit_object$link_fun <- link_fun
113 145
114 146
      return(fit_object)
115 147
    },
116 -
117 148
    .predict = function(task = NULL) {
118 -
      outcome_type <- private$.training_outcome_type
119 -
      verbose <- getOption("sl3.verbose")
120 -
121 149
      fit_object <- private$.fit_object
122 150
151 +
      # set up test data for prediction
123 152
      Xmat <- as.matrix(task$X)
124 153
      if (is.integer(Xmat)) {
125 154
        Xmat[, 1] <- as.numeric(Xmat[, 1])
126 155
      }
127 156
      # order of columns has to be the same in xgboost training and test data
128 -
      Xmat <- Xmat[, match(fit_object$feature_names, colnames(Xmat))]
129 -
130 -
      xgb_data <- try(xgboost::xgb.DMatrix(Xmat))
157 +
      Xmat_ord <- as.matrix(Xmat[, match(fit_object$feature_names, colnames(Xmat))])
158 +
      if ((nrow(Xmat_ord) != nrow(Xmat)) && (ncol(Xmat_ord) == nrow(Xmat))) {
159 +
        Xmat_ord <- t(Xmat_ord)
160 +
      }
161 +
      stopifnot(nrow(Xmat_ord) == nrow(Xmat))
162 +
      # convert to xgb.DMatrix
163 +
      xgb_data <- try(xgboost::xgb.DMatrix(Xmat_ord), silent = TRUE)
131 164
165 +
      # incorporate offset, if it was specified in training
132 166
      if (self$fit_object$training_offset) {
133 -
        offset <- task$offset_transformed(self$fit_object$link_fun,
167 +
        offset <- task$offset_transformed(
168 +
          self$fit_object$link_fun,
134 169
          for_prediction = TRUE
135 170
        )
136 -
        xgboost::setinfo(xgb_data, "base_margin", offset)
171 +
        try(xgboost::setinfo(xgb_data, "base_margin", offset), silent = TRUE)
137 172
      }
138 173
139 -
      predictions <- rep.int(list(numeric()), 1)
174 +
      # incorporate ntreelimit, if training model was not a gblinear-based fit
175 +
      ntreelimit <- 0
176 +
      if (!is.null(fit_object[["best_ntreelimit"]]) &&
177 +
        !("gblinear" %in% fit_object[["params"]][["booster"]])) {
178 +
        ntreelimit <- fit_object[["best_ntreelimit"]]
179 +
      }
140 180
181 +
      predictions <- rep.int(list(numeric()), 1)
141 182
      if (nrow(Xmat) > 0) {
142 -
        # Use ntreelimit for prediction, if used during model training.
143 -
        # Use it only for gbtree (not gblinear, i.e., glm -- not implemented)
144 -
        ntreelimit <- 0
145 -
        if (!is.null(fit_object[["best_ntreelimit"]]) &&
146 -
          !("gblinear" %in% fit_object[["params"]][["booster"]])) {
147 -
          ntreelimit <- fit_object[["best_ntreelimit"]]
148 -
        }
149 183
        # will generally return vector, needs to be put into data.table column
150 184
        predictions <- stats::predict(
151 185
          fit_object,
152 -
          newdata = xgb_data,
153 -
          ntreelimit = ntreelimit, reshape = TRUE
186 +
          newdata = xgb_data, ntreelimit = ntreelimit, reshape = TRUE
154 187
        )
188 +
189 +
        if (private$.training_outcome_type$type == "categorical") {
190 +
          # pack predictions in a single column
191 +
          predictions <- pack_predictions(predictions)
192 +
        }
155 193
      }
156 -
      if (outcome_type$type == "categorical") {
157 -
        # pack predictions in a single column
158 -
        predictions <- pack_predictions(predictions)
159 -
      }
160 -
      # names(pAoutDT) <- names(models_list)
194 +
161 195
      return(predictions)
162 196
    },
163 197
    .required_packages = c("xgboost")
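A hedged sketch of the objective auto-selection added in .train() above; the expected params are an assumption based on the binomial branch, and the task setup is illustrative only:

library(sl3)
data(mtcars)
# "am" is 0/1, so the learner should fill in binary:logistic + logloss
task_bin <- sl3_Task$new(
  data = mtcars,
  covariates = c("mpg", "cyl", "disp", "hp", "wt"),
  outcome = "am"
)
xgb_fit <- Lrnr_xgboost$new(nrounds = 20)$train(task_bin)
xgb_fit$fit_object$params$objective # expected: "binary:logistic"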

@@ -50,13 +50,11 @@
50 50
      super$initialize(params = params, ...)
51 51
    }
52 52
  ),
53 -
54 53
  private = list(
55 54
    .properties = c(
56 55
      "continuous", "binomial", "categorical", "weights",
57 56
      "offset"
58 57
    ),
59 -
60 58
    .train = function(task) {
61 59
      verbose <- getOption("sl3.verbose")
62 60
      params <- self$params
@@ -102,7 +100,6 @@
Loading
102 100
      fit_object$name <- "optim"
103 101
      return(fit_object)
104 102
    },
105 -
106 103
    .predict = function(task = NULL) {
107 104
      verbose <- getOption("sl3.verbose")
108 105

@@ -1,8 +1,9 @@
1 1
#' Univariate GARCH Models
2 2
#'
3 -
#' This learner supports autoregressive fractionally integrated moving average
4 -
#' and various flavors of generalized autoregressive conditional
5 -
#' heteroskedasticity models for univariate time-series.
3 +
#' @description This learner supports autoregressive fractionally integrated
4 +
#'  moving average and various flavors of generalized autoregressive
5 +
#'  conditional heteroskedasticity models for univariate time-series. All the
6 +
#'  models are fit using \code{\link[rugarch]{ugarchfit}}.
6 7
#'
7 8
#' @docType class
8 9
#'
@@ -12,31 +13,60 @@
12 13
#'
13 14
#' @keywords data
14 15
#'
15 -
#' @return Learner object with methods for training and prediction. See
16 -
#'  \code{\link{Lrnr_base}} for documentation on learners.
16 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
17 +
#'  methods for training and prediction. For a full list of learner
18 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
17 19
#'
18 -
#' @format \code{\link{R6Class}} object.
20 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
21 +
#'  \code{\link{Lrnr_base}}.
19 22
#'
20 23
#' @family Learners
21 24
#'
22 25
#' @section Parameters:
23 -
#' \describe{
24 -
#'   \item{\code{variance.model}}{List containing variance model specification.
25 -
#'     This includes model, GARCH order, submodel, external regressors and
26 -
#'     variance tageting. Refer to \code{ugarchspec} for more information.}
27 -
#'   \item{\code{mean.model}}{List containing the mean model specification. This
28 -
#'     includes ARMA model, whether the mean should be included, and external
29 -
#'     regressors among others. Refer to \code{ugarchspec} for more
30 -
#'     information.}
31 -
#'   \item{\code{distribution.model="norm"}}{Conditional density to use for the
32 -
#'     innovations.}
33 -
#'   \item{\code{start.pars=list()}}{List of staring parameters for the
34 -
#'     optimization routine.}
35 -
#'   \item{\code{fixed.pars=list()}}{List of parameters which are to be kept
36 -
#'     fixed during the optimization.}
37 -
#'   \item{\code{n.ahead=NULL}}{The forecast horizon.}
38 -
#' }
39 -
#
26 +
#'  - \code{variance.model}: List containing variance model specification.
27 +
#'      This includes model, GARCH order, submodel, external regressors and
28 +
#'      variance targeting. Refer to \code{\link[rugarch]{ugarchspec}} for more
29 +
#'      information.
30 +
#'  - \code{mean.model}: List containing the mean model specification. This
31 +
#'      includes ARMA model, whether the mean should be included, and external
32 +
#'      regressors, among others.
33 +
#'  - \code{distribution.model}: Conditional density to be used for the
34 +
#'      innovations.
35 +
#'  - \code{start.pars}: List of starting parameters for the optimization
36 +
#'      routine.
37 +
#'  - \code{fixed.pars}: List of parameters which are to be kept fixed during
38 +
#'      the optimization routine.
39 +
#'  - \code{...}: Other parameters passed to \code{\link[rugarch]{ugarchfit}}.
40 +
#'
41 +
#' @examples
42 +
#' library(origami)
43 +
#' library(data.table)
44 +
#' data(bsds)
45 +
#'
46 +
#' # make folds appropriate for time-series cross-validation
47 +
#' folds <- make_folds(bsds,
48 +
#'   fold_fun = folds_rolling_window, window_size = 500,
49 +
#'   validation_size = 100, gap = 0, batch = 50
50 +
#' )
51 +
#'
52 +
#' # build task by passing in external folds structure
53 +
#' task <- sl3_Task$new(
54 +
#'   data = bsds,
55 +
#'   folds = folds,
56 +
#'   covariates = c(
57 +
#'     "weekday", "temp"
58 +
#'   ),
59 +
#'   outcome = "cnt"
60 +
#' )
61 +
#'
62 +
#' # create tasks for training and validation
63 +
#' train_task <- training(task, fold = task$folds[[1]])
64 +
#' valid_task <- validation(task, fold = task$folds[[1]])
65 +
#'
66 +
#' # instantiate learner, then fit and predict
67 +
#' rugarch_lrnr <- Lrnr_rugarch$new()
68 +
#' rugarch_fit <- rugarch_lrnr$train(train_task)
69 +
#' rugarch_preds <- rugarch_fit$predict(valid_task)
40 70
Lrnr_rugarch <- R6Class(
41 71
  classname = "Lrnr_rugarch", inherit = Lrnr_base,
42 72
  portable = TRUE, class = TRUE,
@@ -54,19 +84,13 @@
54 84
                              external.regressors = NULL, archex = FALSE
55 85
                            ),
56 86
                          distribution.model = "norm", start.pars = list(),
57 -
                          fixed.pars = list(), n.ahead = NULL, ...) {
87 +
                          fixed.pars = list(), ...) {
58 88
      params <- args_to_list()
59 89
      super$initialize(params = params, ...)
60 -
      if (!is.null(n.ahead)) {
61 -
        warning("n.ahead paramater is specified- obtaining an ensemble will fail. 
62 -
                Please only use for obtaining individual learner forcasts.")
63 -
      }
64 90
    }
65 91
  ),
66 -
67 92
  private = list(
68 93
    .properties = c("timeseries", "continuous"),
69 -
70 94
    .train = function(task) {
71 95
      args <- self$params
72 96
      # Support for a single time-series
@@ -78,60 +102,20 @@
78 102
79 103
    # Only simple forecast, do not implement CV based forecast here
80 104
    .predict = function(task = NULL) {
81 -
      params <- self$params
82 -
      n.ahead <- params[["n.ahead"]]
83 -
84 -
      # See if there is gap between training and validation:
85 -
      gap <- min(task$folds[[1]]$validation_set) - max(task$folds[[1]]$training_set)
86 -
87 -
      if (gap > 1) {
88 -
        if (is.null(n.ahead)) {
89 -
          n.ahead <- task$nrow + gap
90 -
        } else {
91 -
          n.ahead <- n.ahead + gap
92 -
        }
93 -
94 -
        # Give the same output as GLM
95 -
        predictions <- rugarch::ugarchforecast(
96 -
          private$.fit_object,
97 -
          data = task$X,
98 -
          n.ahead = n.ahead
99 -
        )
100 -
        predictions <- as.numeric(predictions@forecast$seriesFor)
101 -
        predictions <- structure(predictions, names = seq_len(length(predictions)))
102 -
        return(predictions)
103 -
      } else if (gap == 1) {
104 -
        if (is.null(n.ahead)) {
105 -
          n.ahead <- task$nrow
106 -
        }
107 -
108 -
        # Give the same output as GLM
109 -
        predictions <- rugarch::ugarchforecast(
110 -
          private$.fit_object,
111 -
          data = task$X,
112 -
          n.ahead = n.ahead
113 -
        )
114 -
        predictions <- as.numeric(predictions@forecast$seriesFor)
115 -
        predictions <- structure(predictions, names = seq_len(length(predictions)))
116 -
        return(predictions)
117 -
      } else if (gap < 1) {
118 -
        warning("Validation samples come before Training samples; 
119 -
                please specify one of the time-series fold structures.")
105 +
      fit_object <- private$.fit_object
106 +
      h <- ts_get_pred_horizon(self$training_task, task)
120 107
121 -
        if (is.null(n.ahead)) {
122 -
          n.ahead <- task$nrow
123 -
        }
108 +
      # Give the same output as GLM
109 +
      predictions <- rugarch::ugarchforecast(
110 +
        private$.fit_object,
111 +
        data = task$X,
112 +
        n.ahead = h
113 +
      )
124 114
125 -
        # Give the same output as GLM
126 -
        predictions <- rugarch::ugarchforecast(
127 -
          private$.fit_object,
128 -
          data = task$X,
129 -
          n.ahead = n.ahead
130 -
        )
131 -
        predictions <- as.numeric(predictions@forecast$seriesFor)
132 -
        predictions <- structure(predictions, names = seq_len(length(predictions)))
133 -
        return(predictions)
134 -
      }
115 +
      preds <- as.numeric(predictions@forecast$seriesFor)
116 +
      requested_preds <- ts_get_requested_preds(self$training_task, task,
117 +
                                                preds)
118 +
      return(requested_preds)
135 119
    },
136 120
    .required_packages = c("rugarch")
137 121
  )
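The internals of ts_get_pred_horizon() are not shown in this diff; as an illustration only (not the sl3 implementation), the gap arithmetic it replaces amounts to measuring from the end of the training series to the last requested prediction point:

# illustration: the forecast horizon is the number of steps from the
# last training time to the last time requested for prediction
sketch_pred_horizon <- function(train_task, pred_task) {
  max(pred_task$time) - max(train_task$time)
}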

@@ -23,10 +23,8 @@
23 23
      super$initialize(params = params, ...)
24 24
    }
25 25
  ),
26 -
27 26
  private = list(
28 -
    .properties = c("binomial", "continuous", "weights", "ids"),
29 -
27 +
    .properties = c("binomial", "continuous", "weights", "ids", "wrapper"),
30 28
    .train = function(task) {
31 29
      args <- self$params
32 30
      outcome_type <- self$get_outcome_type(task)
@@ -54,11 +52,9 @@
54 52
      fit_object <- list(selected = covariates[covariate_selected])
55 53
      return(fit_object)
56 54
    },
57 -
58 55
    .predict = function(task) {
59 56
      task$X[, private$.fit_object$selected, with = FALSE, drop = FALSE]
60 57
    },
61 -
62 58
    .chain = function(task) {
63 59
      return(task$next_in_chain(covariates = private$.fit_object$selected))
64 60
    },

@@ -1,7 +1,6 @@
1 1
#' @rdname variable_type
2 2
#'
3 3
#' @export
4 -
#
5 4
Variable_Type <- R6Class(
6 5
  classname = "Variable_Type",
7 6
  portable = TRUE,
@@ -11,7 +10,7 @@
11 10
                          pcontinuous = getOption("sl3.pcontinuous")) {
12 11
      if (is.null(type)) {
13 12
        if (is.null(x)) {
14 -
          stop("type not specified, and no x from which to infer it")
13 +
          stop("type not specified, and no x from which to infer it.")
15 14
        }
16 15
        nunique <- length(na.exclude(unique(x)))
17 16
        if (!is.null(ncol(x))) {
@@ -20,7 +19,8 @@
20 19
          type <- "constant"
21 20
        } else if (nunique == 2) {
22 21
          type <- "binomial"
23 -
        } else if ((is.factor(x)) || (((nunique / length(x)) < pcontinuous) && (nunique < 20))) {
22 +
        } else if ((is.factor(x)) || (((nunique / length(x)) < pcontinuous) &&
23 +
          (nunique < 20))) {
24 24
          type <- "categorical"
25 25
        } else {
26 26
          type <- "continuous"
@@ -50,9 +50,12 @@
50 50
    },
51 51
    glm_family = function(return_object = FALSE) {
52 52
      type <- self$type
53 -
      family <- switch(type, continuous = "gaussian", binomial = "binomial",
53 +
      family <- switch(type,
54 +
        continuous = "gaussian",
55 +
        binomial = "binomial",
54 56
        quasibinomial = "quasibinomial",
55 -
        categorical = "multinomial", constant = "binomial",
57 +
        categorical = "multinomial",
58 +
        constant = "binomial",
56 59
        "unknown"
57 60
      )
58 61
      if (family == "unknown") {
@@ -119,17 +122,17 @@
119 122
  )
120 123
)
121 124
122 -
#' Specify variable type
125 +
#' Specify Variable Type
123 126
#'
124 -
#' @param type A type name.
127 +
#' @param type A type name. Valid choices include "binomial", "categorical",
128 +
#'  "continuous", and "multivariate". When not specified, this is inferred.
125 129
#' @param levels Valid levels for discrete types.
126 -
#' @param bounds Bounds for continous variables.
130 +
#' @param bounds Bounds for continuous variables.
127 131
#' @param x Data to use for inferring type if not specified.
128 -
#' @param pcontinuous If \code{type} above is inferred, the proportion of unique
129 -
#'  observations above which variable is continuous
132 +
#' @param pcontinuous If \code{type} above is inferred, the proportion of
133 +
#'  unique observations above which the variable is considered continuous.
130 134
#'
131 135
#' @export
132 -
#
133 136
variable_type <- function(type = NULL, levels = NULL, bounds = NULL, x = NULL,
134 137
                          pcontinuous = getOption("sl3.pcontinuous")) {
135 138
  return(Variable_Type$new(
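A small sketch of the inference rules encoded in Variable_Type$initialize() above (two unique values gives "binomial"; a factor or a small unique-value fraction gives "categorical"; otherwise "continuous"):

variable_type(x = c(0, 1, 1, 0))$type # "binomial"
variable_type(x = factor(c("a", "b", "c", "a")))$type # "categorical"
variable_type(x = rnorm(100))$type # "continuous"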

@@ -149,15 +149,18 @@
149 149
    xlab <- "Risk Difference"
150 150
  }
151 151
152 -
  # sort by decreasing importance and then subset
152 +
  # sort by decreasing importance
153 153
  x_sorted <- x[order(-x[, 2])]
154 -
  x_plotting <- x_sorted[1:(min(nvar, nrow(x_sorted))), ]
154 +
  # subset to include at most nvar variables
155 +
  x_sorted <- x_sorted[1:(min(nvar, nrow(x_sorted))), ]
156 +
  # sort by increasing importance
157 +
  x_sorted <- x_sorted[order(x_sorted[, 2])]
155 158
156 -
  # modify into named vector for dotchart
157 -
  importance_scores <- x_plotting[[2]]
158 -
  names(importance_scores) <- x_plotting[[1]]
159 -
  dotchart(importance_scores,
159 +
  # make dotchart with most important variables on top
160 +
  # x_sorted[[2]] is importance scores & x_sorted[[1]] is covariate names
161 +
  dotchart(
162 +
    x = x_sorted[[2]], labels = x_sorted[[1]],
160 163
    xlab = xlab, ylab = "",
161 -
    xlim = c(min(importance_scores), max(importance_scores)), ...
164 +
    xlim = c(min(x_sorted[[2]]), max(x_sorted[[2]])), ...
162 165
  )
163 166
}
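A brief usage sketch for the reworked dotchart, assuming this function is sl3's exported importance_plot() and that the chosen learner supports the importance() method (both assumptions, drawn from the package's other docs):

library(sl3)
data(cpp_imputed)
task <- sl3_Task$new(
  cpp_imputed,
  covariates = c("apgar1", "apgar5", "parity", "gagebrth"),
  outcome = "haz"
)
rf_fit <- Lrnr_ranger$new(importance = "impurity")$train(task)
# top 3 covariates, most important plotted on top
importance_plot(rf_fit$importance(), nvar = 3)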

@@ -138,7 +138,6 @@
138 138
139 139
      invisible(self)
140 140
    },
141 -
142 141
    add_interactions = function(interactions, warn_on_existing = TRUE) {
143 142
      ## ----------------------------------------------------------------------
144 143
      ## Add columns with interactions (by reference) to input design matrix
@@ -159,25 +158,57 @@
159 158
        interaction_names <- sapply(interactions, paste0, collapse = "_")
160 159
      }
161 160
      is_new <- !(interaction_names %in% old_names)
162 -
      interaction_data <- lapply(interactions[is_new], function(interaction) {
163 -
        self$X[, prod.DT(.SD), .SD = interaction]
164 -
      })
165 161
      if (any(!is_new)) {
166 162
        warning(
167 163
          "The following interactions already exist:",
168 164
          paste0(interaction_names[!is_new], collapse = ", ")
169 165
        )
170 166
      }
171 -
      setDT(interaction_data)
172 -
      setnames(interaction_data, interaction_names[is_new])
173 -
      interaction_columns <- self$add_columns(interaction_data)
174 -
      new_covariates <- c(self$nodes$covariates, interaction_names[is_new])
167 +
      interaction_data <- lapply(interactions[is_new], function(int) {
168 +
        # check if interaction terms are numeric
169 +
        int_numeric <- sapply(int, function(i) is.numeric(self$X[[i]]))
170 +
        if (all(int_numeric)) {
171 +
          d_int <- data.table(self$X[, prod.DT(.SD), .SD = int])
172 +
          setnames(d_int, paste0(int, collapse = "_"))
173 +
          return(d_int)
174 +
        } else {
175 +
          # match interaction terms to X
176 +
          Xmatch <- lapply(int, function(i) grep(i, colnames(self$X), value = TRUE))
177 +
          Xint <- as.list(as.data.frame(t(expand.grid(Xmatch))))
178 +
179 +
          d_Xint <- lapply(Xint, function(Xint) self$X[, prod.DT(.SD), .SD = Xint])
180 +
          setDT(d_Xint)
181 +
          setnames(d_Xint, sapply(Xint, paste0, collapse = "_"))
182 +
183 +
          no_Xint <- rowSums(d_Xint) == 0 # happens when we omit 1 factor level
184 +
          if (any(int_numeric)) {
185 +
            d_Xint$other <- rep(0, nrow(d_Xint))
186 +
            d_Xint[no_Xint, "other"] <- 1
187 +
            if (any(int_numeric)) {
188 +
              # we actually need to take the product if we have a numeric covariate
189 +
              d_Xint[no_Xint, "other"] <- prod.DT(data.table(
190 +
                rep(1, sum(no_Xint)),
191 +
                self$X[no_Xint, names(which(int_numeric)), with = FALSE]
192 +
              ))
193 +
            }
194 +
            other_name <- paste0("other.", paste0(int, collapse = "_"))
195 +
            colnames(d_Xint)[ncol(d_Xint)] <- other_name
196 +
          }
197 +
          return(d_Xint)
198 +
        }
199 +
      })
200 +
201 +
      interaction_names <- unlist(lapply(interaction_data, colnames))
202 +
      interaction_data <- data.table(do.call(cbind, interaction_data))
203 +
      setnames(interaction_data, interaction_names)
204 +
205 +
      interaction_cols <- self$add_columns(interaction_data, column_uuid = NULL)
206 +
      new_covariates <- c(self$nodes$covariates, interaction_names)
175 207
      return(self$next_in_chain(
176 208
        covariates = new_covariates,
177 -
        column_names = interaction_columns
209 +
        column_names = interaction_cols
178 210
      ))
179 211
    },
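For context, a hedged sketch of the call pattern this method supports, now that factor-by-factor and factor-by-numeric terms are expanded (the cpp_imputed column names are assumptions used for illustration):

library(sl3)
data(cpp_imputed)
task <- sl3_Task$new(
  cpp_imputed,
  covariates = c("apgar1", "apgar5", "parity"),
  outcome = "haz"
)
# adds an apgar1_parity product column and returns a chained task
int_task <- task$add_interactions(list(c("apgar1", "parity")))
head(int_task$X)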
180 -
181 212
    add_columns = function(new_data, column_uuid = uuid::UUIDgenerate()) {
182 213
      if (is.numeric(private$.row_index)) {
183 214
        new_col_map <- private$.shared_data$add_columns(
@@ -197,7 +228,6 @@
197 228
      # return an updated column_names map
198 229
      return(column_names)
199 230
    },
200 -
201 231
    next_in_chain = function(covariates = NULL, outcome = NULL, id = NULL,
202 232
                             weights = NULL, offset = NULL, time = NULL,
203 233
                             folds = NULL, column_names = NULL,
@@ -311,7 +341,6 @@
311 341
      )
312 342
      return(new_task)
313 343
    },
314 -
315 344
    get_data = function(rows = NULL, columns, expand_factors = FALSE) {
316 345
      if (missing(rows)) {
317 346
        rows <- private$.row_index
@@ -330,12 +359,10 @@
330 359
      }
331 360
      return(subset)
332 361
    },
333 -
334 362
    has_node = function(node_name) {
335 363
      node_var <- private$.nodes[[node_name]]
336 364
      return(!is.null(node_var))
337 365
    },
338 -
339 366
    get_node = function(node_name, generator_fun = NULL,
340 367
                        expand_factors = FALSE) {
341 368
      if (missing(generator_fun)) {
@@ -357,7 +384,6 @@
357 384
        }
358 385
      }
359 386
    },
360 -
361 387
    offset_transformed = function(link_fun = NULL, for_prediction = FALSE) {
362 388
      if (self$has_node("offset")) {
363 389
        offset <- self$offset
@@ -372,27 +398,22 @@
372 398
      }
373 399
      return(offset)
374 400
    },
375 -
376 401
    print = function() {
377 402
      cat(sprintf("A sl3 Task with %d obs and these nodes:\n", self$nrow))
378 403
      print(self$nodes)
379 404
    },
380 -
381 405
    revere_fold_task = function(fold_number) {
382 406
      return(self)
383 407
    }
384 408
  ),
385 -
386 409
  active = list(
387 410
    internal_data = function() {
388 411
      return(private$.shared_data)
389 412
    },
390 -
391 413
    data = function() {
392 414
      all_nodes <- unique(unlist(private$.nodes))
393 415
      return(self$get_data(, all_nodes))
394 416
    },
395 -
396 417
    nrow = function() {
397 418
      if (is.null(private$.row_index)) {
398 419
        return(private$.shared_data$nrow)
@@ -400,17 +421,14 @@
400 421
        return(length(private$.row_index))
401 422
      }
402 423
    },
403 -
404 424
    nodes = function() {
405 425
      return(private$.nodes)
406 426
    },
407 -
408 427
    X = function() {
409 428
      covariates <- private$.nodes$covariates
410 429
      X_dt <- self$get_data(, covariates, expand_factors = TRUE)
411 430
      return(X_dt)
412 431
    },
413 -
414 432
    X_intercept = function() {
415 433
      # returns X matrix with manually generated intercept column
416 434
      X_dt <- self$X
@@ -428,27 +446,22 @@
428 446
429 447
      return(X_dt)
430 448
    },
431 -
432 449
    Y = function() {
433 450
      return(self$get_node("outcome"))
434 451
    },
435 -
436 452
    offset = function() {
437 453
      return(self$get_node("offset"))
438 454
    },
439 -
440 455
    weights = function() {
441 456
      return(self$get_node("weights", function(node_var, n) {
442 457
        rep(1, n)
443 458
      }))
444 459
    },
445 -
446 460
    id = function() {
447 461
      return(self$get_node("id", function(node_var, n) {
448 462
        seq_len(n)
449 463
      }))
450 464
    },
451 -
452 465
    time = function() {
453 466
      return(self$get_node("time", function(node_var, n) {
454 467
        if (self$has_node("id")) {
@@ -458,7 +471,6 @@
458 471
        }
459 472
      }))
460 473
    },
461 -
462 474
    folds = function(new_folds) {
463 475
      if (!missing(new_folds)) {
464 476
        private$.folds <- new_folds
@@ -484,24 +496,19 @@
484 496
      }
485 497
      return(private$.folds)
486 498
    },
487 -
488 499
    uuid = function() {
489 500
      return(private$.uuid)
490 501
    },
491 -
492 502
    column_names = function() {
493 503
      return(private$.column_names)
494 504
    },
495 -
496 505
    outcome_type = function() {
497 506
      return(private$.outcome_type)
498 507
    },
499 -
500 508
    row_index = function() {
501 509
      return(private$.row_index)
502 510
    }
503 511
  ),
504 -
505 512
  private = list(
506 513
    .shared_data = NULL,
507 514
    .nodes = NULL,

@@ -82,7 +82,6 @@
82 82
      super$initialize(params = params, ...)
83 83
    }
84 84
  ),
85 -
86 85
  private = list(
87 86
    .properties = c("timeseries", "continuous"),
88 87
    .train = function(task) {
@@ -94,7 +93,6 @@
94 93
      fit_object <- call_with_args(gts, args, silent = TRUE)
95 94
      return(fit_object)
96 95
    },
97 -
98 96
    .predict = function(task = NULL) {
99 97
      args <- self$params
100 98
      # get horizon based on training and testing tasks

@@ -1,29 +1,42 @@
1 1
#' Automatically Defined Metalearner
2 2
#'
3 -
#' A sensible metalearner based on \code{\link{Lrnr_solnp}} is chosen based on outcome type.
4 -
#' This amounts to choosing an appropriate loss function and combination function.
5 -
#' \tabular{rcc}{
6 -
#' Outcome Type \tab Combination Function \tab Loss Function \cr
7 -
#' binomial \tab metalearner_logistic_binomial \tab loss_squared_error \cr
8 -
#' categorical \tab metalearner_linear_multinomial \tab loss_loglik_multinomial \cr
9 -
#' continuous \tab metalearner_linear \tab loss_squared_error \cr
10 -
#' multivariate \tab metalearner_linear_multivariate \tab loss_squared_error_multivariate \cr
11 -
#' }
3 +
#' A sensible metalearner based on \code{\link{Lrnr_solnp}} is chosen based on
4 +
#' outcome type. This amounts to choosing an appropriate loss function and
5 +
#' combination function.
6 +
#'
7 +
#' @details
8 +
#'  | Outcome Type | Combination Function | Loss Function |
9 +
#'  |:-------------|:---------------------|:-------------|
10 +
#'  | binomial | metalearner_logistic_binomial | loss_squared_error |
11 +
#'  | categorical | metalearner_linear_multinomial | loss_loglik_multinomial |
12 +
#'  | continuous | metalearner_linear | loss_squared_error |
13 +
#'  | multivariate | metalearner_linear_multivariate | loss_squared_error_multivariate |
12 14
#'
13 15
#' @param outcome_type A \code{Variable_Type} object.
14 16
default_metalearner <- function(outcome_type) {
15 17
  outcome_type <- outcome_type$type
16 18
  if (outcome_type %in% c("constant", "binomial")) {
17 -
    learner <- make_learner(Lrnr_solnp, metalearner_logistic_binomial, loss_squared_error)
19 +
    learner <- make_learner(
20 +
      Lrnr_solnp, metalearner_logistic_binomial,
21 +
      loss_squared_error
22 +
    )
18 23
  } else if (outcome_type == "categorical") {
19 -
    learner <- make_learner(Lrnr_solnp, metalearner_linear_multinomial, loss_loglik_multinomial)
24 +
    learner <- make_learner(
25 +
      Lrnr_solnp, metalearner_linear_multinomial,
26 +
      loss_loglik_multinomial
27 +
    )
20 28
  } else if (outcome_type == "continuous") {
21 -
    learner <- make_learner(Lrnr_solnp, metalearner_linear, loss_squared_error)
29 +
    learner <- make_learner(
30 +
      Lrnr_solnp, metalearner_linear,
31 +
      loss_squared_error
32 +
    )
22 33
  } else if (outcome_type == "multivariate") {
23 -
    learner <- make_learner(Lrnr_solnp, metalearner_linear_multivariate, loss_squared_error_multivariate)
34 +
    learner <- make_learner(
35 +
      Lrnr_solnp, metalearner_linear_multivariate,
36 +
      loss_squared_error_multivariate
37 +
    )
24 38
  } else {
25 -
    stop(sprintf("Outcome type %s does not have a default metalearner. Please specify your own"))
39 +
    stop(sprintf("Outcome type %s does not have a default metalearner.", outcome_type))
26 40
  }
27 -
28 41
  return(learner)
29 42
}
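A short sketch of how the table in @details maps to a concrete learner; the printed result shape is an assumption based on the branches above:

# a binomial outcome yields Lrnr_solnp with a logistic combination
# function and the squared-error loss
meta <- default_metalearner(variable_type("binomial"))
print(meta)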

@@ -1,6 +1,6 @@
1 1
#' Cross-Validated Selector
2 2
#'
3 -
#' This meta-learner identifies the cross-validated selector (i.e. discrete
3 +
#' This meta-learner identifies the cross-validated selector (i.e., discrete
4 4
#' super learner) for any loss function.
5 5
#'
6 6
#' @docType class
@@ -11,48 +11,62 @@
11 11
#'
12 12
#' @keywords data
13 13
#'
14 -
#' @return Learner object with methods for training and prediction. See
15 -
#'  \code{\link{Lrnr_base}} for documentation on learners.
14 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
15 +
#'  methods for training and prediction. For a full list of learner
16 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
16 17
#'
17 -
#' @format \code{\link{R6Class}} object.
18 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
19 +
#'  \code{\link{Lrnr_base}}.
18 20
#'
19 21
#' @family Learners
20 22
#'
21 23
#' @section Parameters:
22 -
#' \describe{
23 -
#'   \item{\code{loss_function=loss_squared_error}}{A function(pred, truth)
24 -
#'     that takes prediction and truth vectors and returns a loss vector. See
25 -
#'     \link{loss_functions} for options.}
26 -
#'   \item{\code{...}}{Not currently used.}
27 -
#' }
24 +
#'   - \code{loss_function}: A function that takes a vector of predictions as
25 +
#'     its first argument, and a vector of truths/observations as its second
26 +
#'     argument, and then returns a vector of losses. See \link{loss_functions}
27 +
#'     for options.
28 28
#'
29 -
#' @template common_parameters
30 -
#
29 +
#' @examples
30 +
#' data(cpp_imputed)
31 +
#' covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs")
32 +
#' task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz")
33 +
#'
34 +
#' hal_lrnr <- Lrnr_hal9001$new(
35 +
#'   max_degree = 1, num_knots = c(20, 10), smoothness_orders = 0
36 +
#' )
37 +
#' lasso_lrnr <- Lrnr_glmnet$new()
38 +
#' glm_lrnr <- Lrnr_glm$new()
39 +
#' ranger_lrnr <- Lrnr_ranger$new()
40 +
#' lrnrs <- c(hal_lrnr, lasso_lrnr, glm_lrnr, ranger_lrnr)
41 +
#' names(lrnrs) <- c("hal", "lasso", "glm", "ranger")
42 +
#' lrnr_stack <- make_learner(Stack, lrnrs)
43 +
#' metalrnr_discrete_MSE <- Lrnr_cv_selector$new(loss_squared_error)
44 +
#' discrete_sl <- Lrnr_sl$new(
45 +
#'   learners = lrnr_stack, metalearner = metalrnr_discrete_MSE
46 +
#' )
47 +
#' discrete_sl_fit <- discrete_sl$train(task)
48 +
#' discrete_sl_fit$cv_risk(loss_squared_error)
31 49
Lrnr_cv_selector <- R6Class(
32 50
  classname = "Lrnr_cv_selector",
33 -
  inherit = Lrnr_base, portable = TRUE,
34 -
  class = TRUE,
51 +
  inherit = Lrnr_base, portable = TRUE, class = TRUE,
35 52
  public = list(
36 -
    initialize = function(loss_function = loss_squared_error,
37 -
                          ...) {
53 +
    initialize = function(loss_function = loss_squared_error) {
38 54
      params <- args_to_list()
39 -
      super$initialize(params = params, ...)
55 +
      super$initialize(params = params)
40 56
    }
41 57
  ),
42 58
  private = list(
43 59
    .properties = c(
44 -
      "continuous", "binomial", "categorical", "weights"
60 +
      "continuous", "binomial", "categorical", "weights", "wrapper"
45 61
    ),
46 -
47 62
    .train = function(task) {
48 -
      verbose <- getOption("sl3.verbose")
49 -
      params <- self$params
50 -
      loss_function <- params$loss_function
51 -
      outcome_type <- self$get_outcome_type(task)
63 +
      loss_function <- self$params$loss_function
52 64
53 65
      # specify data
66 +
      outcome_type <- self$get_outcome_type(task)
54 67
      X <- as.matrix(task$X)
55 68
      Y <- outcome_type$format(task$Y)
69 +
56 70
      weights <- task$weights
57 71
58 72
      risk <- function(preds) {
@@ -71,9 +85,7 @@
71 85
72 86
      return(fit_object)
73 87
    },
74 -
75 88
    .predict = function(task = NULL) {
76 -
      verbose <- getOption("sl3.verbose")
77 89
      X <- as.matrix(task$X)
78 90
      predictions <- X[, self$fit_object$name]
79 91
      return(predictions)

@@ -37,7 +37,6 @@
37 37
  ),
38 38
  private = list(
39 39
    .properties = c("screener"),
40 -
41 40
    .train = function(task) {
42 41
      screener <- self$params$screener
43 42
      screener_fit <- screener$train(task)
@@ -51,11 +50,9 @@
51 50
      )
52 51
      return(fit_object)
53 52
    },
54 -
55 53
    .predict = function(task) {
56 -
      task$X[, private$.fit_object$selected, with = FALSE, drop = FALSE]
54 +
      task$data[, private$.fit_object$selected, with = FALSE, drop = FALSE]
57 55
    },
58 -
59 56
    .chain = function(task) {
60 57
      return(task$next_in_chain(covariates = private$.fit_object$selected))
61 58
    },

@@ -1,21 +1,12 @@
1 -
## ------------------------------------------------------------------------
2 -
## Faster GLM with speedglm, fall back on glm.fit in case of error
3 -
## - Always use the internal fitting function (speedglm.wfit, glm.fit)
4 -
## - GLM objects are stripped of all the junk (minimal memory footprint)
5 -
## - No formula interface (design mat is the input data.table in task$X)
6 -
## - Separate interface for interactions (params[["interactions"]])
7 -
## - Can override the covariates with a subset of those in task$nodes$covariates
8 -
##   (params[["covariates"]])
9 -
## - All predictions are based on external matrix multiplication with a
10 -
##   family-based link functions
11 -
## ------------------------------------------------------------------------
12 -
13 -
#' Computationally Efficient GLMs
1 +
#' Computationally Efficient Generalized Linear Model (GLM) Fitting
14 2
#'
15 -
#' This learner provides faster fitting procedures for generalized linear models
16 -
#' using the \code{speedglm} package. Arguments are passed to
17 -
#' \code{\link[speedglm]{speedglm.wfit}}. Uses \code{\link[stats]{glm.fit}} as a
18 -
#' fallback if \code{\link[speedglm]{speedglm.wfit}} fails.
3 +
#' This learner provides faster procedures for fitting linear and generalized
4 +
#' linear models than \code{\link{Lrnr_glm}} with a minimal memory footprint.
5 +
#' This learner uses the internal fitting function of the \pkg{speedglm}
6 +
#' package, \code{\link[speedglm]{speedglm.wfit}}. See
7 +
#' \insertCite{speedglm;textual}{sl3} for more detail. The
8 +
#' \code{\link[stats]{glm.fit}} function is used as a fallback if
9 +
#' \code{\link[speedglm]{speedglm.wfit}} fails.
19 10
#'
20 11
#' @docType class
21 12
#'
@@ -26,46 +17,55 @@
26 17
#'
27 18
#' @keywords data
28 19
#'
29 -
#' @return Learner object with methods for training and prediction. See
30 -
#'  \code{\link{Lrnr_base}} for documentation on learners.
20 +
#' @return A learner object inheriting from \code{\link{Lrnr_base}} with
21 +
#'  methods for training and prediction. For a full list of learner
22 +
#'  functionality, see the complete documentation of \code{\link{Lrnr_base}}.
31 23
#'
32 -
#' @format \code{\link{R6Class}} object.
24 +
#' @format An \code{\link[R6]{R6Class}} object inheriting from
25 +
#'  \code{\link{Lrnr_base}}.
33 26
#'
34 27
#' @family Learners
35 28
#'
36 29
#' @section Parameters:
37 -
#' \describe{
38 -
#'   \item{\code{intercept=TRUE}}{If \code{TRUE}, an intercept term is included}
39 -
#'   \item{\code{method="Cholesky"}}{The matrix decomposition method to use}
40 -
#'   \item{\code{...}}{Other parameters to be passed to
41 -
#'     \code{\link[speedglm]{speedglm.wfit}}.}
42 -
#' }
30 +
#'   - \code{intercept = TRUE}: Should an intercept be included in the model?
31 +
#'   - \code{method = "Cholesky"}: The method used to check for singularity.
32 +
#'   - \code{...}: Other parameters to be passed to
33 +
#'       \code{\link[speedglm]{speedglm.wfit}}.
34 +
#'
35 +
#' @references
36 +
#'  \insertAllCited{}
43 37
#'
44 -
#' @template common_parameters
45 -
#
38 +
#' @examples
39 +
#' data(cpp_imputed)
40 +
#' covs <- c("apgar1", "apgar5", "parity", "gagebrth", "mage", "meducyrs")
41 +
#' task <- sl3_Task$new(cpp_imputed, covariates = covs, outcome = "haz")
42 +
#'
43 +
#' # simple, main-terms GLM
44 +
#' lrnr_glm_fast <- Lrnr_glm_fast$new(method = "eigen")
45 +
#' glm_fast_fit <- lrnr_glm_fast$train(task)
46 +
#' glm_fast_preds <- glm_fast_fit$predict()
46 47
Lrnr_glm_fast <- R6Class(
47 -
  classname = "Lrnr_glm_fast", inherit = Lrnr_base,
48 -
  portable = TRUE, class = TRUE,
48 +
  classname = "Lrnr_glm_fast",
49 +
  inherit = Lrnr_base, portable = TRUE, class = TRUE,
49 50
  public = list(
50 51
    initialize = function(intercept = TRUE, method = "Cholesky", ...) {
51 52
      super$initialize(params = args_to_list(), ...)
52 53
    }
53 54
  ),
54 -
55 55
  private = list(
56 56
    .default_params = list(method = "Cholesky"),
57 -
58 57
    .properties = c("continuous", "binomial", "weights", "offset"),
59 -
60 58
    .train = function(task) {
61 -
      verbose <- getOption("sl3.transform.offset")
59 +
      verbose <- getOption("sl3.verbose")
60 +
62 61
      args <- self$params
62 +
63 63
      outcome_type <- self$get_outcome_type(task)
64 +
      args$y <- outcome_type$format(task$Y)
64 65
65 66
      if (is.null(args$family)) {
66 67
        args$family <- outcome_type$glm_family(return_object = TRUE)
67 68
      }
68 -
69 69
      family_name <- args$family$family
70 70
      linkinv_fun <- args$family$linkinv
71 71
      link_fun <- args$family$linkfun
@@ -76,8 +76,6 @@
76 76
      } else {
77 77
        args$X <- as.matrix(task$X)
78 78
      }
79 -
      args$y <- outcome_type$format(task$Y)
80 -
      args$trace <- FALSE
81 79
82 80
      if (task$has_node("weights")) {
83 81
        args$weights <- task$weights
@@ -102,7 +100,7 @@
102 100
        if (verbose) {
103 101
          message(paste(
104 102
            "speedglm::speedglm.wfit failed, falling back on",
105 -
            "stats:glm.fit;", fit_object
103 +
            "stats::glm.fit;", fit_object
106 104
          ))
107 105
        }
108 106
        args$ctrl <- glm.control(trace = FALSE)
@@ -130,9 +128,9 @@
130 128
      fit_object$training_offset <- task$has_node("offset")
131 129
      return(fit_object)
132 130
    },
133 -
134 131
    .predict = function(task = NULL) {
135 132
      verbose <- getOption("sl3.verbose")
133 +
136 134
      if (self$params$intercept) {
137 135
        X <- task$X_intercept
138 136
      } else {

@@ -1,4 +1,4 @@
1 -
#' Base Class for all sl3 Learners.
1 +
#' Base Class for all sl3 Learners
2 2
#'
3 3
#' Generally this base learner class should not be instantiated. Intended to be
4 4
#' an abstract class, although abstract classes are not explicitly supported
@@ -8,6 +8,7 @@
8 8
#' @docType class
9 9
#'
10 10
#' @importFrom R6 R6Class
11 +
#' @importFrom Rdpack reprompt
11 12
#'
12 13
#' @export
13 14
#'
@@ -42,7 +43,6 @@
42 43
43 44
      invisible(self)
44 45
    },
45 -
46 46
    subset_covariates = function(task) {
47 47
      # learners subset task covariates based on their covariate set
48 48
      if ("covariates" %in% names(self$params) &&
@@ -86,13 +86,15 @@
86 86
          colnames(delta_missing_data) <- delta_missing
87 87
          cols <- task$add_columns(data.table(delta_missing_data))
88 88
89 -
          return(task$next_in_chain(covariates = ord_covs, column_names = cols))
89 +
          return(task$next_in_chain(
90 +
            covariates = ord_covs,
91 +
            column_names = cols
92 +
          ))
90 93
        }
91 94
      } else {
92 95
        return(task)
93 96
      }
94 97
    },
95 -
96 98
    get_outcome_type = function(task) {
97 99
      outcome_type <- task$outcome_type
98 100
      if (!is.null(self$params$outcome_type)) {
@@ -105,7 +107,6 @@
105 107
      }
106 108
      return(outcome_type)
107 109
    },
108 -
109 110
    get_outcome_range = function(task = NULL, fold_number = "full") {
110 111
      # return the support of learner
111 112
      # if task is specified, return task observations based supports
@@ -133,7 +134,6 @@
133 134
      }
134 135
      return(range)
135 136
    },
136 -
137 137
    base_train = function(task, trained_sublearners = NULL) {
138 138
139 139
      # trains learner to data
@@ -151,7 +151,6 @@
151 151
      new_object$set_train(fit_object, subsetted_task)
152 152
      return(new_object)
153 153
    },
154 -
155 154
    set_train = function(fit_object, training_task) {
156 155
      private$.fit_object <- fit_object
157 156
      # for predict/chaining subset covariates to be same as training task
@@ -168,16 +167,14 @@
168 167
      }
169 168
      private$.fit_uuid <- UUIDgenerate(use.time = TRUE)
170 169
    },
171 -
172 170
    assert_trained = function() {
173 171
      if (!self$is_trained) {
174 172
        stop(paste(
175 -
          "Learner has not yet been train to data.",
173 +
          "Learner has not yet been trained to data.",
176 174
          "Call learner$train(task) first."
177 175
        ))
178 176
      }
179 177
    },
180 -
181 178
    base_predict = function(task = NULL) {
182 179
      self$assert_trained()
183 180
      if (is.null(task)) {
@@ -194,7 +191,6 @@
194 191
      }
195 192
      return(predictions)
196 193
    },
197 -
198 194
    base_chain = function(task = NULL) {
199 195
      self$assert_trained()
200 196
      if (is.null(task)) {
@@ -211,14 +207,12 @@
211 207
      }
212 208
      return(next_task)
213 209
    },
214 -
215 210
    train_sublearners = function(task) {
216 211
      # TODO: add error handling
217 212
      subsetted_task <- delayed_learner_subset_covariates(self, task)
218 213
219 214
      return(private$.train_sublearners(subsetted_task))
220 215
    },
221 -
222 216
    train = function(task) {
223 217
      delayed_fit <- delayed_learner_train(self, task)
224 218
      verbose <- getOption("sl3.verbose")
@@ -227,50 +221,46 @@
227 221
        progress = verbose
228 222
      ))
229 223
    },
230 -
231 224
    predict = function(task = NULL) {
232 225
      delayed_preds <- delayed_learner_fit_predict(self, task)
233 226
      return(delayed_preds$compute(job_type = sl3_delayed_job_type()))
234 227
    },
235 -
236 228
    sample = function(task, n_samples = 30, fold_number = "full") {
237 229
      stop("This learner does not have a sampling method.")
238 230
    },
239 -
240 231
    chain = function(task = NULL) {
241 232
      delayed_chained <- delayed_learner_fit_chain(self, task)
242 233
      return(delayed_chained$compute(job_type = sl3_delayed_job_type()))
243 234
    },
244 -
245 235
    print = function() {
246 236
      print(self$name)
247 237
      # print(self$params)
248 238
      fit_object <- private$.fit_object
249 239
      if (!is.null(fit_object)) print(fit_object)
250 240
    },
251 -
252 241
    custom_chain = function(new_chain_fun = NULL) {
253 242
      private$.custom_chain <- new_chain_fun
254 243
    },
255 -
256 244
    predict_fold = function(task, fold_number = "full") {
257 245
      # support legacy "magic number" definitions
258 246
      fold_number <- interpret_fold_number(fold_number)
259 247
      # for non-CV learners, do full predict no matter what, but warn about it
260 248
      # if fold_number is something else
261 249
      if (fold_number != "full") {
262 -
        warning(self$name, " is not a cv-aware learner, so self$predict_fold reverts to self$predict")
250 +
        warning(
251 +
          self$name,
252 +
          " is not cv-aware: self$predict_fold reverts to self$predict"
253 +
        )
263 254
      }
264 255
      self$predict(task)
265 256
    },
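A hedged sketch contrasting the cv-aware and plain cases of predict_fold(); using Lrnr_cv as the cv-aware wrapper is an assumption drawn from the package's other learners:

library(sl3)
data(cpp_imputed)
task <- sl3_Task$new(
  cpp_imputed,
  covariates = c("apgar1", "apgar5", "parity"),
  outcome = "haz"
)
# cv-aware: per-fold validation-set predictions
cv_glm_fit <- Lrnr_cv$new(Lrnr_glm$new())$train(task)
cv_preds <- cv_glm_fit$predict_fold(task, fold_number = "validation")
# not cv-aware: warns and falls back to full-data predictions
glm_fit <- Lrnr_glm$new()$train(task)
full_preds <- glm_fit$predict_fold(task, fold_number = "validation")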
266 -
267 257
    reparameterize = function(new_params) {
268 258
      # modify learner parameters
269 259
      new_self <- self$clone()
270 -
      new_self$.__enclos_env__$private$.params[names(new_params)] <- new_params[]
260 +
      new_self$.__enclos_env__$private$.params[names(new_params)] <-
261 +
        new_params[]
271 262
      return(new_self)
272 263
    },
273 -
274 264
    retrain = function(new_task, trained_sublearners = NULL) {
275 265
276 266
      # retrains fitted learner on a new task
@@ -288,7 +278,11 @@
288 278
        new_self$.__enclos_env__$private$.params <- params_no_covars
289 279
      }
290 280
      if (!is.null(trained_sublearners)) {
291 -
        new_fit_object <- new_self$.__enclos_env__$private$.train(new_task, trained_sublearners)
281 +
        new_fit_object <-
282 +
          new_self$.__enclos_env__$private$.train(
283 +
            new_task,
284 +
            trained_sublearners
285 +
          )
292 286
      } else {
293 287
        new_fit_object <- new_self$.__enclos_env__$private$.train(new_task)
294 288
      }
@@ -297,7 +291,6 @@
297 291
      return(new_object)
298 292
    }
299 293
  ),
300 -
301 294
  active = list(
302 295
    is_trained = function() {
303 296
      return(!is.null(private$.fit_object))
@@ -306,7 +299,6 @@
306 299
      fit_object <- private$.fit_object
307 300
      return(fit_object)
308 301
    },
309 -
310 302
    name = function() {
311 303
      # TODO: allow custom names
312 304
      if (is.null(private$.name)) {
@@ -322,31 +314,24 @@
Loading
322 314
      }
323 315
      return(private$.name)
324 316
    },
325 -
326 317
    learner_uuid = function() {
327 318
      return(private$.learner_uuid)
328 319
    },
329 -
330 320
    fit_uuid = function() {
331 321
      return(private$.fit_uuid)
332 322
    },
333 -
334 323
    params = function() {
335 324
      return(private$.params)
336 325
    },
337 -
338 326
    training_task = function() {
339 327
      return(private$.training_task)
340 328
    },
341 -
342 329
    training_outcome_type = function() {
343 330
      return(private$.training_outcome_type)
344 331
    },
345 -
346 332
    properties = function() {
347 333
      return(private$.properties)
348 334
    },
349 -
350 335
    coefficients = function() {
351 336
      self$assert_trained()
352 337
      coefs <- try(coef(self$fit_object))
@@ -357,7 +342,6 @@
Loading
357 342
      }
358 343
    }
359 344
  ),
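The active bindings above expose read-only state on a learner. Continuing the earlier sketch, with fit the trained Lrnr_glm:

fit$is_trained # TRUE once .fit_object is populated
fit$params # parameters the learner was constructed with
fit$coefficients # coef() of the underlying fit, or NULL when unavailable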
-
  private = list(
    .name = NULL,
    .fit_object = NULL,
@@ -369,24 +353,20 @@
    .required_packages = NULL,
    .properties = list(),
    .custom_chain = NULL,
-
    .train_sublearners = function(task) {
      # train sublearners here
      return(NULL)
    },
-
    .train = function(task) {
      stop(paste(
        "Learner is meant to be abstract, you should instead use",
-        "specific learners. See listLearners()"
+        "specific learners. See sl3_list_learners()"
      ))
    },
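The abstract .train() error message refers to sl3_list_learners(), which filters learners by the same property strings stored in .properties:

library(sl3)
sl3_list_learners() # all concrete learners
sl3_list_learners("binomial") # only learners handling binomial outcomes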
-
    .predict = function(task) {
      predictions <- predict(private$.fit_object, newdata = task$X)
      return(predictions)
    },
-
    .chain = function(task) {
      predictions <- self$predict(task)
      predictions <- as.data.table(predictions)
@@ -399,7 +379,6 @@
        column_names = new_col_names
      ))
    },
-
    .load_packages = function() {
      if (!is.null(private$.required_packages)) {
        requirePackages(

@@ -49,8 +49,7 @@
    }
  ),
  private = list(
-    .properties = c("continuous", "binomial"),
-
+    .properties = c("continuous", "binomial", "wrapper"),
    .train = function(task) {
      args <- self$params
      args$X <- as.matrix(task$X)
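The "wrapper" property added above becomes queryable like any other (this excerpt does not name the owning file; Lrnr_glm below is purely illustrative):

# every instantiated learner exposes its property set via an active binding
Lrnr_glm$new()$properties
# learners advertising a given property can be enumerated by name
sl3_list_learners("wrapper")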

@@ -23,24 +23,26 @@
#'   \item{\code{learner}}{An instantiated learner to use for estimating
#'     coefficients used in screening.}
#'   \item{\code{threshold = 1e-3}}{Minimum size of coefficients to be kept.}
-#'   \item{\code{max_retain = NULL}}{Maximum no. variables to be kept.}
+#'   \item{\code{max_screen = NULL}}{Maximum number of covariates to be kept.}
+#'   \item{\code{min_screen = 2}}{Minimum number of covariates to be kept. Only
+#'     applicable when the supplied \code{learner} is a \code{\link{Lrnr_glmnet}}.}
#'   \item{\code{...}}{Other parameters passed to \code{learner}.}
#' }
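A usage sketch for the revised screener parameters, pairing the screener with a downstream learner via Pipeline (all values illustrative, reusing task from earlier):

# keep at most 3 covariates, ranked by absolute lasso coefficient,
# then fit a GLM on whatever survives screening
screener <- Lrnr_screener_coefs$new(
  learner = Lrnr_glmnet$new(),
  threshold = 0,
  max_screen = 3
)
screened_glm <- Pipeline$new(screener, Lrnr_glm$new())
screened_fit <- screened_glm$train(task)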
Lrnr_screener_coefs <- R6Class(
  classname = "Lrnr_screener_coefs",
  inherit = Lrnr_base, portable = TRUE, class = TRUE,
  public = list(
-    initialize = function(learner, threshold = 1e-3, max_retain = NULL, ...) {
-      params <- args_to_list()
-      super$initialize(params = params, ...)
+    initialize = function(learner, threshold = 0, max_screen = NULL,
+                          min_screen = 2, ...) {
+      super$initialize(params = args_to_list(), ...)
    }
  ),
  private = list(
    .properties = c("screener"),
-
    .train = function(task) {
-      learner <- self$params$learner
-      fit <- learner$train(task)
+      args <- self$params
+
+      fit <- args$learner$train(task)
      coefs <- as.vector(coef(fit))
      coef_names <- rownames(coef(fit))
      if (is.null(coef_names)) {
@@ -53,28 +55,71 @@

      covs <- task$nodes$covariates

-      selected_coefs <- coef_names[which(abs(coefs) > self$params$threshold)]
+      selected_coefs <- coef_names[which(abs(coefs) > args$threshold)]
      selected_coefs <- unique(gsub("\\..*", "", selected_coefs))
      selected <- intersect(selected_coefs, covs)

-      if (!is.null(self$params$max_retain) &&
-        (self$params$max_retain < length(selected))) {
-        ord_coefs <- coef_names[order(abs(coefs), decreasing = TRUE)]
-        ord_coefs <- unique(gsub("\\..*", "", ord_coefs))
-        selected <- intersect(ord_coefs, covs)[1:self$params$max_retain]
+      if (!is.null(args$max_screen)) {
+        if (args$max_screen < length(selected)) {
+          ord_coefs <- coef_names[order(abs(coefs), decreasing = TRUE)]
+          ord_coefs <- unique(gsub("\\..*", "", ord_coefs))
+          selected <- intersect(ord_coefs, covs)[1:args$max_screen]
+        }
+      }
+
+      if (length(selected) < args$min_screen) {
+        if ("lambda" %in% names(args$learner$params)) {
+          warning(