tlverse / tmle3
Showing 2 of 2 files from the diff.

@@ -44,6 +44,17 @@
Loading
44 44
      super$initialize(observed_likelihood, ..., outcome_node = outcome_node)
45 45
      private$.cf_likelihood <- make_CF_Likelihood(observed_likelihood, intervention_list)
46 46
    },
47 +
    reshape_long_data = function(long_data, t_max) {
      # Reshape long-format values (all observations for time 1, then all
      # observations for time 2, ...) into an n x t_max object where column
      # i holds the values for time point i.
      # TODO: assume long_data is a list
      n <- length(long_data) / t_max
      # Build each time-point column up front and bind once: avoids growing
      # a list with c() inside the loop, and seq_len() is safe even for a
      # degenerate t_max.
      cols <- lapply(seq_len(t_max), function(i) {
        long_data[seq(1 + (i - 1) * n, i * n)]
      })
      rs <- do.call(cbind, cols)
      return(rs)
    },
47 58
    hazards_to_survival = function(p_hazards, t_max) {
48 59
      # TODO: whether change the input data format to consecutive times for each observation
49 60
      n <- length(p_hazards) / t_max

@@ -0,0 +1,315 @@
Loading
1 +
#' Defines an update procedure (submodel+loss function) for survival data
#'
#' Current Limitations:
#' loss function and submodel are hard-coded (need to accept arguments for these)
#' @section Constructor:
#'   \code{define_param(maxit, cvtmle, one_dimensional, constrain_step, delta_epsilon, verbose)}
#'
#'   \describe{
#'     \item{\code{maxit}}{The maximum number of update iterations
#'     }
#'     \item{\code{cvtmle}}{If \code{TRUE}, use CV-likelihood values when
#'        calculating updates.
#'     }
#'     \item{\code{one_dimensional}}{If \code{TRUE}, collapse clever covariates
#'        into a one-dimensional clever covariate scaled by the mean of their
#'        EIFs.
#'     }
#'     \item{\code{constrain_step}}{If \code{TRUE}, step size is at most
#'        \code{delta_epsilon} (it can be smaller if a smaller step decreases
#'        the loss more).
#'     }
#'     \item{\code{delta_epsilon}}{The maximum step size allowed if
#'        \code{constrain_step} is \code{TRUE}.
#'     }
#'     \item{\code{convergence_type}}{The convergence criterion to use: (1)
#'        \code{"scaled_var"} corresponds to sqrt(Var(D)/n)/logn (the default)
#'        while (2) \code{"sample_size"} corresponds to 1/n.
#'     }
#'     \item{\code{fluctuation_type}}{Whether to include the auxiliary covariate
#'        for the fluctuation model as a covariate or to treat it as a weight.
#'        Note that the option \code{"weighted"} is incompatible with a
#'        multi-epsilon submodel (\code{one_dimensional = FALSE}).
#'     }
#'     \item{\code{fit_method}}{Method used to fit the fluctuation submodel.
#'        Currently only \code{"l2"} (ridge-penalized logistic regression via
#'        \pkg{glmnet}) is supported.
#'     }
#'     \item{\code{clipping}}{Upper bound on the l2 norm of the fluctuation
#'        coefficient vector (epsilon) selected along the glmnet lambda path.
#'     }
#'     \item{\code{verbose}}{If \code{TRUE}, diagnostic output is generated
#'        about the updating procedure.
#'     }
#'     }
#'
#' @importFrom R6 R6Class
#'
#' @export
#
tmle3_Update_survival <- R6Class(
  classname = "tmle3_Update_survival",
  portable = TRUE,
  class = TRUE,
  inherit = tmle3_Update,
  public = list(
    initialize = function(maxit = 100, cvtmle = TRUE, one_dimensional = FALSE,
                          constrain_step = FALSE, delta_epsilon = 1e-4,
                          convergence_type = c("scaled_var", "sample_size"),
                          fluctuation_type = c("standard", "weighted"),
                          verbose = FALSE,
                          fit_method = "l2",
                          clipping = 1e0) {
      # Delegate the shared update options to tmle3_Update, then store the
      # survival-specific settings.
      super$initialize(
        maxit, cvtmle, one_dimensional,
        constrain_step, delta_epsilon,
        convergence_type,
        fluctuation_type,
        verbose
      )
      private$.fit_method <- fit_method
      private$.clipping <- clipping
    },
    # l2 (Euclidean) norm of a coefficient vector.
    norm_l2 = function(beta) {
      return(sqrt(sum(beta^2)))
    },
    # Fit the fluctuation submodel for one update node.
    #
    # submodel_data: a list with elements
    #   * observed: observed (scaled) outcomes
    #   * H: clever-covariate matrix
    #   * initial: initial likelihood values, used as a fixed offset on the
    #     logit scale
    #
    # Returns the fitted fluctuation coefficients (epsilon); NA coefficients
    # arising from collinear covariates are replaced by 0.
    fit_submodel = function(submodel_data) {
      if (self$fit_method != "l2") {
        # Fail fast with a clear message instead of the obscure
        # "object 'epsilon_n' not found" error the old code produced.
        stop("Unsupported fit_method: ", self$fit_method, call. = FALSE)
      }

      suppressWarnings({
        lambda.min.ratio <- 1e-2
        ind <- 1L
        repeat {
          # Ridge-penalized (alpha = 0) logistic fluctuation with no
          # intercept and the initial fit held fixed as an offset.
          submodel_fit <- glmnet::glmnet(
            x = submodel_data$H,
            y = submodel_data$observed,
            offset = qlogis(submodel_data$initial),
            family = "binomial",
            alpha = 0,
            standardize = FALSE,
            intercept = FALSE,
            lambda.min.ratio = lambda.min.ratio,
            nlambda = 2e2
          )
          # Select the least-penalized coefficient vector on the lambda
          # path whose l2 norm still satisfies the clipping bound.
          norms <- apply(submodel_fit$beta, 2, self$norm_l2)
          admissible <- which(norms <= self$clipping)
          if (length(admissible) == 0L) {
            # No fit satisfies the bound (should not occur, since the
            # coefficients at lambda_max are all zero); fall back to the
            # most-penalized fit instead of indexing with max(integer(0))
            # = -Inf as the old code did.
            ind <- 1L
            break
          }
          ind <- max(admissible)
          if (ind > 1L) break
          # Only the fully-penalized fit was admissible: move the lambda
          # path closer to lambda_max and refit, giving up once the path
          # has essentially degenerated to a single lambda (prevents an
          # infinite loop).
          lambda.min.ratio <- (lambda.min.ratio + 1) / 2
          if (lambda.min.ratio > 1 - 1e-6) break
        }
        epsilon_n <- submodel_fit$beta[, ind]
      })

      epsilon <- epsilon_n

      # NOTE: this protects against collinear covariates
      # (which we don't care about, we just want an update)
      epsilon[is.na(epsilon)] <- 0

      if (self$verbose) {
        cat(sprintf("epsilon: %e ", epsilon))
      }

      return(epsilon)
    }
    # NOTE(review): large commented-out copies of inherited tmle3_Update
    # methods (collapse_covariates, update_step, generate_submodel_data,
    # fit_submodels, submodel, loss_function, apply_submodel, apply_update,
    # check_convergence, update, register_param) were removed as dead code;
    # recover them from version control if they are needed again.
  ),
  active = list(
    # Method used to fit the fluctuation submodel (read-only).
    fit_method = function() {
      return(private$.fit_method)
    },
    # Upper bound on the l2 norm of the fitted epsilon (read-only).
    clipping = function() {
      return(private$.clipping)
    }
  ),
  private = list(
    .fit_method = NULL,
    .clipping = NULL
  )
)
Files Coverage
R 76.72%
Project Totals (45 files) 76.72%
Sunburst
The inner-most circle is the entire project, moving away from the center are folders then, finally, a single file. The size and color of each slice is representing the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project. Proceeding with folders and finally individual files. The size and color of each slice is representing the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block is represented by the number of statements and the coverage, respectively.
Loading