sahirbhatnagar / casebase
1
#' @rdname absoluteRisk
2
#' @importFrom data.table := data.table
3
#' @export
4
absoluteRisk.CompRisk <- function(object, time, newdata,
5
                                  method = c("numerical", "montecarlo"),
6
                                  nsamp = 100, onlyMain = TRUE,
7
                                  type = c("CI", "survival"),
8
                                  addZero = TRUE) {
9 2
    method <- match.arg(method)
10 2
    type <- match.arg(type)
11

12 2
    if (missing(newdata)) {
13
        # Should we use the whole case-base dataset or the original one?
14 0
        if (is.null(object@originalData)) {
15 0
            stop(paste("Cannot estimate the mean absolute risk without",
16 0
                       "the original data. See documentation."),
17 0
                 call. = FALSE)
18
        }
19 0
        newdata <- object@originalData
20 0
        unselectTime <- (names(newdata) != object@timeVar)
21 0
        newdata <- subset(newdata, select = unselectTime)
22
    }
23 2
    typeEvents <- object@typeEvents
24
    ###################################################
25
    # In competing risks, we can get a cumulative
26
    # incidence function using a nested double integral
27
    # subdensity f_j = lambda_j * Survival
28
    # F_j = P(T <= t, J = j : covariates) = int_0^t f_j
29
    ###################################################
30 2
    time_ordered <- unique(c(0, sort(time)))
31
    # Create array to store output
32 2
    if (onlyMain) {
33 2
        output <- matrix(NA, ncol = nrow(newdata) + 1,
34 2
                         nrow = length(time_ordered))
35 2
        output[, 1] <- time_ordered
36 2
        colnames(output) <- c("time", rep("", nrow(newdata)))
37 2
        rownames(output) <- rep("", length(time_ordered))
38 2
        output[1, -1] <- 0
39
    } else {
40 0
        J <- length(typeEvents) - 1
41 0
        output <- array(NA, dim = c(length(time_ordered), nrow(newdata) + 1, J))
42 0
        output[, 1, ] <- time_ordered
43 0
        dimnames(output) <- list(rep("", length(time_ordered)),
44 0
                                 c("time", rep("", nrow(newdata))),
45 0
                                 paste("event", typeEvents[-1], sep = "="))
46 0
        output[1, -1, ] <- 0
47
    }
48

49
    # Compute subdensities
50 2
    if (method == "numerical") {
51
        # Compute points at which we evaluate integral
52 2
        knots <- seq(0, max(time_ordered),
53 2
                     length.out = (length(time_ordered) - 1) * nsamp)
54 2
        knots <- unique(sort(c(knots, time_ordered)))
55

56 2
        for (j in seq_len(nrow(newdata))) {
57
            # Extract current obs
58 2
            current_obs <- newdata[j, , drop = FALSE]
59
            # Create data.table for prediction
60 2
            newdata2 <- data.table::data.table(current_obs)
61 2
            newdata2 <- newdata2[rep(1, length(knots))]
62 2
            newdata2[, object@timeVar := knots]
63 2
            newdata2[, "offset" := 0]
64
            # Compute all values for all hazards
65 2
            lambdas <- exp(predict_CompRisk(object, newdata2))
66 2
            lambdas[which(lambdas %in% c(Inf, -Inf))] <- 0
67 2
            OverallLambda <- rowSums(lambdas)
68 2
            survFunction <- exp(-trap_int(knots, OverallLambda))
69 2
            if (onlyMain) {
70
                # Only compute first subdensity
71 2
                subdensity <- lambdas[, 1] * survFunction
72 2
                pred <- trap_int(knots, subdensity)[knots %in% c(0, time)]
73 2
                output[, j + 1] <- pred
74
            } else {
75 0
                subdensity <- lambdas * drop(survFunction)
76 0
                pred <- trap_int(knots, subdensity)[knots %in% c(0, time)]
77 0
                output[, j + 1, ] <- pred
78
            }
79
        }
80
    } else {
81
        # Sample points at which we evaluate function
82 2
        knots <- runif(n = (length(time_ordered) - 1) * nsamp^2,
83 2
                       min = 0, max = max(time_ordered))
84 2
        knots2 <- runif(n = (length(time_ordered) - 1) * nsamp,
85 2
                        min = 0, max = max(time_ordered))
86 2
        knots2 <- sort(knots2)
87 2
        for (j in seq_len(nrow(newdata))) {
88 2
            current_obs <- newdata[j, , drop = FALSE]
89
            # Create data.table for prediction
90 2
            newdata2 <- data.table(current_obs)
91 2
            newdata2 <- newdata2[rep(1, length(knots))]
92 2
            newdata2[, object@timeVar := knots]
93 2
            newdata2[, "offset" := 0]
94
            # Compute all values for all hazards
95 2
            lambdas <- exp(predict_CompRisk(object, newdata2))
96 2
            lambdas[which(lambdas %in% c(Inf, -Inf))] <- 0
97 2
            OverallLambda <- rowSums(lambdas)
98 2
            mean_values <- sapply(split(OverallLambda,
99 2
                                        cut(knots, breaks = knots2)),
100 2
                                  mean, na.rm = TRUE)
101 2
            mean_values[is.na(mean_values)] <- 0
102 2
            survFunction <- exp(-cumsum(c(0, mean_values * diff(knots2))))
103
            # Second integral---Create data.table for prediction
104 2
            newdata2 <- data.table(current_obs)
105 2
            newdata2 <- newdata2[rep(1, length(knots2))]
106 2
            newdata2[, object@timeVar := knots2]
107 2
            newdata2[, "offset" := 0]
108
            # Compute all values for all hazards
109 2
            lambdas2 <- exp(predict_CompRisk(object, newdata2))
110 2
            lambdas2[which(lambdas2 %in% c(Inf, -Inf))] <- 0
111 2
            if (onlyMain) {
112
                # Only compute first subdensity
113 2
                subdensity <- lambdas2[, 1] * survFunction
114 2
                mean_values2 <- sapply(split(subdensity,
115 2
                                             cut(knots2,
116 2
                                                 breaks = time_ordered)),
117 2
                                       mean, na.rm = TRUE)
118 2
                pred <- cumsum(c(0, mean_values2 * diff(time_ordered)))
119 2
                output[, j + 1] <- pred
120
            } else {
121 0
                subdensity <- lambdas2 * drop(survFunction)
122 0
                mean_values2 <- sapply(split(seq_len(nrow(subdensity)),
123 0
                                             cut(knots2,
124 0
                                                 breaks = time_ordered)),
125 0
                                       function(ind) {
126 0
                                           colMeans(subdensity[ind, ,
127 0
                                                               drop = FALSE],
128 0
                                                    na.rm = TRUE)
129
                                           })
130 0
                pred <- apply(mean_values2, 1, function(row) {
131 0
                    cumsum(c(0, row * diff(time_ordered)))
132
                    })
133 0
                output[, j + 1, ] <- pred
134
            }
135
        }
136
    }
137

138 2
    if (onlyMain) {
139
        # Switch to survival scale?
140 2
        if (type == "survival") {
141 2
            output[, -1] <- 1 - output[, -1, drop = FALSE]
142
        }
143
        # Add time = 0?
144 0
        if (!addZero) output <- output[-1, , drop = FALSE]
145
    } else {
146
        # Switch to survival scale?
147 0
        if (type == "survival") {
148 0
            output[, -1, ] <- 1 - output[, -1, ]
149
        }
150
        # Add time = 0?
151 0
        if (!addZero) output <- output[-1, , , drop = FALSE]
152

153
    }
154
    # Sometimes montecarlo integration gives nonsensical probability estimates
155 2
    if (method == "montecarlo" && (any(output < 0) | any(output > 1))) {
156 2
        warning(paste("Some probabilities are out of range. Consider",
157 2
                      "increasing nsamp or using numerical integration"),
158 2
                call. = FALSE)
159
    }
160 2
    attr(output, "type") <- type
161 2
    return(output)
162

163
}
164

165
# #' @rdname absoluteRisk
166
# #' @export
167
absoluteRisk.CompRiskGlmnet <- function(object, time, newdata,
168
                                        method = c("numerical", "montecarlo"),
169
                                        nsamp = 100, onlyMain = TRUE,
170
                                        s = c("lambda.1se", "lambda.min"),
171
                                        type = c("CI", "survival"),
172
                                        addZero = TRUE, ...) {
173
    # The current implementation doesn't work
174 0
    stop(paste("object is of class", class(object),
175 0
               "\nabsoluteRisk should be used with an object of class glm,",
176 0
               "cv.glmnet, gbm, or CompRisk"),
177 0
         call. = TRUE)
178
}
179

180
#' @importFrom stats coef
181
predict_CompRisk <- function(object, newdata = NULL) {
182 2
    ttob <- terms(object)
183 2
    X <- model.matrix(delete.response(ttob), newdata,
184 2
                      contrasts = if (length(object@contrasts)) {
185 0
                          object@contrasts
186 2
                          } else NULL,
187 2
                      xlev = object@xlevels)
188 2
    coeffs <- matrix(coef(object), nrow = ncol(X),
189 2
                     byrow = TRUE)
190 2
    preds <- X %*% coeffs
191 2
    colnames(preds) <- paste0("log(mu[,",
192 2
                              seq(2, length(object@typeEvents)),
193 2
                              "]/mu[,1])")
194 2
    return(preds)
195
}

Read our documentation on viewing source code .

Loading