sahirbhatnagar / casebase
1
#' @title Plot Fitted Hazard Curve as a Function of Time
2
#' @description Visualize estimated hazard curves as a function of time with
3
#'   confidence intervals. This function takes as input, the result from the
4
#'   [casebase::fitSmoothHazard()] function. The user can also specify a
5
#'   sequence of times at which to estimate the hazard function. These plots are
6
#'   useful to visualize the non-proportional hazards, i.e., time dependent
7
#'   interactions with a covariate.
8
#' @param object Fitted object of class `glm`, `gam`, `cv.glmnet` or `gbm`. This
9
#'   is the result from the [casebase::fitSmoothHazard()] function.
10
#' @param newdata A data frame in which to look for variables with which to
11
#'   predict. This is required and must contain all the variables used in the
12
#'   model. Only one covariate profile can be used. If more than one row is
13
#'   provided, only the first row will be used.
14
#' @param type Type of plot. Currently, only "hazard" has been implemented.
15
#'   Default: c("hazard")
16
#' @param xlab x-axis label. Default: the name of the time variable from the
17
#'   fitted `object`.
18
#' @param breaks Number of points at which to estimate the hazard. This argument
19
#'   is only used if argument `times=NULL`. This function will calculate a
20
#'   sequence of times between the minimum and maximum of observed event times.
21
#'   Default: 100.
22
#' @param ci.lvl Confidence level. Must be in (0,1), Default: 0.95
23
#' @param ylab y-axis label. Default: NULL which means the function will put
24
#'   sensible defaults.
25
#' @param line.col Line color, Default: 1. See [graphics::par()] for details.
26
#' @param ci.col Confidence band color. Only used if argument `ci=TRUE`,
27
#'   Default: 'grey'
28
#' @param lty Line type. See [graphics::par()] for details, Default: par("lty")
29
#' @param add Logical; if TRUE add to an already existing plot; Default: FALSE
30
#' @param ci Logical; if TRUE confidence bands are calculated. Only available
31
#'   for `family="glm"` and `family="gam"`, Default: !add
32
#' @param rug Logical. Adds a rug representation (1-d plot) of the event times
33
#'   (only for `status=1`), Default: !add
34
#' @param s Value of the penalty parameter lambda at which predictions are
35
#'   required (for class \code{cv.glmnet} only). Only the first entry will be
36
#'   used if more than one numeric value is provided, Default: c("lambda.1se",
37
#'   "lambda.min")
38
#' @param times Vector of numeric values at which the hazard should be
39
#'   calculated. Default: NULL which means this function will use the minimum
40
#'   and maximum of observed event times with the `breaks` argument.
41
#' @param ... further arguments passed to [graphics::matplot()]
42
#' @details This is an earlier version of a function to plot hazards. We
43
#'   recommend instead using the plot method for objects returned by
44
#'   [casebase::fitSmoothHazard()]. See [casebase::plot.singleEventCB()].
45
#' @return a plot of the hazard function and a data.frame of original data used
46
#'   in the fitting along with the data used to create the plots including
47
#'   `predictedhazard` which is the predicted hazard for a given covariate
48
#'   pattern and time `predictedloghazard` is the predicted hazard on the log
49
#'   scale. `lowerbound` and `upperbound` are the lower and upper confidence
50
#'   interval bounds on the hazard scale (i.e. used to plot the confidence
51
#'   bands). `standarderror` is the standard error of the log hazard (only if
52
#'   `family="glm"` or `family="gam"`)
53
#' @seealso [casebase::fitSmoothHazard()]
54
#' @examples
55
#' data("simdat")
56
#' mod_cb <- fitSmoothHazard(status ~ trt * eventtime,
57
#'                                     time = "eventtime",
58
#'                                     data = simdat[1:200,],
59
#'                                     ratio = 1,
60
#'                                     family = "glm")
61
#'
62
#' results0 <- hazardPlot(object = mod_cb, newdata = data.frame(trt = 0),
63
#'            ci.lvl = 0.95, ci = FALSE, lty = 1, line.col = 1, lwd = 2)
64
#' head(results0)
65
#' hazardPlot(object = mod_cb, newdata = data.frame(trt = 1), ci = FALSE,
66
#'            ci.lvl = 0.95, add = TRUE, lty = 2, line.col = 2, lwd = 2)
67
#' legend("topleft", c("trt=0","trt=1"),lty=1:2,col=1:2,bty="y", lwd = 2)
68
#' @rdname hazardPlot
69
#' @export
70
#' @importFrom graphics lines matplot par polygon
71
#' @importFrom data.table between
72
#' @importFrom stats qnorm
73
hazardPlot <- function(object, newdata, type = c("hazard"), xlab = NULL,
74
                       breaks = 100, ci.lvl = 0.95, ylab = NULL, line.col = 1,
75
                       ci.col = "grey", lty = par("lty"), add = FALSE,
76
                       ci = !add, rug = !add, s = c("lambda.1se", "lambda.min"),
77
                       times = NULL, ...) {
78

79 2
    type <- match.arg(type)
80

81 2
    if (is.null(newdata))
82 0
        stop("newdata argument needs to be specified")
83

84 2
    if (!is.data.frame(newdata))
85 0
        stop("newdata must be a data.frame")
86

87 2
    if (nrow(newdata) > 1) {
88 0
        newdata <- newdata[1, , drop = FALSE]
89 0
        warning("More than 1 row supplied to 'newdata'. Only the first row will be used.")
90
    }
91

92 2
    if (any(names(newdata) %in% c("sequence_of_times", "predictedloghazard")))
93 0
        stop("sequence_of_times or predictedloghazard cannot be used as a colunm name in newdata. rename them to something else.")
94

95 2
    if (any(names(newdata) %in% object[["timeVar"]]))
96 0
        stop("%s cannot be used as a colunm name in newdata. remove it.",
97 0
             object[["timeVar"]])
98

99 2
    obj_class <- class(object)[2]
100

101 2
    if (!inherits(object, c("glm", "gam", "gbm", "cv.glmnet")))
102 0
        stop("object must be of class glm, gam, gbm or cv.glmnet")
103

104 2
    if (ci) {
105 2
        if (!data.table::between(ci.lvl, 0, 1, incbounds = FALSE))
106 0
            stop("ci.lvl must be between 0 and 1")
107 2
        if (!inherits(object, c("glm", "gam"))) {
108 0
            warning(sprintf("Confidence intervals cannot be calculated for objects of class %s.",
109 0
                            obj_class))
110 0
            ci <- FALSE
111
        }
112 2
        if (any(names(newdata) %in% c("standarderror", "lowerbound",
113 2
                                      "upperbound")))
114 0
            stop("'standarderror','lowerbound' and 'upperbound' cannot be used as column names in newdata. rename it.")
115
    }
116

117 2
    if (is.null(times)) {
118 2
        times <- object[["originalData"]][[object[["timeVar"]]]]
119 2
        times <- seq(min(times), max(times), length.out = breaks)
120
    } else {
121 2
        times <- times[order(times)]
122
    }
123

124 2
    newdata <- newdata[rep(seq_len(nrow(newdata)), each = length(times)), ,
125 2
                       drop = FALSE]
126 2
    newdata$sequence_of_times <- times
127 2
    names(newdata)[names(newdata) == "sequence_of_times"] <- object[["timeVar"]]
128

129 2
    newdata$offset <- 0
130
    # If gbm was fitted with an offset, predict.gbm ignores it
131
    # but still gives a warning. The following line silences this warning
132 2
    if (!inherits(object, "gbm")) attr(object$Terms, "offset") <- NULL
133

134 2
    preds <- switch(obj_class,
135 2
                    glm = {
136 2
                        pp <- predict(object, newdata = newdata,
137 2
                                      se.fit = ci, type = "link")
138 2
                        newdata$predictedloghazard <- if (ci) pp[["fit"]] else pp
139 2
                        pp
140
                    },
141 2
                    gam = {
142 2
                        pp <- predict(object, newdata = newdata,
143 2
                                      se.fit = ci, type = "link")
144 2
                        newdata$predictedloghazard <- if (ci) pp[["fit"]] else pp
145 2
                        pp
146
                     },
147 2
                    cv.glmnet = {
148 2
                        if (is.numeric(s)) {
149 0
                            if (length(s) > 1)  {
150 0
                                warning(paste("More than one value for s has",
151 0
                                              "been supplied. Only first entry",
152 0
                                              "will be used"))
153
                            }
154 0
                            s <- s[1]
155 2
                        } else if (is.character(s)) {
156 2
                            s <- match.arg(s)
157
                        }
158
                        # First extract RHS, then remove intercept
159 2
                        newx <- model.matrix(update(object$formula[-2],
160 2
                                                    ~ . - 1),
161 2
                                             newdata)
162
                        # the newoffset = 0 isn't required for now as glmnet
163
                        # is not using the offset because of the hack
164 2
                        pp <- predict(object, newx = newx, s = s, newoffset = 0)
165 2
                        newdata$predictedloghazard <- pp
166 2
                        pp
167
                    },
168 2
                    gbm = {
169
                        # If gbm was fitted with an offset
170
                        # predict.gbm ignores it but still gives a warning
171
                        # The following line silences this warning
172 2
                        attr(object$Terms, "offset") <- NULL
173 2
                        pp <- predict(object, newdata, n.trees = object$n.trees)
174 2
                        newdata$predictedloghazard <- pp
175 2
                        pp
176
                    }
177
    )
178

179 2
    if (ci) {
180 2
        std_err <- newdata$standarderror <- preds$se.fit
181 2
        newdata$lowerbound <- exp(newdata$predictedloghazard +
182 2
                                      qnorm(0.5 * (1 - ci.lvl)) * std_err)
183 2
        newdata$upperbound <- exp(newdata$predictedloghazard +
184 2
                                      qnorm(1 - 0.5 * (1 - ci.lvl)) * std_err)
185
    }
186

187 2
    newdata$predictedhazard <- exp(newdata$predictedloghazard)
188

189 2
    if (is.null(xlab))
190 2
        xlab <- object[["timeVar"]]
191 2
    if (is.null(ylab))
192 2
        ylab <- switch(type, hr = "Hazard ratio", hazard = "Hazard",
193 2
                       surv = "Survival", density = "Density",
194 2
                       sdiff = "Survival difference",
195 2
                       hdiff = "Hazard difference", marghaz = "Marginal hazard",
196 2
                       loghazard = "log(hazard)", link = "Linear predictor",
197 2
                       meansurv = "Mean survival", cumhaz = "Cumulative hazard",
198 2
                       meansurvdiff = "Difference in mean survival",
199 2
                       meanhr = "Mean hazard ratio", odds = "Odds",
200 2
                       or = "Odds ratio", margsurv = "Marginal survival",
201 2
                       marghr = "Marginal hazard ratio", haz = "Hazard",
202 2
                       fail = "Failure", meanhaz = "Mean hazard",
203 2
                       margfail = "Marginal failure",
204 2
                       af = "Attributable fraction",
205 2
                       meanmargsurv = "Mean marginal survival",
206 2
                       uncured = "Uncured distribution")
207

208 2
    ylims <- if (ci) {
209 2
        range(newdata$lowerbound, newdata$upperbound)
210 2
        } else range(newdata$predictedhazard)
211

212 2
    if (!add)
213 2
        matplot(newdata[[object[["timeVar"]]]], newdata[["predictedhazard"]],
214 2
                type = "n", xlab = xlab, ylab = ylab, ylim = ylims, ...)
215 2
    if (ci) {
216 2
        polygon(c(newdata[[object[["timeVar"]]]],
217 2
                  rev(newdata[[object[["timeVar"]]]])),
218 2
                c(newdata[["lowerbound"]], rev(newdata[["upperbound"]])),
219 2
                col = ci.col,
220 2
                border = ci.col)
221 2
        lines(newdata[[object[["timeVar"]]]], newdata[["predictedhazard"]],
222 2
              col = line.col, lty = lty, ...)
223 2
    } else lines(newdata[[object[["timeVar"]]]], newdata[["predictedhazard"]],
224 2
                 col = line.col, lty = lty, ...)
225 2
    if (rug) {
226 2
        events <- object[["originalData"]][[object[["eventVar"]]]]
227 2
        rug(object[["originalData"]][which(events == 1), ,
228 2
                                     drop = FALSE][[object[["timeVar"]]]],
229 2
            col = line.col, quiet = TRUE) # Silence warning about clipped values
230
    }
231 2
    return(invisible(newdata))
232
}

Read our documentation on viewing source code .

Loading