1

2
#' @title Creates a Kaplan-Meier plot for survfit object.
3
#' @description Creates a Kaplan-Meier plot with at risk tables below for survfit object.
4
#' @param sfit a survfit object
5
#' @param table logical: Create a table graphic below the K-M plot, indicating at-risk numbers?
6
#' @param xlabs x-axis label
7
#' @param ylabs y-axis label
8
#' @param xlims numeric: list of min and max for x-axis. Default = c(0,max(sfit$time))
9
#' @param ylims numeric: list of min and max for y-axis. Default = c(0,1)
10
#' @param surv.scale 	scale transformation of survival curves. Allowed values are "default" or "percent".
11
#' @param ystratalabs character list. A list of names for each strata. Default = names(sfit$strata)
12
#' @param ystrataname The legend name. Default = "Strata"
13
#' @param timeby numeric: control the granularity along the time-axis; defaults to 7 time-points. Default = signif(max(sfit$time)/7, 1)
14
#' @param main plot title
15
#' @param pval logical: add the pvalue to the plot?
16
#' @param pval.size numeric value specifying the p-value text size. Default is 5.
17
#' @param pval.coord numeric vector, of length 2, specifying the x and y coordinates of the p-value. Default values are NULL
18
#' @param pval.testname logical: add '(Log-rank)' text to p-value. Default = F
19
#' @param marks logical: should censoring marks be added?
20
#' @param shape what shape should the censoring marks be, default is a vertical line
21
#' @param legend logical. should a legend be added to the plot?
22
#' @param legendposition numeric. x, y position of the legend if plotted. Default=c(0.85,0.8)
23
#' @param ci logical. Should confidence intervals be plotted. Default = FALSE
24
#' @param subs = NULL,
25
#' @param label.nrisk Numbers at risk label. Default = "Numbers at risk"
26
#' @param size.label.nrisk Font size of label.nrisk. Default = 10
27
#' @param linecols Character. Colour brewer pallettes too colour lines. Default ="Set1", "black" for black with dashed line.
28
#' @param dashed logical. Should a variety of linetypes be used to identify lines. Default = FALSE
29
#' @param cumhaz Show cumulaive hazard function, Default: F
30
#' @param cluster.option Cluster option for p value, Option: "None", "cluster", "frailty", Default: "None"
31
#' @param cluster.var Cluster variable
32
#' @param data select specific data - for reactive input, Default = NULL
33
#' @param cut.landmark cut-off for landmark analysis, Default = NULL
34
#' @param ... PARAM_DESCRIPTION
35
#' @return Plot
36
#' @details DETAILS
37
#' @author Jinseob Kim, but heavily modified version of a script created by Michael Way.
38
#' \url{https://github.com/michaelway/ggkm/}
39
#' I have packaged this function, added functions to namespace and included a range of new parameters.
40
#' @examples
41
#'  library(survival)
42
#'  data(colon)
43
#'  fit <- survfit(Surv(time,status)~rx, data=colon)
44
#'  jskm(fit, timeby=500)
45
#' @rdname jskm
46
#' @importFrom ggplot2 ggplot
47
#' @importFrom ggplot2 aes
48
#' @importFrom ggplot2 geom_step
49
#' @importFrom ggplot2 scale_linetype_manual
50
#' @importFrom ggplot2 scale_colour_manual
51
#' @importFrom ggplot2 theme_bw
52
#' @importFrom ggplot2 theme
53
#' @importFrom ggplot2 element_text
54
#' @importFrom ggplot2 scale_x_continuous
55
#' @importFrom ggplot2 scale_y_continuous
56
#' @importFrom ggplot2 element_blank
57
#' @importFrom ggplot2 element_line
58
#' @importFrom ggplot2 element_rect
59
#' @importFrom ggplot2 labs
60
#' @importFrom ggplot2 ggtitle
61
#' @importFrom ggplot2 geom_point
62
#' @importFrom ggplot2 geom_blank
63
#' @importFrom ggplot2 annotate
64
#' @importFrom ggplot2 geom_text
65
#' @importFrom ggplot2 scale_y_discrete
66
#' @importFrom ggplot2 xlab
67
#' @importFrom ggplot2 ylab
68
#' @importFrom ggplot2 ggsave
69
#' @importFrom ggplot2 scale_colour_brewer
70
#' @importFrom ggplot2 geom_ribbon
71
#' @importFrom grid unit
72
#' @importFrom gridExtra grid.arrange
73
#' @importFrom stats pchisq time as.formula
74
#' @importFrom survival survfit survdiff coxph Surv cluster frailty
75
#' @export
76
 
77

78
jskm <- function(sfit,
79
                 table = FALSE,
80
                 xlabs = "Time-to-event",
81
                 ylabs = "Survival probability",
82
                 xlims = c(0,max(sfit$time)),
83
                 ylims = c(0,1),
84
                 surv.scale = c("default", "percent"),
85
                 ystratalabs = names(sfit$strata),
86
                 ystrataname = "Strata",
87
                 timeby = signif(max(sfit$time)/7, 1),
88
                 main = "",
89
                 pval = FALSE,
90
                 pval.size = 5, 
91
                 pval.coord = c(NULL, NULL),
92
                 pval.testname = F,
93
                 marks = TRUE,
94
                 shape = 3,
95
                 legend = TRUE,
96
                 legendposition=c(0.85,0.8),
97
                 ci = FALSE,
98
                 subs = NULL,
99
                 label.nrisk = "Numbers at risk",
100
                 size.label.nrisk = 10,
101
                 linecols="Set1",
102
                 dashed= FALSE,
103
                 cumhaz = F,
104
                 cluster.option = "None",
105
                 cluster.var = NULL,
106
                 data = NULL,
107
                 cut.landmark = NULL,
108
                 ...) {
109
  
110
  
111
  #################################
112
  # sorting the use of subsetting #
113
  #################################
114
  
115 1
  n.risk <- n.censor <- surv <- strata <- lower <- upper <- NULL
116
  
117 1
  times <- seq(0, max(sfit$time), by = timeby)
118
  
119 1
  if(is.null(subs)){
120 1
    if(length(levels(summary(sfit)$strata)) == 0) {
121 0
      subs1 <- 1
122 0
      subs2 <- 1:length(summary(sfit,censored=T)$time)
123 0
      subs3 <- 1:length(summary(sfit,times = times,extend = TRUE)$time)
124
    } else {
125 1
      subs1 <- 1:length(levels(summary(sfit)$strata))
126 1
      subs2 <- 1:length(summary(sfit,censored=T)$strata)
127 1
      subs3 <- 1:length(summary(sfit,times = times,extend = TRUE)$strata)
128
    }
129
  } else{
130 0
    for(i in 1:length(subs)){
131 0
      if(i==1){
132 0
        ssvar <- paste("(?=.*\\b=",subs[i],sep="")
133
      }
134 0
      if(i==length(subs)){
135 0
        ssvar <- paste(ssvar,"\\b)(?=.*\\b=",subs[i],"\\b)",sep="")
136
      }
137 0
      if(!i %in% c(1, length(subs))){
138 0
        ssvar <- paste(ssvar,"\\b)(?=.*\\b=",subs[i],sep="")
139
      }
140 0
      if(i==1 & i==length(subs)){
141 0
        ssvar <- paste("(?=.*\\b=",subs[i],"\\b)",sep="")
142
      }
143
    }
144 0
    subs1 <- which(regexpr(ssvar,levels(summary(sfit)$strata), perl=T)!=-1)
145 0
    subs2 <- which(regexpr(ssvar,summary(sfit,censored=T)$strata, perl=T)!=-1)
146 0
    subs3 <- which(regexpr(ssvar,summary(sfit,times = times,extend = TRUE)$strata, perl=T)!=-1)
147
  }
148
  
149 0
  if(!is.null(subs)) pval <- FALSE
150
  
151
  ##################################
152
  # data manipulation pre-plotting #
153
  ##################################
154
  
155
  
156
  
157 1
  if(length(levels(summary(sfit)$strata)) == 0) {
158
    #[subs1]
159 0
    if(is.null(ystratalabs)) ystratalabs <- as.character(sub("group=*","","All"))
160
  } else {
161
    #[subs1]
162 0
    if(is.null(ystratalabs)) ystratalabs <- as.character(sub("group=*","",names(sfit$strata)))
163
  }
164
  
165 0
  if(is.null(ystrataname)) ystrataname <- "Strata"
166 1
  m <- max(nchar(ystratalabs))
167 1
  times <- seq(0, max(sfit$time), by = timeby)
168
  
169 1
  if(length(levels(summary(sfit)$strata)) == 0) {
170 0
    Factor <- factor(rep("All",length(subs2)))
171
  } else {
172 1
    Factor <- factor(summary(sfit, censored = T)$strata[subs2])
173
  }
174
  
175
  #Data to be used in the survival plot
176
  
177 1
  df <- data.frame(
178 1
    time = sfit$time[subs2],
179 1
    n.risk = sfit$n.risk[subs2],
180 1
    n.event = sfit$n.event[subs2],
181 1
    n.censor = sfit$n.censor[subs2],
182 1
    surv = sfit$surv[subs2],
183 1
    strata = Factor,
184 1
    upper = sfit$upper[subs2],
185 1
    lower = sfit$lower[subs2]
186
  )
187
  
188 1
  form <- sfit$call$formula
189

190 1
  if (!is.null(cut.landmark)){
191 0
    if (is.null(data)){
192 0
      data <- tryCatch(eval(sfit$call$data), error = function(e) e)
193 0
      if ("error" %in% class(data)){
194 0
        stop("Landmark analysis requires data object. please input 'data' option") 
195
      }
196
    }
197
    
198 0
    var.time <- as.character(form[[2]][[2]])
199 0
    var.event <- as.character(form[[2]][[3]])
200 0
    if (length(var.event) > 1){
201 0
      var.event <- setdiff(var.event, as.character(as.symbol(var.event)))
202 0
      var.event <- var.event[sapply(var.event, function(x) {"warning" %in% class(tryCatch(as.numeric(x), warning = function(w) w))})]
203
    }
204 0
    data1 <- data
205 0
    data1[[var.event]][data1[[var.time]] >= cut.landmark] <- 0
206 0
    data1[[var.time]][data1[[var.time]] >= cut.landmark] <- cut.landmark
207
  
208 0
    sfit1 <- survfit(as.formula(form), data1)
209 0
    sfit2 <- survfit(as.formula(form), data[data[[var.time]] >= cut.landmark, ])
210
    
211 0
    if (length(levels(Factor)) == 1){
212 0
      df2 <- merge(subset(df, time >= cut.landmark)[, c("time", "n.risk", "n.event", "n.censor", "strata")], 
213 0
                   data.frame(time = sfit2$time, surv = sfit2$surv, strata = "All", upper = sfit2$upper, lower = sfit2$lower), 
214 0
                   by = c("time", "strata"))
215
    } else{
216 0
      df2 <- merge(subset(df, time >= cut.landmark)[, c("time", "n.risk", "n.event", "n.censor", "strata")], 
217 0
                   data.frame(time = sfit2$time, surv = sfit2$surv, strata = rep(names(sfit2$strata), sfit2$strata), upper = sfit2$upper, lower = sfit2$lower), 
218 0
                   by = c("time", "strata"))
219
    }
220
    
221
    
222
    
223 0
    df11 <- rbind(subset(df, time < cut.landmark), df2) 
224 0
    df <- rbind(df11, data.frame(time = cut.landmark, n.risk = summary(sfit, times = cut.landmark)$n.risk,  n.event = 0, n.censor = 0, surv = 1, strata = factor(ystratalabs, levels = levels(df$strata)), upper = 1, lower = 1))
225
  }
226
  
227
  
228 1
  if (cumhaz){
229 1
    upper.new <- 1 - df$lower
230 1
    lower.new <- 1 - df$upper
231 1
    df$surv = 1 - df$surv
232 1
    df$lower = lower.new
233 1
    df$upper = upper.new
234
    
235
  }
236
  
237
  #Final changes to data for survival plot
238 1
  levels(df$strata) <- ystratalabs
239 1
  zeros <- data.frame(time = 0, n.risk = NA, n.event = NA, n.censor = NA, surv = 1,
240 1
                      strata = factor(ystratalabs, levels=levels(df$strata)),
241 1
                      upper = 1, lower = 1)
242 1
  if (cumhaz){
243 1
    zeros$surv = 0
244 1
    zeros$lower = 0
245 1
    zeros$upper = 0
246
  }
247
  
248 1
  df <- rbind(zeros, df)
249 1
  d <- length(levels(df$strata))
250
  
251
  ###################################
252
  # specifying axis parameteres etc #
253
  ###################################
254
  
255 1
  if(dashed == TRUE | linecols == "black"){
256 0
    linetype=c("solid", "dashed", "dotted", "dotdash", "longdash", "twodash", "1F", "F1", "4C88C488", "12345678")
257
  } else {
258 1
    linetype=c("solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid")
259
  }
260
  
261
  # Scale transformation
262
  #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
263 1
  surv.scale <- match.arg(surv.scale)
264 1
  scale_labels <-  ggplot2::waiver()
265 1
  if (surv.scale == "percent") scale_labels <- scales::percent
266
  
267 1
  p <- ggplot2::ggplot( df, aes(x=time, y=surv, colour=strata, linetype=strata)) + ggtitle(main)
268
  
269 1
  linecols2 <- linecols
270 1
  if (linecols == "black"){
271 0
    linecols <- "Set1"
272 0
    p <- ggplot2::ggplot( df, aes(x=time, y=surv, linetype=strata)) + ggtitle(main)
273
  }
274
  
275
  
276
  #Set up theme elements
277 1
  p <- p + theme_bw() +
278 1
    theme(axis.title.x = element_text(vjust = 0.7),
279 1
          panel.grid.minor = element_blank(),
280 1
          axis.line = element_line(size =0.5, colour = "black"),
281 1
          legend.position = legendposition,
282 1
          legend.background = element_rect(fill = NULL),
283 1
          legend.key = element_rect(colour = NA),
284 1
          panel.border = element_blank(),
285 1
          plot.margin = unit(c(0, 1, .5,ifelse(m < 10, 1.5, 2.5)),"lines"),
286 1
          panel.grid.major = element_blank(),
287 1
          axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
288 1
          axis.line.y = element_line(size = 0.5, linetype = "solid", colour = "black")) +
289 1
    scale_x_continuous(xlabs, breaks = times, limits = xlims) +
290 1
    scale_y_continuous(ylabs, limits = ylims, labels = scale_labels)
291
  
292
  
293
  
294
  #Removes the legend:
295 1
  if(legend == FALSE)
296 1
    p <- p + theme(legend.position="none")
297
  
298
  #Add lines too plot
299 1
  p <- p + geom_step(size = 0.75) +
300 1
    scale_linetype_manual(name = ystrataname, values=linetype) +
301 1
    scale_colour_brewer(name = ystrataname, palette=linecols)
302
  
303
  #Add censoring marks to the line:
304 1
  if(marks == TRUE)
305 1
    p <- p + geom_point(data = subset(df, n.censor >= 1), aes(x = time, y = surv), shape = shape, colour = "black")
306
  
307
  #Add 95% CI to plot
308 1
  if(ci == TRUE){
309 1
    if (linecols2 == "black"){
310 0
      p <- p +  geom_ribbon(data=df, aes(ymin = lower, ymax = upper), alpha=0.25, colour=NA) 
311
    } else{
312 1
      p <- p +  geom_ribbon(data=df, aes(ymin = lower, ymax = upper, fill = strata), alpha=0.25, colour=NA) + scale_fill_brewer(name = ystrataname, palette=linecols)
313
    } 
314
  }
315
  
316 1
  if (!is.null(cut.landmark)){
317 0
    p <- p + geom_vline(xintercept = cut.landmark, lty = 2)
318
  }
319
  
320
  ## Create a blank plot for place-holding
321 1
  blank.pic <- ggplot(df, aes(time, surv)) +
322 1
    geom_blank() + theme_void() +             ## Remove gray color
323 1
    theme(axis.text.x = element_blank(),axis.text.y = element_blank(),
324 1
          axis.title.x = element_blank(),axis.title.y = element_blank(),
325 1
          axis.ticks = element_blank(),
326 1
          panel.grid.major = element_blank(),panel.border = element_blank())
327
  
328
  #####################
329
  # p-value placement #
330
  #####################a
331
  
332 0
  if(length(levels(summary(sfit)$strata)) == 0) pval <- F
333
  #if(!is.null(cut.landmark)) pval <- F
334
  
335 1
  if(pval == TRUE) {
336 1
    if (is.null(data)){
337 1
      data <- tryCatch(eval(sfit$call$data), error = function(e) e)
338 1
      if ("error" %in% class(data)){
339 0
        stop("'pval' option requires data object. please input 'data' option") 
340
      }
341
    }
342
    
343 1
    if (is.null(cut.landmark)){
344 1
      sdiff <- survival::survdiff(as.formula(form), data = data)
345 1
      pvalue <- pchisq(sdiff$chisq,length(sdiff$n) - 1,lower.tail = FALSE)
346
      
347
      ## cluster option
348 1
      if (cluster.option == "cluster" & !is.null(cluster.var)){
349 1
        form.old <- as.character(form)
350 1
        form.new <- paste(form.old[2], form.old[1], " + ", form.old[3], " + cluster(", cluster.var, ")", sep="")
351 1
        sdiff <- survival::coxph(as.formula(form.new), data = data, model = T, robust = T)
352 1
        pvalue <- summary(sdiff)$robscore["pvalue"]
353 1
      } else if (cluster.option == "frailty" & !is.null(cluster.var)){
354 1
        form.old <- as.character(form)
355 1
        form.new <- paste(form.old[2], form.old[1], " + ", form.old[3], " + frailty(", cluster.var, ")", sep="")
356 1
        sdiff <- survival::coxph(as.formula(form.new), data =data, model = T)
357 1
        pvalue <- summary(sdiff)$logtest["pvalue"]
358
      }
359
      
360 1
      pvaltxt <- ifelse(pvalue < 0.001, "p < 0.001", paste("p =", round(pvalue, 3)))
361 0
      if (pval.testname) pvaltxt <- paste0(pvaltxt, " (Log-rank)")
362
      
363
      # MOVE P-VALUE LEGEND HERE BELOW [set x and y]
364 1
      if (is.null(pval.coord)){
365 1
        p <- p + annotate("text",x = (as.integer(max(sfit$time)/5)), y = 0.1 + ylims[1],label = pvaltxt, size  = pval.size)
366
      } else{
367 0
        p <- p + annotate("text",x = pval.coord[1], y = pval.coord[2], label = pvaltxt, size  = pval.size)
368
      }
369
    } else {
370 0
      sdiff1 <- survival::survdiff(as.formula(form), data1)
371 0
      sdiff2 <- survival::survdiff(as.formula(form), data[data[[var.time]] >= cut.landmark, ])
372 0
      pvalue <- sapply(list(sdiff1, sdiff2), function(x){pchisq(x$chisq,length(x$n) - 1,lower.tail = FALSE)})
373
      
374
      ## cluster option
375 0
      if (cluster.option == "cluster" & !is.null(cluster.var)){
376 0
        form.old <- as.character(form)
377 0
        form.new <- paste(form.old[2], form.old[1], " + ", form.old[3], " + cluster(", cluster.var, ")", sep="")
378 0
        sdiff1 <- survival::coxph(as.formula(form.new), data = data1, model = T, robust = T)
379 0
        sdiff2 <- survival::coxph(as.formula(form.new), data = data[data[[var.time]] >= cut.landmark, ], model = T, robust = T)
380 0
        pvalue <- sapply(list(sdiff1, sdiff2), function(x){summary(x)$robscore["pvalue"]})
381 0
      } else if (cluster.option == "frailty" & !is.null(cluster.var)){
382 0
        form.old <- as.character(form)
383 0
        form.new <- paste(form.old[2], form.old[1], " + ", form.old[3], " + frailty(", cluster.var, ")", sep="")
384 0
        sdiff1 <- survival::coxph(as.formula(form.new), data = data1, model = T)
385 0
        sdiff2 <- survival::coxph(as.formula(form.new), data = data[data[[var.time]] >= cut.landmark, ], model = T)
386 0
        pvalue <- sapply(list(sdiff1, sdiff2), function(x){summary(x)$logtest["pvalue"]})
387
      }
388
      
389 0
      pvaltxt <- ifelse(pvalue < 0.001, "p < 0.001", paste("p =", round(pvalue, 3)))
390
      
391 0
      if (pval.testname) pvaltxt <- paste0(pvaltxt, " (Log-rank)")
392
      
393 0
      if (is.null(pval.coord)){
394 0
        p <- p + annotate("text",x = c(as.integer(max(sfit$time)/10), as.integer(max(sfit$time)/10) + cut.landmark), y = 0.1 + ylims[1],label = pvaltxt, size  = pval.size)
395
      } else{
396 0
        p <- p + annotate("text",x = c(pval.coord[1], pval.coord[1] + cut.landmark), y = pval.coord[2], label = pvaltxt, size  = pval.size)
397
      }
398
    }
399
  
400
  }
401
  
402
  ###################################################
403
  # Create table graphic to include at-risk numbers #
404
  ###################################################
405
  
406 1
  n.risk <- NULL
407 1
  if(length(levels(summary(sfit)$strata)) == 0) {
408 0
    Factor <- factor(rep("All",length(subs3)))
409
  } else {
410 1
    Factor <- factor(summary(sfit,times = times,extend = TRUE)$strata[subs3])
411
  }
412
  
413 1
  if(table == TRUE) {
414 1
    risk.data <- data.frame(
415 1
      strata = Factor,
416 1
      time = summary(sfit,times = times,extend = TRUE)$time[subs3],
417 1
      n.risk = summary(sfit,times = times,extend = TRUE)$n.risk[subs3]
418
    )
419
    
420 1
    risk.data$strata <- factor(risk.data$strata, levels=rev(levels(risk.data$strata)))
421
    
422 1
    data.table <- ggplot(risk.data,aes(x = time, y = strata, label = format(n.risk, nsmall = 0))) + 
423 1
      geom_text(size = 3.5) + theme_bw() +
424 1
      scale_y_discrete(breaks = as.character(levels(risk.data$strata)),
425 1
                       labels = rev(ystratalabs)) +
426 1
      scale_x_continuous(label.nrisk, limits = xlims) +
427 1
      theme(axis.title.x = element_text(size = size.label.nrisk, vjust = 1),
428 1
            panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
429 1
            panel.border = element_blank(),axis.text.x = element_blank(),
430 1
            axis.ticks = element_blank(),axis.text.y = element_text(face = "bold",hjust = 1)) 
431 1
    data.table <- data.table +
432 1
      theme(legend.position = "none") + xlab(NULL) + ylab(NULL)
433
    
434
    
435
    # ADJUST POSITION OF TABLE FOR AT RISK
436 1
    data.table <- data.table +
437 1
      theme(plot.margin = unit(c(-1.5, 1, 0.1, ifelse(m < 10, 2.5, 3.5) - 0.15 * m), "lines"))
438
  }
439
  
440
  
441
  #######################
442
  # Plotting the graphs #
443
  #######################
444
  
445 1
  if(table == TRUE){
446 1
    grid.arrange(p, blank.pic, data.table, clip = FALSE, nrow = 3,
447 1
                 ncol = 1, heights = unit(c(2, .1, .25),c("null", "null", "null")))
448
  } else {
449 1
    p
450
  }
451
  
452
}

Read our documentation on viewing source code .

Loading