1

2
#' @title Creates a Kaplan-Meier plot for survfit object.
3
#' @description Creates a Kaplan-Meier plot with at risk tables below for survfit object.
4
#' @param sfit a survfit object
5
#' @param table logical: Create a table graphic below the K-M plot, indicating at-risk numbers?
6
#' @param xlabs x-axis label
7
#' @param ylabs y-axis label
8
#' @param xlims numeric: list of min and max for x-axis. Default = c(0,max(sfit$time))
9
#' @param ylims numeric: list of min and max for y-axis. Default = c(0,1)
10
#' @param surv.scale 	scale transformation of survival curves. Allowed values are "default" or "percent".
11
#' @param ystratalabs character list. A list of names for each strata. Default = names(sfit$strata)
12
#' @param ystrataname The legend name. Default = "Strata"
13
#' @param timeby numeric: control the granularity along the time-axis; defaults to 7 time-points. Default = signif(max(sfit$time)/7, 1)
14
#' @param main plot title
15
#' @param pval logical: add the pvalue to the plot?
16
#' @param pval.size numeric value specifying the p-value text size. Default is 5.
17
#' @param pval.coord numeric vector, of length 2, specifying the x and y coordinates of the p-value. Default values are NULL
18
#' @param pval.testname logical: add '(Log-rank)' text to p-value. Default = F
19
#' @param marks logical: should censoring marks be added?
20
#' @param shape what shape should the censoring marks be, default is a vertical line
21
#' @param legend logical. should a legend be added to the plot?
22
#' @param legendposition numeric. x, y position of the legend if plotted. Default=c(0.85,0.8)
23
#' @param ci logical. Should confidence intervals be plotted. Default = FALSE
24
#' @param subs = NULL,
25
#' @param label.nrisk Numbers at risk label. Default = "Numbers at risk"
26
#' @param size.label.nrisk Font size of label.nrisk. Default = 10
27
#' @param linecols Character. Colour brewer pallettes too colour lines. Default ="Set1", "black" for black with dashed line.
28
#' @param dashed logical. Should a variety of linetypes be used to identify lines. Default = FALSE
29
#' @param cumhaz Show cumulaive hazard function, Default: F
30
#' @param cluster.option Cluster option for p value, Option: "None", "cluster", "frailty", Default: "None"
31
#' @param cluster.var Cluster variable
32
#' @param data select specific data - for reactive input, Default = NULL
33
#' @param cut.landmark cut-off for landmark analysis, Default = NULL
34
#' @param ... PARAM_DESCRIPTION
35
#' @return Plot
36
#' @details DETAILS
37
#' @author Jinseob Kim, but heavily modified version of a script created by Michael Way.
38
#' \url{https://github.com/michaelway/ggkm/}
39
#' I have packaged this function, added functions to namespace and included a range of new parameters.
40
#' @examples
41
#'  library(survival)
42
#'  data(colon)
43
#'  fit <- survfit(Surv(time,status)~rx, data=colon)
44
#'  jskm(fit, timeby=500)
45
#' @rdname jskm
46
#' @importFrom ggplot2 ggplot
47
#' @importFrom ggplot2 aes
48
#' @importFrom ggplot2 geom_step
49
#' @importFrom ggplot2 scale_linetype_manual
50
#' @importFrom ggplot2 scale_colour_manual
51
#' @importFrom ggplot2 theme_bw
52
#' @importFrom ggplot2 theme
53
#' @importFrom ggplot2 element_text
54
#' @importFrom ggplot2 scale_x_continuous
55
#' @importFrom ggplot2 scale_y_continuous
56
#' @importFrom ggplot2 element_blank
57
#' @importFrom ggplot2 element_line
58
#' @importFrom ggplot2 element_rect
59
#' @importFrom ggplot2 labs
60
#' @importFrom ggplot2 ggtitle
61
#' @importFrom ggplot2 geom_point
62
#' @importFrom ggplot2 geom_blank
63
#' @importFrom ggplot2 annotate
64
#' @importFrom ggplot2 geom_text
65
#' @importFrom ggplot2 scale_y_discrete
66
#' @importFrom ggplot2 xlab
67
#' @importFrom ggplot2 ylab
68
#' @importFrom ggplot2 ggsave
69
#' @importFrom ggplot2 scale_colour_brewer
70
#' @importFrom ggplot2 geom_ribbon
71
#' @importFrom grid unit
72
#' @importFrom gridExtra grid.arrange
73
#' @importFrom plyr rbind.fill
74
#' @importFrom stats pchisq time as.formula
75
#' @importFrom survival survfit survdiff coxph Surv cluster frailty
76
#' @export
77
 
78

79
jskm <- function(sfit,
80
                 table = FALSE,
81
                 xlabs = "Time-to-event",
82
                 ylabs = "Survival probability",
83
                 xlims = c(0,max(sfit$time)),
84
                 ylims = c(0,1),
85
                 surv.scale = c("default", "percent"),
86
                 ystratalabs = names(sfit$strata),
87
                 ystrataname = "Strata",
88
                 timeby = signif(max(sfit$time)/7, 1),
89
                 main = "",
90
                 pval = FALSE,
91
                 pval.size = 5, 
92
                 pval.coord = c(NULL, NULL),
93
                 pval.testname = F,
94
                 marks = TRUE,
95
                 shape = 3,
96
                 legend = TRUE,
97
                 legendposition=c(0.85,0.8),
98
                 ci = FALSE,
99
                 subs = NULL,
100
                 label.nrisk = "Numbers at risk",
101
                 size.label.nrisk = 10,
102
                 linecols="Set1",
103
                 dashed= FALSE,
104
                 cumhaz = F,
105
                 cluster.option = "None",
106
                 cluster.var = NULL,
107
                 data = NULL,
108
                 cut.landmark = NULL,
109
                 ...) {
110
  
111
  
112
  #################################
113
  # sorting the use of subsetting #
114
  #################################
115
  
116 1
  n.risk <- n.censor <- surv <- strata <- lower <- upper <- NULL
117
  
118 1
  times <- seq(0, max(sfit$time), by = timeby)
119
  
120 1
  if(is.null(subs)){
121 1
    if(length(levels(summary(sfit)$strata)) == 0) {
122 0
      subs1 <- 1
123 0
      subs2 <- 1:length(summary(sfit,censored=T)$time)
124 0
      subs3 <- 1:length(summary(sfit,times = times,extend = TRUE)$time)
125
    } else {
126 1
      subs1 <- 1:length(levels(summary(sfit)$strata))
127 1
      subs2 <- 1:length(summary(sfit,censored=T)$strata)
128 1
      subs3 <- 1:length(summary(sfit,times = times,extend = TRUE)$strata)
129
    }
130
  } else{
131 0
    for(i in 1:length(subs)){
132 0
      if(i==1){
133 0
        ssvar <- paste("(?=.*\\b=",subs[i],sep="")
134
      }
135 0
      if(i==length(subs)){
136 0
        ssvar <- paste(ssvar,"\\b)(?=.*\\b=",subs[i],"\\b)",sep="")
137
      }
138 0
      if(!i %in% c(1, length(subs))){
139 0
        ssvar <- paste(ssvar,"\\b)(?=.*\\b=",subs[i],sep="")
140
      }
141 0
      if(i==1 & i==length(subs)){
142 0
        ssvar <- paste("(?=.*\\b=",subs[i],"\\b)",sep="")
143
      }
144
    }
145 0
    subs1 <- which(regexpr(ssvar,levels(summary(sfit)$strata), perl=T)!=-1)
146 0
    subs2 <- which(regexpr(ssvar,summary(sfit,censored=T)$strata, perl=T)!=-1)
147 0
    subs3 <- which(regexpr(ssvar,summary(sfit,times = times,extend = TRUE)$strata, perl=T)!=-1)
148
  }
149
  
150 0
  if(!is.null(subs)) pval <- FALSE
151
  
152
  ##################################
153
  # data manipulation pre-plotting #
154
  ##################################
155
  
156
  
157
  
158 1
  if(length(levels(summary(sfit)$strata)) == 0) {
159
    #[subs1]
160 0
    if(is.null(ystratalabs)) ystratalabs <- as.character(sub("group=*","","All"))
161
  } else {
162
    #[subs1]
163 0
    if(is.null(ystratalabs)) ystratalabs <- as.character(sub("group=*","",names(sfit$strata)))
164
  }
165
  
166 0
  if(is.null(ystrataname)) ystrataname <- "Strata"
167 1
  m <- max(nchar(ystratalabs))
168 1
  times <- seq(0, max(sfit$time), by = timeby)
169
  
170 1
  if(length(levels(summary(sfit)$strata)) == 0) {
171 0
    Factor <- factor(rep("All",length(subs2)))
172
  } else {
173 1
    Factor <- factor(summary(sfit, censored = T)$strata[subs2])
174
  }
175
  
176
  #Data to be used in the survival plot
177
  
178 1
  df <- data.frame(
179 1
    time = sfit$time[subs2],
180 1
    n.risk = sfit$n.risk[subs2],
181 1
    n.event = sfit$n.event[subs2],
182 1
    n.censor = sfit$n.censor[subs2],
183 1
    surv = sfit$surv[subs2],
184 1
    strata = Factor,
185 1
    upper = sfit$upper[subs2],
186 1
    lower = sfit$lower[subs2]
187
  )
188
  
189 1
  form <- sfit$call$formula
190

191 1
  if (!is.null(cut.landmark)){
192 0
    if (is.null(data)){
193 0
      data <- tryCatch(eval(sfit$call$data), error = function(e) e)
194 0
      if ("error" %in% class(data)){
195 0
        stop("Landmark analysis requires data object. please input 'data' option") 
196
      }
197
    }
198
    
199 0
    var.time <- as.character(form[[2]][[2]])
200 0
    var.event <- as.character(form[[2]][[3]])
201 0
    if (length(var.event) > 1){
202 0
      var.event <- setdiff(var.event, as.character(as.symbol(var.event)))
203 0
      var.event <- var.event[sapply(var.event, function(x) {"warning" %in% class(tryCatch(as.numeric(x), warning = function(w) w))})]
204
    }
205 0
    data1 <- data
206 0
    data1[[var.event]][data1[[var.time]] >= cut.landmark] <- 0
207 0
    data1[[var.time]][data1[[var.time]] >= cut.landmark] <- cut.landmark
208
  
209 0
    sfit1 <- survfit(as.formula(form), data1)
210 0
    sfit2 <- survfit(as.formula(form), data[data[[var.time]] >= cut.landmark, ])
211
    
212 0
    if (length(levels(Factor)) == 1){
213 0
      df2 <- merge(subset(df, time >= cut.landmark)[, c("time", "n.risk", "n.event", "n.censor", "strata")], 
214 0
                   data.frame(time = sfit2$time, surv = sfit2$surv, strata = "All", upper = sfit2$upper, lower = sfit2$lower), 
215 0
                   by = c("time", "strata"))
216
    } else{
217 0
      df2 <- merge(subset(df, time >= cut.landmark)[, c("time", "n.risk", "n.event", "n.censor", "strata")], 
218 0
                   data.frame(time = sfit2$time, surv = sfit2$surv, strata = rep(names(sfit2$strata), sfit2$strata), upper = sfit2$upper, lower = sfit2$lower), 
219 0
                   by = c("time", "strata"))
220
    }
221
    
222
    
223
    
224 0
    df11 <- rbind(subset(df, time < cut.landmark), df2) 
225 0
    df <- rbind(df11, data.frame(time = cut.landmark, n.risk = summary(sfit, times = cut.landmark)$n.risk,  n.event = 0, n.censor = 0, surv = 1, strata = factor(ystratalabs, levels = levels(df$strata)), upper = 1, lower = 1))
226
  }
227
  
228
  
229 1
  if (cumhaz){
230 1
    upper.new <- 1 - df$lower
231 1
    lower.new <- 1 - df$upper
232 1
    df$surv = 1 - df$surv
233 1
    df$lower = lower.new
234 1
    df$upper = upper.new
235
    
236
  }
237
  
238
  #Final changes to data for survival plot
239 1
  levels(df$strata) <- ystratalabs
240 1
  zeros <- data.frame(time = 0, n.risk = NA, n.event = NA, n.censor = NA, surv = 1,
241 1
                      strata = factor(ystratalabs, levels=levels(df$strata)),
242 1
                      upper = 1, lower = 1)
243 1
  if (cumhaz){
244 1
    zeros$surv = 0
245 1
    zeros$lower = 0
246 1
    zeros$upper = 0
247
  }
248
  
249 1
  df <- rbind(zeros, df)
250 1
  d <- length(levels(df$strata))
251
  
252
  ###################################
253
  # specifying axis parameteres etc #
254
  ###################################
255
  
256 1
  if(dashed == TRUE | linecols == "black"){
257 0
    linetype=c("solid", "dashed", "dotted", "dotdash", "longdash", "twodash", "1F", "F1", "4C88C488", "12345678")
258
  } else {
259 1
    linetype=c("solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid")
260
  }
261
  
262
  # Scale transformation
263
  #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
264 1
  surv.scale <- match.arg(surv.scale)
265 1
  scale_labels <-  ggplot2::waiver()
266 1
  if (surv.scale == "percent") scale_labels <- scales::percent
267
  
268 1
  p <- ggplot2::ggplot( df, aes(x=time, y=surv, colour=strata, linetype=strata)) + ggtitle(main)
269
  
270 1
  linecols2 <- linecols
271 1
  if (linecols == "black"){
272 0
    linecols <- "Set1"
273 0
    p <- ggplot2::ggplot( df, aes(x=time, y=surv, linetype=strata)) + ggtitle(main)
274
  }
275
  
276
  
277
  #Set up theme elements
278 1
  p <- p + theme_bw() +
279 1
    theme(axis.title.x = element_text(vjust = 0.7),
280 1
          panel.grid.minor = element_blank(),
281 1
          axis.line = element_line(size =0.5, colour = "black"),
282 1
          legend.position = legendposition,
283 1
          legend.background = element_rect(fill = NULL),
284 1
          legend.key = element_rect(colour = NA),
285 1
          panel.border = element_blank(),
286 1
          plot.margin = unit(c(0, 1, .5,ifelse(m < 10, 1.5, 2.5)),"lines"),
287 1
          panel.grid.major = element_blank(),
288 1
          axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
289 1
          axis.line.y = element_line(size = 0.5, linetype = "solid", colour = "black")) +
290 1
    scale_x_continuous(xlabs, breaks = times, limits = xlims) +
291 1
    scale_y_continuous(ylabs, limits = ylims, labels = scale_labels)
292
  
293
  
294
  
295
  #Removes the legend:
296 1
  if(legend == FALSE)
297 1
    p <- p + theme(legend.position="none")
298
  
299
  #Add lines too plot
300 1
  p <- p + geom_step(size = 0.75) +
301 1
    scale_linetype_manual(name = ystrataname, values=linetype) +
302 1
    scale_colour_brewer(name = ystrataname, palette=linecols)
303
  
304
  #Add censoring marks to the line:
305 1
  if(marks == TRUE)
306 1
    p <- p + geom_point(data = subset(df, n.censor >= 1), aes(x = time, y = surv), shape = shape, colour = "black")
307
  
308
  #Add 95% CI to plot
309 1
  if(ci == TRUE){
310 1
    if (linecols2 == "black"){
311 0
      p <- p +  geom_ribbon(data=df, aes(ymin = lower, ymax = upper), alpha=0.25, colour=NA) 
312
    } else{
313 1
      p <- p +  geom_ribbon(data=df, aes(ymin = lower, ymax = upper, fill = strata), alpha=0.25, colour=NA) + scale_fill_brewer(name = ystrataname, palette=linecols)
314
    } 
315
  }
316
  
317 1
  if (!is.null(cut.landmark)){
318 0
    p <- p + geom_vline(xintercept = cut.landmark, lty = 2)
319
  }
320
  
321
  ## Create a blank plot for place-holding
322 1
  blank.pic <- ggplot(df, aes(time, surv)) +
323 1
    geom_blank() + theme_void() +             ## Remove gray color
324 1
    theme(axis.text.x = element_blank(),axis.text.y = element_blank(),
325 1
          axis.title.x = element_blank(),axis.title.y = element_blank(),
326 1
          axis.ticks = element_blank(),
327 1
          panel.grid.major = element_blank(),panel.border = element_blank())
328
  
329
  #####################
330
  # p-value placement #
331
  #####################a
332
  
333 0
  if(length(levels(summary(sfit)$strata)) == 0) pval <- F
334
  #if(!is.null(cut.landmark)) pval <- F
335
  
336 1
  if(pval == TRUE) {
337 1
    if (is.null(data)){
338 1
      data <- tryCatch(eval(sfit$call$data), error = function(e) e)
339 1
      if ("error" %in% class(data)){
340 0
        stop("'pval' option requires data object. please input 'data' option") 
341
      }
342
    }
343
    
344 1
    if (is.null(cut.landmark)){
345 1
      sdiff <- survival::survdiff(as.formula(form), data = data)
346 1
      pvalue <- pchisq(sdiff$chisq,length(sdiff$n) - 1,lower.tail = FALSE)
347
      
348
      ## cluster option
349 1
      if (cluster.option == "cluster" & !is.null(cluster.var)){
350 1
        form.old <- as.character(form)
351 1
        form.new <- paste(form.old[2], form.old[1], " + ", form.old[3], " + cluster(", cluster.var, ")", sep="")
352 1
        sdiff <- survival::coxph(as.formula(form.new), data = data, model = T, robust = T)
353 1
        pvalue <- summary(sdiff)$robscore["pvalue"]
354 1
      } else if (cluster.option == "frailty" & !is.null(cluster.var)){
355 1
        form.old <- as.character(form)
356 1
        form.new <- paste(form.old[2], form.old[1], " + ", form.old[3], " + frailty(", cluster.var, ")", sep="")
357 1
        sdiff <- survival::coxph(as.formula(form.new), data =data, model = T)
358 1
        pvalue <- summary(sdiff)$logtest["pvalue"]
359
      }
360
      
361 1
      pvaltxt <- ifelse(pvalue < 0.001, "p < 0.001", paste("p =", round(pvalue, 3)))
362 0
      if (pval.testname) pvaltxt <- paste0(pvaltxt, " (Log-rank)")
363
      
364
      # MOVE P-VALUE LEGEND HERE BELOW [set x and y]
365 1
      if (is.null(pval.coord)){
366 1
        p <- p + annotate("text",x = (as.integer(max(sfit$time)/5)), y = 0.1 + ylims[1],label = pvaltxt, size  = pval.size)
367
      } else{
368 0
        p <- p + annotate("text",x = pval.coord[1], y = pval.coord[2], label = pvaltxt, size  = pval.size)
369
      }
370
    } else {
371 0
      sdiff1 <- survival::survdiff(as.formula(form), data1)
372 0
      sdiff2 <- survival::survdiff(as.formula(form), data[data[[var.time]] >= cut.landmark, ])
373 0
      pvalue <- sapply(list(sdiff1, sdiff2), function(x){pchisq(x$chisq,length(x$n) - 1,lower.tail = FALSE)})
374
      
375
      ## cluster option
376 0
      if (cluster.option == "cluster" & !is.null(cluster.var)){
377 0
        form.old <- as.character(form)
378 0
        form.new <- paste(form.old[2], form.old[1], " + ", form.old[3], " + cluster(", cluster.var, ")", sep="")
379 0
        sdiff1 <- survival::coxph(as.formula(form.new), data = data1, model = T, robust = T)
380 0
        sdiff2 <- survival::coxph(as.formula(form.new), data = data[data[[var.time]] >= cut.landmark, ], model = T, robust = T)
381 0
        pvalue <- sapply(list(sdiff1, sdiff2), function(x){summary(x)$robscore["pvalue"]})
382 0
      } else if (cluster.option == "frailty" & !is.null(cluster.var)){
383 0
        form.old <- as.character(form)
384 0
        form.new <- paste(form.old[2], form.old[1], " + ", form.old[3], " + frailty(", cluster.var, ")", sep="")
385 0
        sdiff1 <- survival::coxph(as.formula(form.new), data = data1, model = T)
386 0
        sdiff2 <- survival::coxph(as.formula(form.new), data = data[data[[var.time]] >= cut.landmark, ], model = T)
387 0
        pvalue <- sapply(list(sdiff1, sdiff2), function(x){summary(x)$logtest["pvalue"]})
388
      }
389
      
390 0
      pvaltxt <- ifelse(pvalue < 0.001, "p < 0.001", paste("p =", round(pvalue, 3)))
391
      
392 0
      if (pval.testname) pvaltxt <- paste0(pvaltxt, " (Log-rank)")
393
      
394 0
      if (is.null(pval.coord)){
395 0
        p <- p + annotate("text",x = c(as.integer(max(sfit$time)/10), as.integer(max(sfit$time)/10) + cut.landmark), y = 0.1 + ylims[1],label = pvaltxt, size  = pval.size)
396
      } else{
397 0
        p <- p + annotate("text",x = c(pval.coord[1], pval.coord[1] + cut.landmark), y = pval.coord[2], label = pvaltxt, size  = pval.size)
398
      }
399
    }
400
  
401
  }
402
  
403
  ###################################################
404
  # Create table graphic to include at-risk numbers #
405
  ###################################################
406
  
407 1
  n.risk <- NULL
408 1
  if(length(levels(summary(sfit)$strata)) == 0) {
409 0
    Factor <- factor(rep("All",length(subs3)))
410
  } else {
411 1
    Factor <- factor(summary(sfit,times = times,extend = TRUE)$strata[subs3])
412
  }
413
  
414 1
  if(table == TRUE) {
415 1
    risk.data <- data.frame(
416 1
      strata = Factor,
417 1
      time = summary(sfit,times = times,extend = TRUE)$time[subs3],
418 1
      n.risk = summary(sfit,times = times,extend = TRUE)$n.risk[subs3]
419
    )
420
    
421 1
    risk.data$strata <- factor(risk.data$strata, levels=rev(levels(risk.data$strata)))
422
    
423 1
    data.table <- ggplot(risk.data,aes(x = time, y = strata, label = format(n.risk, nsmall = 0))) + 
424 1
      geom_text(size = 3.5) + theme_bw() +
425 1
      scale_y_discrete(breaks = as.character(levels(risk.data$strata)),
426 1
                       labels = rev(ystratalabs)) +
427 1
      scale_x_continuous(label.nrisk, limits = xlims) +
428 1
      theme(axis.title.x = element_text(size = size.label.nrisk, vjust = 1),
429 1
            panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
430 1
            panel.border = element_blank(),axis.text.x = element_blank(),
431 1
            axis.ticks = element_blank(),axis.text.y = element_text(face = "bold",hjust = 1)) 
432 1
    data.table <- data.table +
433 1
      theme(legend.position = "none") + xlab(NULL) + ylab(NULL)
434
    
435
    
436
    # ADJUST POSITION OF TABLE FOR AT RISK
437 1
    data.table <- data.table +
438 1
      theme(plot.margin = unit(c(-1.5, 1, 0.1, ifelse(m < 10, 2.5, 3.5) - 0.15 * m), "lines"))
439
  }
440
  
441
  
442
  #######################
443
  # Plotting the graphs #
444
  #######################
445
  
446 1
  if(table == TRUE){
447 1
    grid.arrange(p, blank.pic, data.table, clip = FALSE, nrow = 3,
448 1
                 ncol = 1, heights = unit(c(2, .1, .25),c("null", "null", "null")))
449
  } else {
450 1
    p
451
  }
452
  
453
}

Read our documentation on viewing source code .

Loading