1

2
#' @title Creates a Kaplan-Meier plot for survfit object.
3
#' @description Creates a Kaplan-Meier plot with at risk tables below for survfit object.
4
#' @param sfit a survfit object
5
#' @param table logical: Create a table graphic below the K-M plot, indicating at-risk numbers?
6
#' @param xlabs x-axis label
7
#' @param ylabs y-axis label
8
#' @param xlims numeric: list of min and max for x-axis. Default = c(0,max(sfit$time))
9
#' @param ylims numeric: list of min and max for y-axis. Default = c(0,1)
10
#' @param surv.scale 	scale transformation of survival curves. Allowed values are "default" or "percent".
11
#' @param ystratalabs character list. A list of names for each strata. Default = names(sfit$strata)
12
#' @param ystrataname The legend name. Default = "Strata"
13
#' @param timeby numeric: control the granularity along the time-axis; defaults to 7 time-points. Default = signif(max(sfit$time)/7, 1)
14
#' @param main plot title
15
#' @param pval logical: add the pvalue to the plot?
16
#' @param pval.size numeric value specifying the p-value text size. Default is 5.
17
#' @param pval.coord numeric vector, of length 2, specifying the x and y coordinates of the p-value. Default values are NULL
18
#' @param pval.testname logical: add '(Log-rank)' text to p-value. Default = F
19
#' @param marks logical: should censoring marks be added?
20
#' @param shape what shape should the censoring marks be, default is a vertical line
21
#' @param legend logical. should a legend be added to the plot?
22
#' @param legendposition numeric. x, y position of the legend if plotted. Default=c(0.85,0.8)
23
#' @param ci logical. Should confidence intervals be plotted. Default = FALSE
24
#' @param subs = NULL,
25
#' @param label.nrisk Numbers at risk label. Default = "Numbers at risk"
26
#' @param size.label.nrisk Font size of label.nrisk. Default = 10
27
#' @param linecols Character. Colour brewer pallettes too colour lines. Default ="Set1", "black" for black with dashed line.
28
#' @param dashed logical. Should a variety of linetypes be used to identify lines. Default = FALSE
29
#' @param cumhaz Show cumulaive hazard function, Default: F
30
#' @param cluster.option Cluster option for p value, Option: "None", "cluster", "frailty", Default: "None"
31
#' @param cluster.var Cluster variable
32
#' @param data select specific data - for reactive input. Default = NULL
33
#' @param ... PARAM_DESCRIPTION
34
#' @return Plot
35
#' @details DETAILS
36
#' @author Jinseob Kim, but heavily modified version of a script created by Michael Way.
37
#' \url{https://github.com/michaelway/ggkm/}
38
#' I have packaged this function, added functions to namespace and included a range of new parameters.
39
#' @examples
40
#'  library(survival)
41
#'  data(colon)
42
#'  fit <- survfit(Surv(time,status)~rx, data=colon)
43
#'  jskm(fit, timeby=500)
44
#' @rdname jskm
45
#' @importFrom ggplot2 ggplot
46
#' @importFrom ggplot2 aes
47
#' @importFrom ggplot2 geom_step
48
#' @importFrom ggplot2 scale_linetype_manual
49
#' @importFrom ggplot2 scale_colour_manual
50
#' @importFrom ggplot2 theme_bw
51
#' @importFrom ggplot2 theme
52
#' @importFrom ggplot2 element_text
53
#' @importFrom ggplot2 scale_x_continuous
54
#' @importFrom ggplot2 scale_y_continuous
55
#' @importFrom ggplot2 element_blank
56
#' @importFrom ggplot2 element_line
57
#' @importFrom ggplot2 element_rect
58
#' @importFrom ggplot2 labs
59
#' @importFrom ggplot2 ggtitle
60
#' @importFrom ggplot2 geom_point
61
#' @importFrom ggplot2 geom_blank
62
#' @importFrom ggplot2 annotate
63
#' @importFrom ggplot2 geom_text
64
#' @importFrom ggplot2 scale_y_discrete
65
#' @importFrom ggplot2 xlab
66
#' @importFrom ggplot2 ylab
67
#' @importFrom ggplot2 ggsave
68
#' @importFrom ggplot2 scale_colour_brewer
69
#' @importFrom ggplot2 geom_ribbon
70
#' @importFrom grid unit
71
#' @importFrom gridExtra grid.arrange
72
#' @importFrom plyr rbind.fill
73
#' @importFrom stats pchisq time as.formula
74
#' @importFrom survival survfit survdiff coxph Surv cluster frailty
75
#' @export
76
 
77

78
jskm <- function(sfit,
79
                 table = FALSE,
80
                 xlabs = "Time-to-event",
81
                 ylabs = "Survival probability",
82
                 xlims = c(0,max(sfit$time)),
83
                 ylims = c(0,1),
84
                 surv.scale = c("default", "percent"),
85
                 ystratalabs = names(sfit$strata),
86
                 ystrataname = "Strata",
87
                 timeby = signif(max(sfit$time)/7, 1),
88
                 main = "",
89
                 pval = FALSE,
90
                 pval.size = 5, 
91
                 pval.coord = c(NULL, NULL),
92
                 pval.testname = F,
93
                 marks = TRUE,
94
                 shape = 3,
95
                 legend = TRUE,
96
                 legendposition=c(0.85,0.8),
97
                 ci = FALSE,
98
                 subs = NULL,
99
                 label.nrisk = "Numbers at risk",
100
                 size.label.nrisk = 10,
101
                 linecols="Set1",
102
                 dashed= FALSE,
103
                 cumhaz = F,
104
                 cluster.option = "None",
105
                 cluster.var = NULL,
106
                 data = NULL,
107
                 ...) {
108
  
109
  
110
  #################################
111
  # sorting the use of subsetting #
112
  #################################
113
  
114 1
  n.risk <- n.censor <- surv <- strata <- lower <- upper <- NULL
115
  
116 1
  times <- seq(0, max(sfit$time), by = timeby)
117
  
118 1
  if(is.null(subs)){
119 1
    if(length(levels(summary(sfit)$strata)) == 0) {
120 0
      subs1 <- 1
121 0
      subs2 <- 1:length(summary(sfit,censored=T)$time)
122 0
      subs3 <- 1:length(summary(sfit,times = times,extend = TRUE)$time)
123
    } else {
124 1
      subs1 <- 1:length(levels(summary(sfit)$strata))
125 1
      subs2 <- 1:length(summary(sfit,censored=T)$strata)
126 1
      subs3 <- 1:length(summary(sfit,times = times,extend = TRUE)$strata)
127
    }
128
  } else{
129 0
    for(i in 1:length(subs)){
130 0
      if(i==1){
131 0
        ssvar <- paste("(?=.*\\b=",subs[i],sep="")
132
      }
133 0
      if(i==length(subs)){
134 0
        ssvar <- paste(ssvar,"\\b)(?=.*\\b=",subs[i],"\\b)",sep="")
135
      }
136 0
      if(!i %in% c(1, length(subs))){
137 0
        ssvar <- paste(ssvar,"\\b)(?=.*\\b=",subs[i],sep="")
138
      }
139 0
      if(i==1 & i==length(subs)){
140 0
        ssvar <- paste("(?=.*\\b=",subs[i],"\\b)",sep="")
141
      }
142
    }
143 0
    subs1 <- which(regexpr(ssvar,levels(summary(sfit)$strata), perl=T)!=-1)
144 0
    subs2 <- which(regexpr(ssvar,summary(sfit,censored=T)$strata, perl=T)!=-1)
145 0
    subs3 <- which(regexpr(ssvar,summary(sfit,times = times,extend = TRUE)$strata, perl=T)!=-1)
146
  }
147
  
148 0
  if(!is.null(subs)) pval <- FALSE
149
  
150
  ##################################
151
  # data manipulation pre-plotting #
152
  ##################################
153
  
154
  
155
  
156 1
  if(length(levels(summary(sfit)$strata)) == 0) {
157
    #[subs1]
158 0
    if(is.null(ystratalabs)) ystratalabs <- as.character(sub("group=*","","All"))
159
  } else {
160
    #[subs1]
161 0
    if(is.null(ystratalabs)) ystratalabs <- as.character(sub("group=*","",names(sfit$strata)))
162
  }
163
  
164 0
  if(is.null(ystrataname)) ystrataname <- "Strata"
165 1
  m <- max(nchar(ystratalabs))
166 1
  times <- seq(0, max(sfit$time), by = timeby)
167
  
168 1
  if(length(levels(summary(sfit)$strata)) == 0) {
169 0
    Factor <- factor(rep("All",length(subs2)))
170
  } else {
171 1
    Factor <- factor(summary(sfit, censored = T)$strata[subs2])
172
  }
173
  
174
  #Data to be used in the survival plot
175
  
176 1
  df <- data.frame(
177 1
    time = sfit$time[subs2],
178 1
    n.risk = sfit$n.risk[subs2],
179 1
    n.event = sfit$n.event[subs2],
180 1
    n.censor = sfit$n.censor[subs2],
181 1
    surv = sfit$surv[subs2],
182 1
    strata = Factor,
183 1
    upper = sfit$upper[subs2],
184 1
    lower = sfit$lower[subs2]
185
  )
186
  
187 1
  if (cumhaz){
188 1
    df$surv = 1 - sfit$surv[subs2]
189 1
    df$lower = 1 - sfit$upper[subs2]
190 1
    df$upper = 1 - sfit$lower[subs2]
191
    
192
  }
193
  
194
  #Final changes to data for survival plot
195 1
  levels(df$strata) <- ystratalabs
196 1
  zeros <- data.frame(time = 0, surv = 1,
197 1
                      strata = factor(ystratalabs, levels=levels(df$strata)),
198 1
                      upper = 1, lower = 1)
199 1
  if (cumhaz){
200 1
    zeros$surv = 0
201 1
    zeros$lower = 0
202 1
    zeros$upper = 0
203
  }
204
  
205 1
  df <- plyr::rbind.fill(zeros, df)
206 1
  d <- length(levels(df$strata))
207
  
208
  ###################################
209
  # specifying axis parameteres etc #
210
  ###################################
211
  
212 1
  if(dashed == TRUE | linecols == "black"){
213 0
    linetype=c("solid", "dashed", "dotted", "dotdash", "longdash", "twodash", "1F", "F1", "4C88C488", "12345678")
214
  } else {
215 1
    linetype=c("solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid")
216
  }
217
  
218
  # Scale transformation
219
  #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
220 1
  surv.scale <- match.arg(surv.scale)
221 1
  scale_labels <-  ggplot2::waiver()
222 1
  if (surv.scale == "percent") scale_labels <- scales::percent
223
  
224 1
  p <- ggplot2::ggplot( df, aes(x=time, y=surv, colour=strata, linetype=strata)) + ggtitle(main)
225
  
226 1
  linecols2 <- linecols
227 1
  if (linecols == "black"){
228 0
    linecols <- "Set1"
229 0
    p <- ggplot2::ggplot( df, aes(x=time, y=surv, linetype=strata)) + ggtitle(main)
230
  }
231
  
232
  
233
  #Set up theme elements
234 1
  p <- p + theme_bw() +
235 1
    theme(axis.title.x = element_text(vjust = 0.7),
236 1
          panel.grid.minor = element_blank(),
237 1
          axis.line = element_line(size =0.5, colour = "black"),
238 1
          legend.position = legendposition,
239 1
          legend.background = element_rect(fill = NULL),
240 1
          legend.key = element_rect(colour = NA),
241 1
          panel.border = element_blank(),
242 1
          plot.margin = unit(c(0, 1, .5,ifelse(m < 10, 1.5, 2.5)),"lines"),
243 1
          panel.grid.major = element_blank(),
244 1
          axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
245 1
          axis.line.y = element_line(size = 0.5, linetype = "solid", colour = "black")) +
246 1
    scale_x_continuous(xlabs, breaks = times, limits = xlims) +
247 1
    scale_y_continuous(ylabs, limits = ylims, labels = scale_labels)
248
  
249
  
250
  
251
  #Removes the legend:
252 1
  if(legend == FALSE)
253 1
    p <- p + theme(legend.position="none")
254
  
255
  #Add lines too plot
256 1
  p <- p + geom_step(size = 0.75) +
257 1
    scale_linetype_manual(name = ystrataname, values=linetype) +
258 1
    scale_colour_brewer(name = ystrataname, palette=linecols)
259
  
260
  #Add censoring marks to the line:
261 1
  if(marks == TRUE)
262 1
    p <- p + geom_point(data = subset(df, n.censor >= 1), aes(x = time, y = surv), shape = shape, colour = "black")
263
  
264
  #Add 95% CI to plot
265 1
  if(ci == TRUE){
266 1
    if (linecols2 == "black"){
267 0
      p <- p +  geom_ribbon(data=df, aes(ymin = lower, ymax = upper), alpha=0.25, colour=NA) 
268
    } else{
269 1
      p <- p +  geom_ribbon(data=df, aes(ymin = lower, ymax = upper, fill = strata), alpha=0.25, colour=NA) + scale_fill_brewer(name = ystrataname, palette=linecols)
270
    } 
271
  }
272
  
273
  ## Create a blank plot for place-holding
274 1
  blank.pic <- ggplot(df, aes(time, surv)) +
275 1
    geom_blank() + theme_void() +             ## Remove gray color
276 1
    theme(axis.text.x = element_blank(),axis.text.y = element_blank(),
277 1
          axis.title.x = element_blank(),axis.title.y = element_blank(),
278 1
          axis.ticks = element_blank(),
279 1
          panel.grid.major = element_blank(),panel.border = element_blank())
280
  
281
  #####################
282
  # p-value placement #
283
  #####################a
284
  
285 0
  if(length(levels(summary(sfit)$strata)) == 0) pval <- FALSE
286
  
287 1
  if(pval == TRUE) {
288 1
    if (is.null(data)){
289 1
      data = eval(sfit$call$data)
290
    }
291
  
292 1
    sdiff <- survival::survdiff(as.formula(sfit$call$formula), data = data)
293 1
    pvalue <- pchisq(sdiff$chisq,length(sdiff$n) - 1,lower.tail = FALSE)
294
    
295
    ## cluster option
296 1
    if (cluster.option == "cluster" & !is.null(cluster.var)){
297 1
      form.old <- as.character(sfit$call$formula)
298 1
      form.new <- paste(form.old[2], form.old[1], " + ", form.old[3], " + cluster(", cluster.var, ")", sep="")
299 1
      sdiff <- survival::coxph(as.formula(form.new), data = data, model = T, robust = T)
300 1
      pvalue <- summary(sdiff)$robscore["pvalue"]
301 1
    } else if (cluster.option == "frailty" & !is.null(cluster.var)){
302 1
      form.old <- as.character(sfit$call$formula)
303 1
      form.new <- paste(form.old[2], form.old[1], " + ", form.old[3], " + frailty(", cluster.var, ")", sep="")
304 1
      sdiff <- survival::coxph(as.formula(form.new), data =data, model = T)
305 1
      pvalue <- summary(sdiff)$logtest["pvalue"]
306
    }
307
    
308 1
    pvaltxt <- ifelse(pvalue < 0.0001,"p < 0.0001",paste("p =", round(pvalue, 4)))
309
    
310 0
    if (pval.testname) pvaltxt <- paste0(pvaltxt, " (Log-rank)")
311
    
312
    # MOVE P-VALUE LEGEND HERE BELOW [set x and y]
313 1
    if (is.null(pval.coord)){
314 1
      p <- p + annotate("text",x = (as.integer(max(sfit$time)/5)), y = 0.1 + ylims[1],label = pvaltxt, size  = pval.size)
315
    } else{
316 0
      p <- p + annotate("text",x = pval.coord[1], y = pval.coord[2], label = pvaltxt, size  = pval.size)
317
    }
318
    
319
  }
320
  
321
  ###################################################
322
  # Create table graphic to include at-risk numbers #
323
  ###################################################
324
  
325 1
  n.risk <- NULL
326 1
  if(length(levels(summary(sfit)$strata)) == 0) {
327 0
    Factor <- factor(rep("All",length(subs3)))
328
  } else {
329 1
    Factor <- factor(summary(sfit,times = times,extend = TRUE)$strata[subs3])
330
  }
331
  
332 1
  if(table == TRUE) {
333 1
    risk.data <- data.frame(
334 1
      strata = Factor,
335 1
      time = summary(sfit,times = times,extend = TRUE)$time[subs3],
336 1
      n.risk = summary(sfit,times = times,extend = TRUE)$n.risk[subs3]
337
    )
338
    
339 1
    risk.data$strata <- factor(risk.data$strata, levels=rev(levels(risk.data$strata)))
340
    
341 1
    data.table <- ggplot(risk.data,aes(x = time, y = strata, label = format(n.risk, nsmall = 0))) + 
342 1
      geom_text(size = 3.5) + theme_bw() +
343 1
      scale_y_discrete(breaks = as.character(levels(risk.data$strata)),
344 1
                       labels = rev(ystratalabs)) +
345 1
      scale_x_continuous(label.nrisk, limits = xlims) +
346 1
      theme(axis.title.x = element_text(size = size.label.nrisk, vjust = 1),
347 1
            panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
348 1
            panel.border = element_blank(),axis.text.x = element_blank(),
349 1
            axis.ticks = element_blank(),axis.text.y = element_text(face = "bold",hjust = 1)) 
350 1
    data.table <- data.table +
351 1
      theme(legend.position = "none") + xlab(NULL) + ylab(NULL)
352
    
353
    
354
    # ADJUST POSITION OF TABLE FOR AT RISK
355 1
    data.table <- data.table +
356 1
      theme(plot.margin = unit(c(-1.5, 1, 0.1, ifelse(m < 10, 2.5, 3.5) - 0.15 * m), "lines"))
357
  }
358
  
359
  
360
  #######################
361
  # Plotting the graphs #
362
  #######################
363
  
364 1
  if(table == TRUE){
365 1
    grid.arrange(p, blank.pic, data.table, clip = FALSE, nrow = 3,
366 1
                 ncol = 1, heights = unit(c(2, .1, .25),c("null", "null", "null")))
367
  } else {
368 1
    p
369
  }
370
  
371
}

Read our documentation on viewing source code .

Loading