jinseob2kim / jskm
1
#' @title Creates a Weighted Kaplan-Meier plot - svykm.object in survey package
2
#' @description Creates a Weighted Kaplan-Meier plot - svykm.object in survey package
3
#' @param sfit a svykm object
4
#' @param xlabs x-axis label, Default: 'Time-to-event'
5
#' @param ylabs y-axis label.
6
#' @param xlims numeric: list of min and max for x-axis. Default: NULL
7
#' @param ylims numeric: list of min and max for y-axis. Default: c(0, 1)
8
#' @param surv.scale 	scale transformation of survival curves. Allowed values are "default" or "percent".
9
#' @param ystratalabs character list. A list of names for each strata. Default: NULL
10
#' @param ystrataname The legend name. Default: 'Strata'
11
#' @param timeby numeric: control the granularity along the time-axis; defaults to 7 time-points. 
12
#' @param main plot title, Default: ''
13
#' @param pval logical: add the pvalue to the plot?, Default: FALSE
14
#' @param pval.size numeric value specifying the p-value text size. Default is 5.
15
#' @param pval.coord numeric vector, of length 2, specifying the x and y coordinates of the p-value. Default values are NULL
16
#' @param pval.testname logical: add '(Log-rank)' text to p-value. Default = F
17
#' @param legend logical. should a legend be added to the plot? Default: TRUE
18
#' @param ci logical. Should confidence intervals be plotted. Default = NULL
19
#' @param legendposition numeric. x, y position of the legend if plotted. Default: c(0.85, 0.8)
20
#' @param linecols Character. Colour brewer pallettes too colour lines. Default: 'Set1', "black" for black with dashed line.
21
#' @param dashed logical. Should a variety of linetypes be used to identify lines. Default: FALSE
22
#' @param cumhaz Show cumulaive hazard function, Default: F
23
#' @param design Data design for reactive design data , Default: NULL
24
#' @param subs = NULL,
25
#' @param table logical: Create a table graphic below the K-M plot, indicating at-risk numbers?
26
#' @param label.nrisk Numbers at risk label. Default = "Numbers at risk"
27
#' @param size.label.nrisk Font size of label.nrisk. Default = 10
28
#' @param cut.landmark cut-off for landmark analysis, Default = NULL
29
#' @param ... PARAM_DESCRIPTION
30
#' @return plot
31
#' @details DETAILS
32
#' @examples 
33
#'  library(survey)
34
#'  data(pbc, package="survival")
35
#'  pbc$randomized <- with(pbc, !is.na(trt) & trt>0)
36
#'  biasmodel <- glm(randomized~age*edema,data=pbc)
37
#'  pbc$randprob <- fitted(biasmodel)
38
#'  dpbc <- svydesign(id=~1, prob=~randprob, strata=~edema, data=subset(pbc,randomized))
39
#'  s1 <- svykm(Surv(time,status>0)~sex, design=dpbc)
40
#'  svyjskm(s1)
41
#' @rdname svyjskm
42
#' @import ggplot2
43
#' @importFrom stats formula
44
#' @importFrom survey svyranktest
45
#' @importFrom survival Surv
46
#' @export 
47

48
svyjskm <- function(sfit,
49
                    xlabs = "Time-to-event",
50
                    ylabs = "Survival probability",
51
                    xlims = NULL,
52
                    ylims = c(0,1),
53
                    ystratalabs = NULL,
54
                    ystrataname = NULL,
55
                    surv.scale = c("default", "percent"),
56
                    timeby = NULL,
57
                    main = "",
58
                    pval = FALSE,
59
                    pval.size = 5, 
60
                    pval.coord = c(NULL, NULL),
61
                    pval.testname = F,
62
                    legend = TRUE,
63
                    legendposition=c(0.85,0.8),
64
                    ci = NULL,
65
                    linecols="Set1",
66
                    dashed= FALSE,
67
                    cumhaz = F,
68
                    design = NULL,
69
                    subs = NULL,
70
                    table = F,
71
                    label.nrisk = "Numbers at risk",
72
                    size.label.nrisk = 10,
73
                    cut.landmark = NULL,
74
                    ...) {
75
  
76 1
  surv <- strata <- lower <- upper <- NULL
77
  
78
  
79 1
  if (is.null(timeby)){
80 1
    if (class(sfit) == "svykmlist"){
81 1
      timeby <- signif(max(sapply(sfit, function(x){max(x$time)}))/7, 1)
82 1
    } else if(class(sfit) == "svykm"){
83
      
84 1
      timeby <- signif(max(sfit$time)/7, 1)
85
      }
86
  }
87
  
88 1
  if (is.null(ci)){
89 1
    if (class(sfit) == "svykmlist"){
90 1
      ci <- "varlog" %in% names(sfit[[1]])
91 0
    } else if (class(sfit) == "svykm"){
92 0
      ci <- "varlog" %in% names(sfit)
93
    }
94
  }
95
  
96
  
97 1
  if (ci & !is.null(cut.landmark)){
98 0
    if (is.null(design)){
99 0
      design <- tryCatch(get(as.character(attr(sfit, "call")$design)), error = function(e) e)
100 0
      if ("error" %in% class(design)){
101 0
        stop("'pval' option requires design object. please input 'design' option") 
102
      }
103
    }
104 0
    var.time <- as.character(formula(sfit)[[2]][[2]])
105 0
    var.event <- as.character(formula(sfit)[[2]][[3]])
106 0
    if (length(var.event) > 1){
107 0
      var.event <- setdiff(var.event, as.character(as.symbol(var.event)))
108 0
      var.event <- var.event[sapply(var.event, function(x) {"warning" %in% class(tryCatch(as.numeric(x), warning = function(w) w))})]
109
    }
110 0
    design1 <- design
111 0
    design1$variables[[var.event]][design1$variables[[var.time]] >= cut.landmark] <- 0
112 0
    design1$variables[[var.time]][design1$variables[[var.time]] >= cut.landmark] <- cut.landmark
113
    
114 0
    sfit2 <- survey::svykm(formula(sfit), design = subset(design, get(var.time) >= cut.landmark), se = T)
115
  }
116
  
117
  
118
  
119
  
120 1
  if (class(sfit) == "svykmlist"){
121 1
    if(is.null(ystrataname)) ystrataname <- as.character(formula(sfit)[[3]])
122

123 1
    if (ci){
124 1
      if ("varlog" %in% names(sfit[[1]])){
125 1
        df <- do.call(rbind, lapply(names(sfit), function(x){data.frame("strata" = x, "time" = sfit[[x]]$time, "surv" = sfit[[x]]$surv, "lower" = pmax(0, exp(log(sfit[[x]]$surv) - 1.96 * sqrt(sfit[[x]]$varlog))), "upper" = pmin(1, exp(log(sfit[[x]]$surv) + 1.96 * sqrt(sfit[[x]]$varlog))))}))
126 1
        if (!is.null(cut.landmark)){
127 0
          df2 <- do.call(rbind, lapply(names(sfit2), function(x){data.frame("strata" = x, "time" = sfit2[[x]]$time, "surv" = sfit2[[x]]$surv, "lower" = pmax(0, exp(log(sfit2[[x]]$surv) - 1.96 * sqrt(sfit2[[x]]$varlog))), "upper" = pmin(1, exp(log(sfit2[[x]]$surv) + 1.96 * sqrt(sfit2[[x]]$varlog))))}))
128 0
          df <- rbind(df[df$time < cut.landmark, ], data.frame("strata" = unique(df$strata), "time" = cut.landmark, "surv" = 1, "lower" = 1, "upper" = 1), df2)
129
        }
130
        
131
      } else{
132 1
        stop("No CI information in svykmlist object. please run svykm with se = T option.")
133
      }
134
    } else{
135 1
      df <- do.call(rbind, lapply(names(sfit), function(x){data.frame("strata" = x, "time" = sfit[[x]]$time, "surv" = sfit[[x]]$surv)}))
136 1
      if (!is.null(cut.landmark)){
137 0
        for (v in unique(df$strata)){
138 0
          if (nrow(subset(df, time == cut.landmark & strata == v)) == 0){
139 0
            df <- rbind(df, data.frame(strata = v, time = cut.landmark, surv = 1))
140
          } else{
141 0
            df[df$time == cut.landmark & df$strata == v, "surv"] <- 1
142
          }
143
          
144 0
          df[df$time > cut.landmark & df$strata == v, "surv"] <- df[df$time > cut.landmark & df$strata == v, "surv"]/min(df[df$time < cut.landmark & df$strata == v, "surv"])
145
        }
146
      }
147
    }
148
    
149 1
    df$strata <- as.factor(df$strata)
150 1
    times <- seq(0, max(sapply(sfit, function(x){max(x$time)})), by = timeby)
151 1
    if (is.null(ystratalabs)){
152 1
      ystratalabs <- names(sfit)
153
    }
154 1
    if (is.null(xlims)){
155 1
      xlims <- c(0,max(sapply(sfit, function(x){max(x$time)})))
156
    }
157
    
158 1
  } else if(class(sfit) == "svykm"){
159 1
    if(is.null(ystrataname)) ystrataname <- "Strata"
160

161 1
    if (ci){
162 1
      if ("varlog" %in% names(sfit)){
163 1
        df <- data.frame("strata" = "All", "time" = sfit$time, "surv" = sfit$surv,  "lower" = pmax(0, exp(log(sfit$surv) - 1.96 * sqrt(sfit$varlog))), "upper" = pmax(0, exp(log(sfit$surv) + 1.96 * sqrt(sfit$varlog))))
164 1
        if (!is.null(cut.landmark)){
165 0
          df2 <- data.frame("strata" = "All", "time" = sfit2$time, "surv" = sfit2$surv,  "lower" = pmax(0, exp(log(sfit2$surv) - 1.96 * sqrt(sfit2$varlog))), "upper" = pmax(0, exp(log(sfit2$surv) + 1.96 * sqrt(sfit2$varlog))))
166 0
          df <- rbind(df[df$time < cut.landmark, ], data.frame("strata" = "All", "time" = cut.landmark, "surv" = 1, "lower" = 1, "upper" = 1), df2)
167
        }
168
      } else{
169 1
        stop("No CI information in svykm object. please run svykm with se = T option.")
170
      }
171
    } else{
172 1
      df <- data.frame("strata" = "All", "time" = sfit$time, "surv" = sfit$surv)
173 1
      if (!is.null(cut.landmark)){
174 0
        if (nrow(subset(df, time == cut.landmark)) == 0){
175 0
          df <- rbind(df, data.frame(strata = "All", time = cut.landmark, surv = 1))
176
        } else{
177 0
          df[df$time == cut.landmark, "surv"] <- 1
178
        }
179
        
180 0
        df[df$time > cut.landmark, "surv"] <- df[df$time > cut.landmark, "surv"]/min(df[df$time < cut.landmark, "surv"])
181
      }
182
    }
183
    
184 1
    times <- seq(0, max(sfit$time), by = timeby)
185 1
    if (is.null(ystratalabs)){
186 1
      ystratalabs <- "All"
187
    }
188 1
    if (is.null(xlims)){
189 1
      xlims <- c(0,max(sfit$time))
190
    }
191
  }
192
  
193 1
  m <- max(nchar(ystratalabs))
194
  
195
  
196
  
197
  
198
  
199 1
  if (cumhaz){
200 1
    df$surv <- 1 - df$surv
201 1
    if (ci){
202 1
      upper.new <- 1 - df$lower
203 1
      lower.new <- 1 - df$upper
204 1
      df$lower = lower.new
205 1
      df$upper = upper.new
206
    }
207
    }
208
  
209
  #Final changes to data for survival plot
210 1
  levels(df$strata) <- ystratalabs
211 1
  zeros <- data.frame("strata" = factor(ystratalabs, levels=levels(df$strata)), "time" = 0, "surv" = 1)
212 1
  if (ci){
213 1
    zeros$upper <- 1
214 1
    zeros$lower <- 1
215
  }
216
  
217 1
  if (cumhaz){
218 1
    zeros$surv <- 0
219 1
    if (ci){
220 1
      zeros$lower <- 0
221 1
      zeros$upper <- 0
222
    }
223
  }
224
  
225 1
  df <- rbind(zeros, df)
226 1
  d <- length(levels(df$strata))
227
  
228
  ###################################
229
  # specifying axis parameteres etc #
230
  ###################################
231
  
232 1
  if(dashed == TRUE | linecols == "black"){
233 0
    linetype=c("solid", "dashed", "dotted", "dotdash", "longdash", "twodash", "1F", "F1", "4C88C488", "12345678")
234
  } else {
235 1
    linetype=c("solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid", "solid")
236
  }
237
  
238
  # Scale transformation
239
  #::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
240 1
  surv.scale <- match.arg(surv.scale)
241 1
  scale_labels <-  ggplot2::waiver()
242 1
  if (surv.scale == "percent") scale_labels <- scales::percent
243
  
244
  
245 1
  p <- ggplot2::ggplot( df, aes(x=time, y=surv, colour=strata, linetype=strata)) + ggtitle(main)
246 1
  linecols2 <- linecols
247 1
  if (linecols == "black"){
248 0
    linecols <- "Set1"
249 0
    p <- ggplot2::ggplot( df, aes(x=time, y=surv, linetype=strata)) + ggtitle(main)
250
  }
251
  
252
  #Set up theme elements
253 1
  p <- p + theme_bw() +
254 1
    theme(axis.title.x = element_text(vjust = 0.7),
255 1
          panel.grid.minor = element_blank(),
256 1
          axis.line = element_line(size =0.5, colour = "black"),
257 1
          legend.position = legendposition,
258 1
          legend.background = element_rect(fill = NULL),
259 1
          legend.key = element_rect(colour = NA),
260 1
          panel.border = element_blank(),
261 1
          plot.margin = unit(c(0, 1, .5,ifelse(m < 10, 1.5, 2.5)),"lines"),
262 1
          panel.grid.major = element_blank(),
263 1
          axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
264 1
          axis.line.y = element_line(size = 0.5, linetype = "solid", colour = "black")) +
265 1
    scale_x_continuous(xlabs, breaks = times, limits = xlims) +
266 1
    scale_y_continuous(ylabs, limits = ylims, labels = scale_labels)
267
  
268
  
269
 
270
  #Removes the legend:
271 1
  if(legend == FALSE)
272 0
    p <- p + theme(legend.position="none")
273
  
274
  #Add lines too plot
275 1
  p <- p + geom_step(size = 0.75) +
276 1
    scale_linetype_manual(name = ystrataname, values=linetype) +
277 1
    scale_colour_brewer(name = ystrataname, palette=linecols)
278
  
279
  #Add 95% CI to plot
280 1
  if(ci){
281 1
    if (linecols2 == "black"){
282 0
      p <- p +  geom_ribbon(data=df, aes(ymin = lower, ymax = upper), alpha=0.25, colour=NA) 
283
    } else{
284 1
      p <- p +  geom_ribbon(data=df, aes(ymin = lower, ymax = upper, fill = strata), alpha=0.25, colour=NA) + scale_fill_brewer(name = ystrataname, palette=linecols)
285
    } 
286
  }
287
  
288 1
  if (!is.null(cut.landmark)){
289 0
    p <- p + geom_vline(xintercept = cut.landmark, lty = 2)
290
  }
291
  
292
  ## p-value
293 1
  if(class(sfit) == "svykm") pval <- FALSE
294
  #if(is.null(design)) pval <- FALSE
295
  
296 1
  if(pval) {
297 1
    if (is.null(design)){
298 0
      design <- tryCatch(get(as.character(attr(sfit, "call")$design)), error = function(e) e)
299 0
      if ("error" %in% class(design)){
300 0
        stop("'pval' option requires design object. please input 'design' option") 
301
      }
302
    }
303
    
304 1
    if (is.null(cut.landmark)){
305 1
      sdiff <- survey::svylogrank(formula(sfit), design = design)
306 1
      pvalue <- sdiff[[2]][2]
307
      
308 1
      pvaltxt <- ifelse(pvalue < 0.001, "p < 0.001", paste("p =", round(pvalue, 3)))
309 1
      if (pval.testname) pvaltxt <- paste0(pvaltxt, " (Log-rank)")
310
      
311
      # MOVE P-VALUE LEGEND HERE BELOW [set x and y]
312 1
      if (is.null(pval.coord)){
313 1
        p <- p + annotate("text",x = as.integer(max(sapply(sfit, function(x){max(x$time)/10}))), y = 0.1 + ylims[1],label = pvaltxt, size  = pval.size)
314
      } else{
315 1
        p <- p + annotate("text",x = pval.coord[1], y = pval.coord[2], label = pvaltxt, size  = pval.size)
316
      } 
317
    } else{
318 0
      if (is.null(design)){
319 0
        design <- tryCatch(get(as.character(attr(sfit, "call")$design)), error = function(e) e)
320 0
        if ("error" %in% class(design)){
321 0
          stop("'pval' option requires design object. please input 'design' option") 
322
        }
323
      }
324 0
      var.time <- as.character(formula(sfit)[[2]][[2]])
325 0
      var.event <- as.character(formula(sfit)[[2]][[3]])
326 0
      if (length(var.event) > 1){
327 0
        var.event <- setdiff(var.event, as.character(as.symbol(var.event)))
328 0
        var.event <- var.event[sapply(var.event, function(x) {"warning" %in% class(tryCatch(as.numeric(x), warning = function(w) w))})]
329
      }
330 0
      design1 <- design
331 0
      design1$variables[[var.event]][design1$variables[[var.time]] >= cut.landmark] <- 0
332 0
      design1$variables[[var.time]][design1$variables[[var.time]] >= cut.landmark] <- cut.landmark
333
      
334 0
      sdiff1 <- survey::svylogrank(formula(sfit), design = design1)
335 0
      sdiff2 <- survey::svylogrank(formula(sfit), design = subset(design, get(var.time) >= cut.landmark))
336 0
      pvalue <- sapply(list(sdiff1, sdiff2), function(x){x[[2]][2]})
337
      
338 0
      pvaltxt <- ifelse(pvalue < 0.001, "p < 0.001", paste("p =", round(pvalue, 3)))
339 0
      if (pval.testname) pvaltxt <- paste0(pvaltxt, " (Log-rank)")
340
      
341 0
      if (is.null(pval.coord)){
342 0
        p <- p + annotate("text",x = c(as.integer(max(sapply(sfit, function(x){max(x$time)/10}))), as.integer(max(sapply(sfit, function(x){max(x$time)/10}))) + cut.landmark), y = 0.1 + ylims[1],label = pvaltxt, size  = pval.size)
343
      } else{
344 0
        p <- p + annotate("text",x = c(pval.coord[1], pval.coord[1] + cut.landmark), y = pval.coord[2], label = pvaltxt, size  = pval.size)
345
      }
346
      
347
    }
348
    
349
  }
350
  
351
  ## Create a blank plot for place-holding
352 1
  blank.pic <- ggplot(df, aes(time, surv)) +
353 1
    geom_blank() + theme_void() +             ## Remove gray color
354 1
    theme(axis.text.x = element_blank(),axis.text.y = element_blank(),
355 1
          axis.title.x = element_blank(),axis.title.y = element_blank(),
356 1
          axis.ticks = element_blank(),
357 1
          panel.grid.major = element_blank(),panel.border = element_blank())
358
  
359
  ###################################################
360
  # Create table graphic to include at-risk numbers #
361
  ###################################################
362
  
363 1
  n.risk <- NULL
364 1
  if(table == TRUE) {
365
    
366 1
    if(is.null(design)){
367 0
      sfit2 <- survival::survfit(formula(sfit), data = get(as.character(attr(sfit, "call")$design))$variables)
368
    } else{
369 1
      sfit2 <- survival::survfit(formula(sfit), data = design$variables)
370
    }
371
    
372
    #times <- seq(0, max(sfit2$time), by = timeby)
373
    
374 1
    if(is.null(subs)){
375 1
      if(length(levels(summary(sfit2)$strata)) == 0) {
376 0
        subs1 <- 1
377 0
        subs2 <- 1:length(summary(sfit2,censored=T)$time)
378 0
        subs3 <- 1:length(summary(sfit2,times = times,extend = TRUE)$time)
379
      } else {
380 1
        subs1 <- 1:length(levels(summary(sfit2)$strata))
381 1
        subs2 <- 1:length(summary(sfit2,censored=T)$strata)
382 1
        subs3 <- 1:length(summary(sfit2,times = times,extend = TRUE)$strata)
383
      }
384
    } else{
385 0
      for(i in 1:length(subs)){
386 0
        if(i==1){
387 0
          ssvar <- paste("(?=.*\\b=",subs[i],sep="")
388
        }
389 0
        if(i==length(subs)){
390 0
          ssvar <- paste(ssvar,"\\b)(?=.*\\b=",subs[i],"\\b)",sep="")
391
        }
392 0
        if(!i %in% c(1, length(subs))){
393 0
          ssvar <- paste(ssvar,"\\b)(?=.*\\b=",subs[i],sep="")
394
        }
395 0
        if(i==1 & i==length(subs)){
396 0
          ssvar <- paste("(?=.*\\b=",subs[i],"\\b)",sep="")
397
        }
398
      }
399 0
      subs1 <- which(regexpr(ssvar,levels(summary(sfit2)$strata), perl=T)!=-1)
400 0
      subs2 <- which(regexpr(ssvar,summary(sfit2,censored=T)$strata, perl=T)!=-1)
401 0
      subs3 <- which(regexpr(ssvar,summary(sfit2,times = times,extend = TRUE)$strata, perl=T)!=-1)
402
    }
403
    
404 0
    if(!is.null(subs)) pval <- FALSE
405
    
406

407
    
408 1
    if(length(levels(summary(sfit2)$strata)) == 0) {
409 0
      Factor <- factor(rep("All",length(subs3)))
410
    } else {
411 1
      Factor <- factor(summary(sfit2,times = times,extend = TRUE)$strata[subs3])
412
    }
413
    
414
    
415 1
    risk.data <- data.frame(
416 1
      strata = Factor,
417 1
      time = summary(sfit2,times = times,extend = TRUE)$time[subs3],
418 1
      n.risk = summary(sfit2,times = times,extend = TRUE)$n.risk[subs3]
419
    )
420
    
421
    
422 1
    risk.data$strata <- factor(risk.data$strata, levels=rev(levels(risk.data$strata)))
423
    
424 1
    data.table <- ggplot(risk.data,aes(x = time, y = strata, label = format(n.risk, nsmall = 0))) + 
425 1
      geom_text(size = 3.5) + theme_bw() +
426 1
      scale_y_discrete(breaks = as.character(levels(risk.data$strata)),
427 1
                       labels = rev(ystratalabs)) +
428 1
      scale_x_continuous(label.nrisk, limits = xlims) +
429 1
      theme(axis.title.x = element_text(size = size.label.nrisk, vjust = 1),
430 1
            panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
431 1
            panel.border = element_blank(),axis.text.x = element_blank(),
432 1
            axis.ticks = element_blank(),axis.text.y = element_text(face = "bold",hjust = 1)) 
433 1
    data.table <- data.table +
434 1
      theme(legend.position = "none") + xlab(NULL) + ylab(NULL)
435
    
436
    
437
    # ADJUST POSITION OF TABLE FOR AT RISK
438 1
    data.table <- data.table +
439 1
      theme(plot.margin = unit(c(-1.5, 1, 0.1, ifelse(m < 10, 2.5, 3.5) - 0.15 * m), "lines"))
440
  }
441
  
442
  #######################
443
  # Plotting the graphs #
444
  #######################
445
  
446 1
  if(table == TRUE){
447 1
    grid.arrange(p, blank.pic, data.table, clip = FALSE, nrow = 3,
448 1
                 ncol = 1, heights = unit(c(2, .1, .25),c("null", "null", "null")))
449
  } else {
450 1
    p
451
  }
452

453
  
454
}
455

456

457

Read our documentation on viewing source code .

Loading