1
# pROC: Tools Receiver operating characteristic (ROC curves) with
2
# (partial) area under the curve, confidence intervals and comparison. 
3
# Copyright (C) 2010-2014 Xavier Robin, Alexandre Hainard, Natacha Turck,
4
# Natalia Tiberti, Frédérique Lisacek, Jean-Charles Sanchez
5
# and Markus Müller
6
#
7
# This program is free software: you can redistribute it and/or modify
8
# it under the terms of the GNU General Public License as published by
9
# the Free Software Foundation, either version 3 of the License, or
10
# (at your option) any later version.
11
#
12
# This program is distributed in the hope that it will be useful,
13
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
# GNU General Public License for more details.
16
#
17
# You should have received a copy of the GNU General Public License
18
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
19

20
roc <- function(...) {
21 33
  UseMethod("roc")
22
}
23

24
roc.formula <- function (formula, data, ...) {
25 33
	data.missing <- missing(data)
26 33
	roc.data <- roc.utils.extract.formula(formula, data, ..., 
27 33
										  data.missing = data.missing,
28 33
										  call = match.call())
29 33
	response <- roc.data$response
30 33
	predictors <- roc.data$predictors
31
	
32 33
	if (length(response) == 0) {
33 33
		stop("Error in the formula: a response is required in a formula of type response~predictor.")
34
	}
35
	
36 33
	if (ncol(predictors) == 1) {
37 33
		roc <- roc.default(response, predictors[, 1], ...)
38 33
		roc$call <- match.call()
39 33
		if (!is.null(roc$smooth))
40 33
			attr(roc, "roc")$call <- roc$call
41 33
		return(roc)
42
	}
43 33
	else if (ncol(predictors) > 1) {
44 33
		roclist <- lapply(roc.data$predictor.names, function(predictor, formula, m.data, call, ...) {
45
			# Get one ROC
46 33
			roc <- roc.default(response, m.data[[predictor]], ...)
47
			# Update the call to reflect the parents
48 33
			formula[3] <- call(predictor) # replace the predictor in formula
49 33
			call$formula <- formula # Replace modified formula
50 33
			roc$call <- call
51 33
			return(roc)
52 33
		}, formula = formula, m.data = predictors, call = match.call(), ...)
53
		# Set the list names
54 33
		names(roclist) <- roc.data$predictor.names
55 33
		return(roclist)
56
	}
57
	else {
58 0
		stop("Invalid formula:at least 1 predictor is required in a formula of type response~predictor.")
59
	}
60
}
61

62
roc.data.frame <- function(data, response, predictor, 
63
                           ret = c("roc", "coords", "all_coords"),
64
                           ...) {
65 33
  ret <- match.arg(ret)
66
  
67 33
  if (is.character(substitute(response))) {
68 33
  	response_name <- response
69
  }
70
  else {
71 33
  	response_name <- deparse(substitute(response))
72
  }
73
  
74 33
  if (is.character(substitute(predictor))) {
75 33
  	predictor_name <- predictor
76
  }
77
  else {
78 33
  	predictor_name <- deparse(substitute(predictor))
79
  }
80
  
81 33
  if (any(! c(response_name, predictor_name) %in% colnames(data))) {
82
  	# Some column is not in data. This could be a genuine error or the user not aware or NSE and wants to use roc_ instead
83 33
  	warning("This method uses non-standard evaluation (NSE). Did you want to use the `roc_` function instead?")
84
  }
85
  
86 33
  r <- roc_(data, response_name, predictor_name, ret = ret, ...)
87
  
88 33
  if (ret == "roc") {
89 33
    r$call <- match.call()
90
  }
91 33
  return(r)
92
}
93

94
roc_ <- function(data, response, predictor,
95
                 ret = c("roc", "coords", "all_coords"),
96
                 ...) {
97 33
  ret <- match.arg(ret)
98
  
99
  # Ensure the data contains the columns we need
100
  # In case of an error we want to show the name of the data. If the function
101
  # was called from roc.data.frame we want to deparse in that environment instead
102 33
  if (sys.nframe() > 1 && deparse(sys.calls()[[sys.nframe()-1]][[1]]) == "roc.data.frame") {
103 33
  	data_name <- deparse(substitute(data, parent.frame(n = 1)))
104
  }
105
  else {
106 33
  	data_name <- deparse(substitute(data))
107
  }
108 33
  if (! response %in% colnames(data)) {
109 33
  	stop(sprintf("Column %s not present in data %s", response, data_name))
110
  }
111 33
  if (! predictor %in% colnames(data)) {
112 33
  	stop(sprintf("Column '%s' not present in data %s", predictor, data_name))
113
  }
114
  
115 33
  r <- roc(data[[response]], data[[predictor]], ...)
116
  
117 33
  if (ret == "roc") {
118 33
    r$call <- match.call()
119 33
    return(r)
120
  }
121 33
  else if (ret == "coords") {
122 33
    co <- coords(r, x = "all", transpose = FALSE)
123 33
    rownames(co) <- NULL
124 33
    return(co)
125
  }
126 33
  else if (ret == "all_coords") {
127 33
    co <- coords(r, x = "all", ret="all", transpose = FALSE)
128 33
    rownames(co) <- NULL
129 33
    return(co)
130
  }
131
}
132

133
roc.default <- function(response, predictor,
134
                        controls, cases,
135
                        density.controls, density.cases,
136
                        # data interpretation
137
                        levels=base::levels(as.factor(response)), # precise the levels of the responses as c("control group", "positive group"). Can be used to ignore some response levels.
138
                        
139
                        percent=FALSE, # Must sensitivities, specificities and AUC be reported in percent? Note that if TRUE, and you want a partial area, you must pass it in percent also (partial.area=c(100, 80))
140
                        na.rm=TRUE,
141
                        direction=c("auto", "<", ">"), # direction of the comparison. Auto: automatically define in which group the median is higher and take the good direction to have an AUC >= 0.5
142
                        algorithm=6,
143
						quiet = FALSE,
144

145
                        # what computation must be done
146
                        smooth=FALSE, # call smooth.roc on the current object
147
                        auc=TRUE, # call auc.roc on the current object
148
                        ci=FALSE, # call ci.roc on the current object
149
                        plot=FALSE, # call plot.roc on the current object
150

151
                        # disambiguate method/n for ci and smooth
152
                        smooth.method="binormal",
153
						smooth.n=512,
154
                        ci.method=NULL,
155
                        # capture density for smooth.roc here (do not pass to graphical functions)
156
                        density=NULL,
157
                        
158
                        # further arguments passed to plot, auc, ci, smooth.
159
                        ... 
160
                       
161
                        ) {
162
  # Check arguments
163 33
  direction <- match.arg(direction)
164
  # Response / Predictor
165 33
  if (!missing(response) && !is.null(response) && !missing(predictor) && !is.null(predictor)) {
166
  	# Forbid case/controls
167 33
  	if ((!missing(cases) && !is.null(cases)) || (!missing(controls) && !is.null(controls))) {
168 33
  		stop("'cases/controls' argument incompatible with 'response/predictor'.")
169
  	}
170
  	# Forbid density
171 33
  	if ((!missing(density.cases) && !is.null(density.cases)) || (!missing(density.controls) && !is.null(density.controls))) {
172 33
  		stop("'density.*' arguments incompatible with 'response/predictor'.")
173
  	}
174
  	
175 33
    original.predictor <- predictor # store a copy of the original predictor (before converting ordered to numeric and removing NA)
176 33
    original.response <- response # store a copy of the original predictor (before converting ordered to numeric)
177
    
178
    # Validate levels
179 33
    if (missing(levels)) {
180 33
    	if (length(levels) > 2) {
181 33
    		warning("'response' has more than two levels. Consider setting 'levels' explicitly or using 'multiclass.roc' instead")
182 33
    		levels <- levels[1:2]
183
    	}
184 33
    	else if (length(levels) < 2) {
185 0
    		stop("'response' must have two levels")
186
    	}
187 33
    	ifelse(quiet, invisible, message)(sprintf("Setting levels: control = %s, case = %s", levels[1], levels[2]))
188
    }
189 33
    else if (length(levels) != 2) {
190 33
    	stop("'levels' argument must have length 2")
191
    }
192

193
    # ensure predictor is numeric or ordered
194 33
    if (!is.numeric(predictor)) {
195 33
      if (is.ordered(predictor)) {
196 33
      	predictor <- tryCatch(
197
      		{
198 33
	      		as.numeric(as.character(predictor))
199
	      	},
200 33
	      	warning = function(warn) {
201 33
	      		warning("Ordered predictor converted to numeric vector. Threshold values will not correspond to values in predictor.")
202 33
	      		return(as.numeric(predictor))
203
	      	}
204
      	)
205
      }
206
      else {
207 33
        stop("Predictor must be numeric or ordered.")
208
      }
209
    }
210 33
    if (is.matrix(predictor)) {
211 33
    	warning("Deprecated use a matrix as predictor. Unexpected results may be produced, please pass a numeric vector.")
212
    }
213 33
    if (is.matrix(response)) {
214 33
    	warning("Deprecated use a matrix as response. Unexpected results may be produced, please pass a vector or factor.")
215
    }
216
    # also make sure response and predictor are vectors of the same length
217 33
    if (length(predictor) != length(response)) {
218 33
      stop("Response and predictor must be vectors of the same length.")
219
    }
220
    # remove NAs if requested
221 33
    if (na.rm) {
222 33
      nas <- is.na(response) | is.na(predictor)
223 33
      if (any(nas)) {
224 33
        na.action <- grep(TRUE, nas)
225 33
        class(na.action) <- "omit"
226 33
        response <- response[!nas]
227 33
        attr(response, "na.action") <- na.action
228 33
        predictor <- predictor[!nas]
229 33
        attr(predictor, "na.action") <- na.action
230
      }
231
    }
232 33
    else if(any(is.na(c(predictor[response==levels[1]], predictor[response==levels[2]], response)))) # Unable to compute anything if there is any NA in the response or in the predictor data we want to consider !
233 33
      return(NA)
234 33
    splitted <- split(predictor, response)
235 33
    controls <- splitted[[as.character(levels[1])]]
236 33
    if (length(controls) == 0)
237 33
      stop("No control observation.")
238 33
    cases <- splitted[[as.character(levels[2])]]
239 33
    if (length(cases) == 0)
240 33
      stop("No case observation.")
241

242
    # Remove patients not in levels
243 33
    patients.in.levels <- response %in% levels
244 33
    if (!all(patients.in.levels)) {
245 33
      response <- response[patients.in.levels]
246 33
      predictor <- predictor[patients.in.levels]
247
    }
248
    
249
    # Check infinities
250 33
    if (any(which <- is.infinite(predictor))) {
251 0
    	warning("Infinite values(s) in predictor, cannot build a valid ROC curve. NaN returned instead.")
252 0
    	return(NaN)
253
    }
254
  }
255

256
  # Cases / Controls
257 33
  else if (!missing(cases) && !is.null(cases) && !missing(controls) && !is.null(controls)) {
258
  	# Forbid density
259 33
  	if ((!missing(density.cases) && !is.null(density.cases)) || (!missing(density.controls) && !is.null(density.controls))) {
260 33
  		stop("'density.*' arguments incompatible with 'response/predictor'.")
261
  	}
262
  	# remove nas
263 33
  	if (na.rm) {
264 33
  		if (any(is.na(controls)))
265 33
  			controls <- na.omit(controls)
266 33
  		if (any(is.na(cases)))
267 33
  			cases <- na.omit(cases)
268
  	}
269 33
  	else if (any(is.na(c(controls, cases)))) # Unable to compute anything if there is any NA in the data we want to consider !
270 33
  		return(NA)
271
  	# are there empty cats?
272 33
  	if (length(controls) == 0)
273 33
  		stop("No control observation.")
274 33
  	if (length(cases) == 0)
275 33
  		stop("No case observation.")
276
  	
277
    # check data consistency
278 33
    if (is.ordered(cases)) {
279 33
    	if (is.ordered(controls)) {
280 33
    		if (identical(attr(cases, "levels"), attr(controls, "levels"))) {
281
    			# merge
282 33
    			original.predictor <- ordered(c(as.character(cases), as.character(controls)), levels = attr(controls, "levels"))
283
    			# Predictor, control and cases must be numeric from now on
284 33
    			predictor <- as.numeric(original.predictor)
285 33
    			controls <- as.numeric(controls)
286 33
    			cases <- as.numeric(cases)
287
    		}
288
    		else {
289 0
    			stop("Levels of cases and controls differ.")
290
    		}
291
    	}
292
    	else {
293 0
    		stop("Cases are of ordered type but controls are not.")
294
    	}
295
    }
296 33
    else if (is.numeric(cases)) {
297 33
    	if (is.numeric(controls)) {
298
    		# build response/predictor
299 33
    		predictor <- c(controls, cases)
300 33
    		original.predictor <- predictor
301
    	}
302
    	else {
303 0
    		stop("Cases are of numeric type but controls are not.")
304
    	}
305
    }
306
    else { 
307 0
    	stop("Cases and controls must be numeric or ordered.")
308
    }
309
  	
310
  	# Check infinities
311 33
  	if (any(which <- is.infinite(predictor))) {
312 33
  		warning("Infinite values(s) in predictor, cannot build a valid ROC curve. NaN returned instead.")
313 33
  		return(NaN)
314
  	}
315
    
316 33
    response <- c(rep(0, length(controls)), rep(1, length(cases)))
317 33
    original.response <- response
318 33
    levels <- c(0, 1)
319
  }
320 33
  else if (!missing(density.cases) && !is.null(density.cases) && !missing(density.controls) && !is.null(density.controls)) {
321 33
    if (!is.numeric(density.cases) || !is.numeric(density.controls))
322 0
      stop("'density.cases' and 'density.controls' must be numeric values of density (over the y axis).")
323 33
    if (direction == "auto")
324 33
      dir <- "<"
325
    else
326 0
      dir <- direction
327 33
    smooth.roc <- smooth.roc.density(density.controls=density.controls, density.cases=density.cases, percent=percent, direction=dir)
328 33
    class(smooth.roc) <- "smooth.roc"
329 33
    smooth.roc <- sort(smooth.roc) # sort se and sp
330
    # anchor SE/SP at 0/100
331 33
    smooth.roc$specificities <- c(0, as.vector(smooth.roc$specificities), ifelse(percent, 100, 1))
332 33
    smooth.roc$sensitivities <- c(ifelse(percent, 100, 1), as.vector(smooth.roc$sensitivities), 0)
333 33
    smooth.roc$percent <- percent # keep some basic roc specifications
334 33
    smooth.roc$direction <- direction
335 33
    smooth.roc$call <- match.call()
336 33
    if (auc) {
337 33
      smooth.roc$auc <- auc(smooth.roc, ...)
338 33
      if (direction == "auto" && smooth.roc$auc < roc.utils.min.partial.auc.auc(smooth.roc$auc)) {
339 0
        smooth.roc <- roc.default(density.controls=density.controls, density.cases=density.cases, levels=levels,
340 0
                                  percent=percent, direction=">", auc=auc, ci=ci, plot=plot, ...)
341 0
        smooth.roc$call <- match.call()
342 0
        return(smooth.roc)
343
      }
344
    }
345 33
    if (ci)
346 0
      warning("CI can not be computed with densities.")
347 33
    if (plot)
348 0
      plot.roc(smooth.roc, ...)
349 33
    return(smooth.roc)
350
  }
351
  else {
352 0
    stop("No valid data provided.")
353
  }
354

355 33
  if (direction == "auto" && median(controls) <= median(cases)) {
356 33
  	direction <- "<"
357 33
  	ifelse(quiet, invisible, message)("Setting direction: controls < cases")
358
  }
359 33
  else if (direction == "auto" && median(controls) > median(cases)) {
360 33
  	direction <- ">"
361 33
  	ifelse(quiet, invisible, message)("Setting direction: controls > cases")
362
  }
363
  
364
  # smooth with densities, but only density was provided, not density.controls/cases
365 33
  if (smooth) {
366 33
    if (missing(density.controls))
367 33
      density.controls <- density
368 33
    if (missing(density.cases))
369 33
      density.cases <- density
370
  }
371
  
372
  # Choose algorithm
373 33
  if (isTRUE(algorithm == 6)) {
374 33
    if (is.numeric(predictor)) {
375 33
      algorithm <- 2
376
    }
377
    else {
378 0
      algorithm <- 3
379
    }
380
  }
381 33
  else if (isTRUE(algorithm == 0)) {
382 0
  	load.suggested.package("microbenchmark")
383 0
    cat("Starting benchmark of algorithms 2 and 3, 10 iterations...\n")
384 0
    thresholds <- roc.utils.thresholds(c(controls, cases), direction)
385 0
    benchmark <- microbenchmark::microbenchmark(
386
#      "1" = roc.utils.perfs.all.safe(thresholds=thresholds, controls=controls, cases=cases, direction=direction),
387 0
      "2" = roc.utils.perfs.all.fast(thresholds=thresholds, controls=controls, cases=cases, direction=direction),
388 0
      "3" = rocUtilsPerfsAllC(thresholds=thresholds, controls=controls, cases=cases, direction=direction),
389 0
      times = 10
390
    )
391 0
    print(summary(benchmark))
392 0
    if (any(is.na(benchmark))) {
393 0
      warning("Microbenchmark returned NA. Using default algorithm 1.")
394 0
      algorithm <- 2
395
    }
396 0
    algorithm <- as.integer(names(which.min(tapply(benchmark$time, benchmark$expr, sum))))
397 0
    cat(sprintf("Selecting algorithm %s.\n", algorithm))
398
  }
399 33
  else if (isTRUE(algorithm == 5)) {
400 33
    thresholds <- length(roc.utils.thresholds(c(controls, cases), direction))
401 33
    if (thresholds > 55) { # critical number determined in inst/extra/algorithms.speed.test.R
402 33
      algorithm <- 2
403
    } else {
404 33
      algorithm <- 3
405
    }
406
  }
407
  
408 33
  if (isTRUE(algorithm == 2)) {
409 33
    fun.sesp <- roc.utils.perfs.all.fast
410
  }
411 33
  else if (isTRUE(algorithm  == 3)) {
412 33
    fun.sesp <- rocUtilsPerfsAllC
413
  }
414 33
  else if (isTRUE(algorithm ==  1)) {
415 33
    fun.sesp <- roc.utils.perfs.all.safe
416
  }
417 33
  else if (isTRUE(algorithm == 4)) {
418 33
    fun.sesp <- roc.utils.perfs.all.test
419
  }
420
  else {
421 0
    stop("Unknown algorithm (must be 0, 1, 2, 3, 4 or 5).")
422
  }
423

424 33
  roc <- roc.cc.nochecks(controls, cases,
425 33
             percent=percent,
426 33
             direction=direction,
427 33
             fun.sesp=fun.sesp,
428 33
             smooth = smooth, density.cases = density.cases,  density.controls = density.controls, smooth.method = smooth.method, smooth.n = smooth.n,
429 33
             auc, ...)
430
  
431 33
  roc$call <- match.call()
432 33
  if (smooth) {
433 33
    attr(roc, "roc")$call <- roc$call
434 33
    attr(roc, "roc")$original.predictor <- original.predictor
435 33
    attr(roc, "roc")$original.response <- original.response
436 33
    attr(roc, "roc")$predictor <- predictor
437 33
    attr(roc, "roc")$response <- response
438 33
    attr(roc, "roc")$levels <- levels
439
  }
440 33
  roc$original.predictor <- original.predictor
441 33
  roc$original.response <- original.response
442 33
  roc$predictor <- predictor
443 33
  roc$response <- response
444 33
  roc$levels <- levels
445
  
446 33
  if (auc) {
447 33
  	attr(roc$auc, "roc") <- roc
448
  }
449
  
450
  # compute CI
451 33
  if (ci)
452 33
    roc$ci <- ci(roc, method=ci.method, ...)
453
  # plot
454 33
  if (plot)
455 16
    plot.roc(roc, ...)
456
  
457
  # return roc
458 33
  return(roc)
459
}
460

461
#' Creates a ROC object from response, predictor, ... without argument checking. Not to be exposed to the end user
462
roc.rp.nochecks <- function(response, predictor, levels, ...) {
463 33
  splitted <- split(predictor, response)
464 33
  controls <- splitted[[as.character(levels[1])]]
465 33
  if (length(controls) == 0)
466 0
    stop("No control observation.")
467 33
  cases <- splitted[[as.character(levels[2])]]
468 33
  if (length(cases) == 0)
469 0
    stop("No case observation.")
470 33
  roc.cc.nochecks(controls, cases, ...)
471
}
472

473
#' Creates a ROC object from controls, cases, ... without argument checking. Not to be exposed to the end user
474
roc.cc.nochecks <- function(controls, cases, percent, direction, fun.sesp, smooth, smooth.method, smooth.n, auc, ...) {
475
  # create the roc object
476 33
  roc <- list()
477 33
  class(roc) <- "roc"
478 33
  roc$percent <- percent
479

480
  # compute SE / SP
481 33
  thresholds <- roc.utils.thresholds(c(controls, cases), direction)
482 33
  perfs <- fun.sesp(thresholds=thresholds, controls=controls, cases=cases, direction=direction)
483

484 33
  se <- perfs$se
485 33
  sp <- perfs$sp
486

487 33
  if (percent) {
488 33
    se <- se*100
489 33
    sp <- sp*100
490
  }
491

492
  # store the computations in the roc object
493 33
  roc$sensitivities <- se
494 33
  roc$specificities <- sp
495 33
  roc$thresholds <- thresholds
496 33
  roc <- sort(roc)
497 33
  roc$direction <- direction
498 33
  roc$cases <- cases
499 33
  roc$controls <- controls
500 33
  roc$fun.sesp <- fun.sesp
501
  
502 33
  if (smooth) {
503 33
    roc <- smooth.roc(roc, method=smooth.method, n=smooth.n, ...)
504
  }
505
  # compute AUC
506 33
  if (auc)
507 33
    roc$auc <- auc.roc(roc, ...)
508
  
509 33
  return(roc)
510
}

Read our documentation on viewing source code .

Loading