1
|
|
# pROC: Tools Receiver operating characteristic (ROC curves) with
|
2
|
|
# (partial) area under the curve, confidence intervals and comparison.
|
3
|
|
# Copyright (C) 2010-2014 Xavier Robin, Alexandre Hainard, Natacha Turck,
|
4
|
|
# Natalia Tiberti, Frédérique Lisacek, Jean-Charles Sanchez
|
5
|
|
# and Markus Müller
|
6
|
|
#
|
7
|
|
# This program is free software: you can redistribute it and/or modify
|
8
|
|
# it under the terms of the GNU General Public License as published by
|
9
|
|
# the Free Software Foundation, either version 3 of the License, or
|
10
|
|
# (at your option) any later version.
|
11
|
|
#
|
12
|
|
# This program is distributed in the hope that it will be useful,
|
13
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
|
# GNU General Public License for more details.
|
16
|
|
#
|
17
|
|
# You should have received a copy of the GNU General Public License
|
18
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
|
|
20
|
|
auc <- function(...) {
|
21
|
3
|
UseMethod("auc")
|
22
|
|
}
|
23
|
|
|
24
|
|
auc.formula <- function(formula, data, ...) {
|
25
|
3
|
data.missing <- missing(data)
|
26
|
3
|
roc.data <- roc.utils.extract.formula(formula, data, ...,
|
27
|
3
|
data.missing = data.missing,
|
28
|
3
|
call = match.call())
|
29
|
3
|
if (length(roc.data$predictor.name) > 1) {
|
30
|
0
|
stop("Only one predictor supported in 'auc'.")
|
31
|
|
}
|
32
|
3
|
response <- roc.data$response
|
33
|
3
|
predictor <- roc.data$predictors[, 1]
|
34
|
|
|
35
|
3
|
roc(response, predictor, auc = TRUE, ...)$auc
|
36
|
|
}
|
37
|
|
|
38
|
|
auc.default <- function(response, predictor, ...) {
|
39
|
3
|
roc.default(response, predictor, auc=TRUE, ...)$auc
|
40
|
|
}
|
41
|
|
|
42
|
|
auc.smooth.roc <- function(smooth.roc, ...) {
|
43
|
3
|
auc.roc(smooth.roc, ...) # force usage of auc.roc: compatible
|
44
|
|
}
|
45
|
|
|
46
|
|
auc.multiclass.roc <- function(multiclass.roc, ...) {
|
47
|
3
|
sum <- sum(sapply(multiclass.roc$rocs, auc, ...))
|
48
|
3
|
count <- length(multiclass.roc$levels)
|
49
|
|
# Hand & Till formula:
|
50
|
3
|
auc <- (2 * sum) / (count * (count - 1))
|
51
|
|
|
52
|
|
# Prepare auc object
|
53
|
3
|
auc <- as.vector(auc) # remove potential pre-existing attributes
|
54
|
3
|
attr(auc, "percent") <- multiclass.roc$percent
|
55
|
3
|
attr(auc, "roc") <- multiclass.roc
|
56
|
|
# Get partial auc details from first computed auc
|
57
|
|
# TODO: find a better way to recover partial.auc!
|
58
|
3
|
aucs <- lapply(multiclass.roc$rocs, auc, ...) # keep individual AUCs in a list for later
|
59
|
3
|
attr(auc, "partial.auc") <- attr(aucs[[1]], "partial.auc")
|
60
|
3
|
if (!identical(attr(aucs[[1]], "partial.auc"), FALSE)) {
|
61
|
3
|
attr(auc, "partial.auc.focus") <- attr(aucs[[1]], "partial.auc.focus")
|
62
|
3
|
attr(auc, "partial.auc.correct") <- attr(aucs[[1]], "partial.auc.correct")
|
63
|
|
}
|
64
|
3
|
class(auc) <- c("multiclass.auc", "numeric")
|
65
|
3
|
return(auc)
|
66
|
|
}
|
67
|
|
|
68
|
|
auc.mv.multiclass.roc <- function(mv.multiclass.roc, ...) {
|
69
|
3
|
aucs <- lapply(mv.multiclass.roc$rocs, function(x) list(auc(x[[1]], ...), auc(x[[2]], ...)))
|
70
|
3
|
A.ij.total <- sum(sapply(aucs, function(x) mean(unlist(x))))
|
71
|
3
|
c <- length(mv.multiclass.roc$levels)
|
72
|
3
|
auc <- 2 / (c * (c-1)) * A.ij.total
|
73
|
|
|
74
|
|
# Prepare auc object
|
75
|
3
|
auc <- as.vector(auc) # remove potential pre-existing attributes
|
76
|
3
|
attr(auc, "percent") <- mv.multiclass.roc$percent
|
77
|
3
|
attr(auc, "roc") <- mv.multiclass.roc
|
78
|
|
|
79
|
|
# Get partial auc details from first computed auc
|
80
|
3
|
attr(auc, "partial.auc") <- attr(aucs[[1]][[1]], "partial.auc")
|
81
|
3
|
if (!identical(attr(aucs[[1]], "partial.auc"), FALSE)) {
|
82
|
3
|
attr(auc, "partial.auc.focus") <- attr(aucs[[1]][[1]], "partial.auc.focus")
|
83
|
3
|
attr(auc, "partial.auc.correct") <- attr(aucs[[1]][[1]], "partial.auc.correct")
|
84
|
|
}
|
85
|
3
|
class(auc) <- c("mv.multiclass.auc", "numeric")
|
86
|
3
|
return(auc)
|
87
|
|
}
|
88
|
|
|
89
|
|
auc.roc <- function(roc,
|
90
|
|
# Partial auc definition
|
91
|
|
partial.auc=FALSE, # false (consider total area) or numeric length 2: boundaries of the AUC to consider, between 0 and 1, or 0 and 100 if percent is TRUE
|
92
|
|
partial.auc.focus=c("specificity", "sensitivity"), # if partial.auc is not FALSE: do the boundaries
|
93
|
|
partial.auc.correct=FALSE,
|
94
|
|
allow.invalid.partial.auc.correct = FALSE,
|
95
|
|
... # unused required to allow roc passing arguments to plot or ci.
|
96
|
|
) {
|
97
|
3
|
if (!identical(partial.auc, FALSE)) {
|
98
|
3
|
partial.auc.focus <- match.arg(partial.auc.focus)
|
99
|
|
}
|
100
|
|
|
101
|
3
|
percent <- roc$percent
|
102
|
|
|
103
|
|
# Validate partial.auc
|
104
|
3
|
if (! identical(partial.auc, FALSE) & !(is.numeric(partial.auc) && length(partial.auc)==2))
|
105
|
0
|
stop("partial.auc must be either FALSE or a numeric vector of length 2")
|
106
|
|
|
107
|
|
# Ensure partial.auc is sorted with partial.auc[1] >= partial.auc[2]
|
108
|
3
|
partial.auc <- sort(partial.auc, decreasing=TRUE)
|
109
|
|
# Get and sort the sensitivities and specificities
|
110
|
3
|
roc <- sort(roc)
|
111
|
3
|
se <- roc$se
|
112
|
3
|
sp <- roc$sp
|
113
|
|
|
114
|
|
# Full area if partial.auc is FALSE
|
115
|
3
|
if (identical(partial.auc, FALSE)) {
|
116
|
3
|
if (methods::is(roc, "smooth.roc") && ! is.null(roc$smoothing.args) && roc$smoothing.args$method == "binormal") {
|
117
|
3
|
coefs <- coefficients(roc$model)
|
118
|
3
|
auc <- unname(pnorm(coefs[1] / sqrt(1+coefs[2]^2)) * ifelse(percent, 100^2, 1))
|
119
|
|
}
|
120
|
|
else {
|
121
|
3
|
diffs.x <- sp[-1] - sp[-length(sp)]
|
122
|
3
|
means.vert <- (se[-1] + se[-length(se)])/2
|
123
|
3
|
auc <- sum(means.vert * diffs.x)
|
124
|
|
}
|
125
|
|
}
|
126
|
|
# Partial area
|
127
|
|
else {
|
128
|
3
|
if (partial.auc.focus == "sensitivity") {
|
129
|
|
# if we focus on SE, just swap and invert x and y and the computations for SP will work
|
130
|
3
|
x <- rev(se)
|
131
|
3
|
y <- rev(sp)
|
132
|
|
}
|
133
|
|
else {
|
134
|
3
|
x <- sp
|
135
|
3
|
y <- se
|
136
|
|
}
|
137
|
|
|
138
|
|
# find the SEs and SPs in the interval
|
139
|
3
|
x.inc <- x[x <= partial.auc[1] & x >= partial.auc[2]]
|
140
|
3
|
y.inc <- y[x <= partial.auc[1] & x >= partial.auc[2]]
|
141
|
|
# compute the AUC strictly in the interval
|
142
|
3
|
diffs.x <- x.inc[-1] - x.inc[-length(x.inc)]
|
143
|
3
|
means.vert <- (y.inc[-1] + y.inc[-length(y.inc)])/2
|
144
|
3
|
auc <- sum(means.vert * diffs.x)
|
145
|
|
# add the borders:
|
146
|
3
|
if (length(x.inc) == 0) { # special case: the whole AUC is between 2 se/sp points. Need to interpolate from both
|
147
|
3
|
diff.horiz <- partial.auc[1] - partial.auc[2]
|
148
|
|
# determine indices
|
149
|
3
|
idx.hi <- match(FALSE, x < partial.auc[1])
|
150
|
3
|
idx.lo <- idx.hi - 1
|
151
|
|
# proportions
|
152
|
3
|
proportion.hi <- (x[idx.hi] - partial.auc[1]) / (x[idx.hi] - x[idx.lo])
|
153
|
3
|
proportion.lo <- (partial.auc[2] - x[idx.lo]) / (x[idx.hi] - x[idx.lo])
|
154
|
|
# interpolated y's
|
155
|
3
|
y.hi <- y[idx.hi] + proportion.hi * (y[idx.lo] - y[idx.hi])
|
156
|
3
|
y.lo <- y[idx.lo] - proportion.lo * (y[idx.lo] - y[idx.hi])
|
157
|
|
# compute AUC
|
158
|
3
|
mean.vert <- (y.hi + y.lo)/2
|
159
|
3
|
auc <- mean.vert*diff.horiz
|
160
|
|
}
|
161
|
|
else { # if the upper limit is not exactly present in SPs, interpolate
|
162
|
3
|
if (!(partial.auc[1] %in% x.inc)) {
|
163
|
|
# find the limit indices
|
164
|
3
|
idx.out <- match(FALSE, x < partial.auc[1])
|
165
|
3
|
idx.in <- idx.out - 1
|
166
|
|
# interpolate y
|
167
|
3
|
proportion <- (partial.auc[1] - x[idx.out]) / (x[idx.in] - x[idx.out])
|
168
|
3
|
y.interpolated <- y[idx.out] + proportion * (y[idx.in] - y[idx.out])
|
169
|
|
# add to AUC
|
170
|
3
|
auc <- auc + (partial.auc[1] - x[idx.in]) * (y[idx.in] + y.interpolated)/2
|
171
|
|
}
|
172
|
3
|
if (!(partial.auc[2] %in% x.inc)) { # if the lower limit is not exactly present in SPs, interpolate
|
173
|
|
# find the limit indices in and out
|
174
|
|
#idx.out <- length(x) - match(TRUE, rev(x) < partial.auc[2]) + 1
|
175
|
3
|
idx.out <- match(TRUE, x > partial.auc[2]) - 1
|
176
|
3
|
idx.in <- idx.out + 1
|
177
|
|
# interpolate y
|
178
|
3
|
proportion <- (x[idx.in] - partial.auc[2]) / (x[idx.in] - x[idx.out])
|
179
|
3
|
y.interpolated <- y[idx.in] + proportion * (y[idx.out] - y[idx.in])
|
180
|
|
# add to AUC
|
181
|
3
|
auc <- auc + (x[idx.in] - partial.auc[2]) * (y[idx.in] + y.interpolated)/2
|
182
|
|
}
|
183
|
|
}
|
184
|
|
}
|
185
|
|
|
186
|
|
# In percent, we have 100*100 = 10,000 as maximum area, so we need to divide by a factor 100
|
187
|
3
|
if (percent)
|
188
|
3
|
auc <- auc/100
|
189
|
|
|
190
|
|
# Correction according to McClish DC, 1989
|
191
|
3
|
if (all(!identical(partial.auc, FALSE), partial.auc.correct)) { # only for pAUC
|
192
|
3
|
min <- roc.utils.min.partial.auc(partial.auc, percent)
|
193
|
3
|
max <- roc.utils.max.partial.auc(partial.auc, percent)
|
194
|
|
# The correction is defined only when auc >= min
|
195
|
3
|
if (!allow.invalid.partial.auc.correct && auc < min) {
|
196
|
3
|
warning("Partial AUC correction not defined for ROC curves below the diagonal.")
|
197
|
3
|
auc <- NA
|
198
|
|
}
|
199
|
3
|
else if (percent) {
|
200
|
3
|
auc <- (100+((auc-min)*100/(max-min)))/2 # McClish formula adapted for %
|
201
|
|
}
|
202
|
|
else {
|
203
|
3
|
auc <- (1+((auc-min)/(max-min)))/2 # original formula by McClish
|
204
|
|
}
|
205
|
|
}
|
206
|
|
# Prepare the AUC to return with attributes
|
207
|
3
|
auc <- as.vector(auc) # remove potential pre-existing attributes
|
208
|
3
|
attr(auc, "partial.auc") <- partial.auc
|
209
|
3
|
attr(auc, "percent") <- percent
|
210
|
3
|
attr(auc, "roc") <- roc
|
211
|
3
|
if (!identical(partial.auc, FALSE)) {
|
212
|
3
|
attr(auc, "partial.auc.focus") <- partial.auc.focus
|
213
|
3
|
attr(auc, "partial.auc.correct") <- partial.auc.correct
|
214
|
|
}
|
215
|
3
|
class(auc) <- c("auc", class(auc))
|
216
|
3
|
return(auc)
|
217
|
|
}
|