1
|
|
# pROC: Tools Receiver operating characteristic (ROC curves) with
|
2
|
|
# (partial) area under the curve, confidence intervals and comparison.
|
3
|
|
# Copyright (C) 2010-2014 Xavier Robin, Alexandre Hainard, Natacha Turck,
|
4
|
|
# Natalia Tiberti, Frédérique Lisacek, Jean-Charles Sanchez,
|
5
|
|
# Markus Müller
|
6
|
|
#
|
7
|
|
# This program is free software: you can redistribute it and/or modify
|
8
|
|
# it under the terms of the GNU General Public License as published by
|
9
|
|
# the Free Software Foundation, either version 3 of the License, or
|
10
|
|
# (at your option) any later version.
|
11
|
|
#
|
12
|
|
# This program is distributed in the hope that it will be useful,
|
13
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
|
# GNU General Public License for more details.
|
16
|
|
#
|
17
|
|
# You should have received a copy of the GNU General Public License
|
18
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
|
|
20
|
|
# Delong's test paired, used by roc.test.roc
|
21
|
|
delong.paired.test <- function(roc1, roc2) {
|
22
|
3
|
n <- length(roc1$controls)
|
23
|
3
|
m <- length(roc1$cases)
|
24
|
|
|
25
|
3
|
VR <- delongPlacements(roc1)
|
26
|
3
|
VS <- delongPlacements(roc2)
|
27
|
|
|
28
|
3
|
SX <- matrix(NA, ncol=2, nrow=2)
|
29
|
3
|
SX[1,1] <- sum((VR$X - VR$theta) * (VR$X - VR$theta))/(m-1)
|
30
|
3
|
SX[1,2] <- sum((VR$X - VR$theta) * (VS$X - VS$theta))/(m-1)
|
31
|
3
|
SX[2,1] <- sum((VS$X - VS$theta) * (VR$X - VR$theta))/(m-1)
|
32
|
3
|
SX[2,2] <- sum((VS$X - VS$theta) * (VS$X - VS$theta))/(m-1)
|
33
|
|
|
34
|
3
|
SY <- matrix(NA, ncol=2, nrow=2)
|
35
|
3
|
SY[1,1] <- sum((VR$Y - VR$theta) * (VR$Y - VR$theta))/(n-1)
|
36
|
3
|
SY[1,2] <- sum((VR$Y - VR$theta) * (VS$Y - VS$theta))/(n-1)
|
37
|
3
|
SY[2,1] <- sum((VS$Y - VS$theta) * (VR$Y - VR$theta))/(n-1)
|
38
|
3
|
SY[2,2] <- sum((VS$Y - VS$theta) * (VS$Y - VS$theta))/(n-1)
|
39
|
|
|
40
|
3
|
S <- SX/m + SY/n
|
41
|
3
|
L <- c(1,-1)
|
42
|
3
|
sig <- sqrt(L%*%S%*%L)
|
43
|
3
|
zscore <- (VR$theta-VS$theta)/sig[1]
|
44
|
3
|
if (is.nan(zscore) && VR$theta == VR$theta && sig[1] == 0)
|
45
|
0
|
zscore <- 0 # special case: no difference between theta's produces a NaN
|
46
|
3
|
return(zscore)
|
47
|
|
}
|
48
|
|
|
49
|
|
# Delong's test unpaired, used by roc.test.roc
|
50
|
|
delong.unpaired.test <- function(roc1, roc2) {
|
51
|
|
|
52
|
3
|
nR <- length(roc1$controls)
|
53
|
3
|
mR <- length(roc1$cases)
|
54
|
|
|
55
|
3
|
nS <- length(roc2$controls)
|
56
|
3
|
mS <- length(roc2$cases)
|
57
|
|
|
58
|
3
|
VR <- delongPlacements(roc1)
|
59
|
3
|
VS <- delongPlacements(roc2)
|
60
|
|
|
61
|
3
|
SRX <- sum((VR$X - VR$theta) * (VR$X - VR$theta))/(mR-1)
|
62
|
3
|
SSX <- sum((VS$X - VS$theta) * (VS$X - VS$theta))/(mS-1)
|
63
|
|
|
64
|
3
|
SRY <- sum((VR$Y - VR$theta) * (VR$Y - VR$theta))/(nR-1)
|
65
|
3
|
SSY <- sum((VS$Y - VS$theta) * (VS$Y - VS$theta))/(nS-1)
|
66
|
|
|
67
|
3
|
SR <- SRX/mR + SRY/nR
|
68
|
3
|
SS <- SSX/mS + SSY/nS
|
69
|
|
|
70
|
3
|
ntotR <- nR + mR
|
71
|
3
|
ntotS <- nS + mS
|
72
|
3
|
SSR <- sqrt((SR) + (SS))
|
73
|
3
|
t <- (VR$theta - VS$theta) / SSR
|
74
|
3
|
df <- ((SR) + (SS))^2 /
|
75
|
3
|
(((SR)^2 / (ntotR-1)) + ((SS)^2 / (ntotS -1 )))
|
76
|
|
|
77
|
3
|
return(c(t, df))
|
78
|
|
}
|
79
|
|
|
80
|
|
ci.auc.delong <- function(roc, conf.level) {
|
81
|
3
|
YR <- roc$controls # = C2, n, YRj
|
82
|
3
|
XR <- roc$cases # = C1, m, XRi
|
83
|
|
|
84
|
3
|
n <- length(YR)
|
85
|
3
|
m <- length(XR)
|
86
|
|
|
87
|
|
# If controls or cases have a single observation, we would produce NaNs in SX and SY
|
88
|
3
|
if (m <= 1 || n <= 1) {
|
89
|
0
|
return(rep(NA, 3))
|
90
|
|
}
|
91
|
|
|
92
|
3
|
V <- delongPlacements(roc)
|
93
|
|
|
94
|
3
|
SX <- sum((V$X - V$theta) * (V$X - V$theta))/(m-1)
|
95
|
3
|
SY <- sum((V$Y - V$theta) * (V$Y - V$theta))/(n-1)
|
96
|
3
|
S <- SX/m + SY/n
|
97
|
3
|
ci <- qnorm(c(0+(1-conf.level)/2, .5, 1-(1-conf.level)/2), mean = V$theta, sd = sqrt(S))
|
98
|
|
# In some rare cases we have ci[3] > 1 or ci[1] < 0
|
99
|
3
|
ci[ci > 1] <- 1
|
100
|
3
|
ci[ci < 0] <- 0
|
101
|
|
|
102
|
|
# According to Pepe (p. 107), we should probably be doing something like
|
103
|
|
# log(roc$auc / (1 - roc$auc)) + pnorm( 1-conf.level/2) * (S / (roc$auc * (1 - roc$auc)))
|
104
|
|
# log(roc$auc / (1 - roc$auc)) - pnorm( 1-conf.level/2) * (S / (roc$auc * (1 - roc$auc)))
|
105
|
|
# for logit AUC, so that for AUC:
|
106
|
|
# exp(log(roc$auc / (1 - roc$auc)) + pnorm( 1-conf.level/2) * (S / (roc$auc * (1 - roc$auc)))) * (1 - roc$auc)
|
107
|
|
# exp(log(roc$auc / (1 - roc$auc)) - pnorm( 1-conf.level/2) * (S / (roc$auc * (1 - roc$auc)))) * (1 - roc$auc)
|
108
|
|
# However the bounds are very very much smaller (about 10 times) than bootstrap, which seems unrealistic
|
109
|
|
# Stay with normal conf interval for now.
|
110
|
|
|
111
|
3
|
return(ci)
|
112
|
|
}
|
113
|
|
|
114
|
|
# Calls delongPlacementsCpp safely
|
115
|
|
# Ensures that the theta value calculated is correct
|
116
|
|
delongPlacements <- function(roc) {
|
117
|
3
|
placements <- delongPlacementsCpp(roc)
|
118
|
|
|
119
|
|
# Ensure theta equals auc
|
120
|
3
|
auc <- roc$auc / ifelse(roc$percent, 100, 1)
|
121
|
3
|
if (! isTRUE(all.equal(placements$theta, auc))) {
|
122
|
0
|
sessionInfo <- sessionInfo()
|
123
|
0
|
save(roc, placements, sessionInfo, file="pROC_bug.RData")
|
124
|
0
|
stop(sprintf("pROC: error in calculating DeLong's theta: got %.20f instead of %.20f. Diagnostic data saved in pROC_bug.RData. Please report this bug to <%s>.", placements$theta, auc, utils:: packageDescription("pROC")$BugReports))
|
125
|
|
}
|
126
|
|
|
127
|
3
|
return(placements)
|
128
|
|
}
|