1
|
|
# pROC: Tools Receiver operating characteristic (ROC curves) with
|
2
|
|
# (partial) area under the curve, confidence intervals and comparison.
|
3
|
|
# Copyright (C) 2010-2014 Xavier Robin, Alexandre Hainard, Natacha Turck,
|
4
|
|
# Natalia Tiberti, Frédérique Lisacek, Jean-Charles Sanchez
|
5
|
|
# and Markus Müller
|
6
|
|
#
|
7
|
|
# This program is free software: you can redistribute it and/or modify
|
8
|
|
# it under the terms of the GNU General Public License as published by
|
9
|
|
# the Free Software Foundation, either version 3 of the License, or
|
10
|
|
# (at your option) any later version.
|
11
|
|
#
|
12
|
|
# This program is distributed in the hope that it will be useful,
|
13
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
15
|
|
# GNU General Public License for more details.
|
16
|
|
#
|
17
|
|
# You should have received a copy of the GNU General Public License
|
18
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
19
|
|
|
20
|
|
# Helper functions for the ROC curves. These functions should not be called directly as they peform very specific tasks and do nearly no argument validity checks. Not documented in RD and not exported.
|
21
|
|
|
22
|
|
# returns a list of sensitivities (se) and specificities (sp) for the given data. Robust algorithm
|
23
|
|
roc.utils.perfs.all.safe <- function(thresholds, controls, cases, direction) {
|
24
|
3
|
perf.matrix <- sapply(thresholds, roc.utils.perfs, controls=controls, cases=cases, direction=direction)
|
25
|
|
#stopifnot(identical(roc.utils.perfs.all.fast(thresholds, controls, cases, direction), list(se=perf.matrix[2,], sp=perf.matrix[1,])))
|
26
|
3
|
return(list(se=perf.matrix[2,], sp=perf.matrix[1,]))
|
27
|
|
}
|
28
|
|
|
29
|
|
|
30
|
|
roc.utils.perfs.all.test <- function(thresholds, controls, cases, direction) {
|
31
|
3
|
perfs.safe <- roc.utils.perfs.all.safe(thresholds=thresholds, controls=controls, cases=cases, direction=direction)
|
32
|
3
|
perfs.fast <- roc.utils.perfs.all.fast(thresholds=thresholds, controls=controls, cases=cases, direction=direction)
|
33
|
3
|
perfs.C <- rocUtilsPerfsAllC(thresholds=thresholds, controls=controls, cases=cases, direction=direction)
|
34
|
3
|
if (! (identical(perfs.safe, perfs.fast) && identical(perfs.safe, perfs.C))) {
|
35
|
0
|
sessionInfo <- sessionInfo()
|
36
|
0
|
save(thresholds, controls, cases, direction, sessionInfo, file="pROC_bug.RData")
|
37
|
0
|
stop(sprintf("pROC: algorithms returned different values. Diagnostic data saved in pROC_bug.RData. Please report this bug to <%s>.", utils::packageDescription("pROC")$BugReports))
|
38
|
|
}
|
39
|
3
|
return(perfs.safe)
|
40
|
|
}
|
41
|
|
|
42
|
|
|
43
|
|
# returns a list of sensitivities (se) and specificities (sp) for the given data. Fast algorithm
|
44
|
|
roc.utils.perfs.all.fast <- function(thresholds, controls, cases, direction) {
|
45
|
3
|
ncontrols <- length(controls)
|
46
|
3
|
ncases <- length(cases)
|
47
|
3
|
predictor <- c(controls, cases)
|
48
|
3
|
response <- c(rep(0, length(controls)), rep(1, length(cases)))
|
49
|
3
|
decr <- direction=="<"
|
50
|
3
|
predictor.order <- order(predictor, decreasing=decr)
|
51
|
3
|
predictor.sorted <- predictor[predictor.order]
|
52
|
3
|
response.sorted <- response[predictor.order]
|
53
|
|
|
54
|
3
|
tp <- cumsum(response.sorted==1)
|
55
|
3
|
fp <- cumsum(response.sorted==0)
|
56
|
3
|
se <- tp / ncases
|
57
|
3
|
sp <- (ncontrols - fp) / ncontrols
|
58
|
|
# filter duplicate thresholds
|
59
|
3
|
dups.pred <- rev(duplicated(rev(predictor.sorted)))
|
60
|
3
|
dups.sesp <- duplicated(se) & duplicated(sp)
|
61
|
3
|
dups <- dups.pred | dups.sesp
|
62
|
|
# Make sure we have the right length
|
63
|
3
|
if (sum(!dups) != length(thresholds) - 1) {
|
64
|
0
|
sessionInfo <- sessionInfo()
|
65
|
0
|
save(thresholds, controls, cases, direction, sessionInfo, file="pROC_bug.RData")
|
66
|
0
|
stop(sprintf("pROC: fast algorithm computed an incorrect number of sensitivities and specificities. Diagnostic data saved in pROC_bug.RData. Please report this bug to <%s>.", utils::packageDescription("pROC")$BugReports))
|
67
|
|
}
|
68
|
3
|
if (direction == "<") {
|
69
|
3
|
se <- rev(c(0, se[!dups]))
|
70
|
3
|
sp <- rev(c(1, sp[!dups]))
|
71
|
|
}
|
72
|
|
else {
|
73
|
3
|
se <- c(0, se[!dups])
|
74
|
3
|
sp <- c(1, sp[!dups])
|
75
|
|
}
|
76
|
3
|
return(list(se=se, sp=sp))
|
77
|
|
}
|
78
|
|
|
79
|
|
# As roc.utils.perfs.all but returns an "old-style" matrix (pre-fast-algo-compatible)
|
80
|
|
#roc.utils.perfs.all.matrix <- function(...) {
|
81
|
|
# perfs <- roc.utils.perfs.all(...)
|
82
|
|
# return(matrix(c(perfs$sp, perfs$se), nrow=2, byrow=TRUE))
|
83
|
|
#}
|
84
|
|
|
85
|
|
# returns a vector with two elements, sensitivity and specificity, given the threshold at which to evaluate the performance, the values of controls and cases and the direction of the comparison, a character '>' or '<' as controls CMP cases
|
86
|
|
# sp <- roc.utils.perfs(...)[1,]
|
87
|
|
# se <- roc.utils.perfs(...)[2,]
|
88
|
|
roc.utils.perfs <- function(threshold, controls, cases, direction) {
|
89
|
3
|
if (direction == '>') {
|
90
|
3
|
tp <- sum(cases <= threshold)
|
91
|
3
|
tn <- sum(controls > threshold)
|
92
|
|
}
|
93
|
3
|
else if (direction == '<') {
|
94
|
3
|
tp <- sum(cases >= threshold)
|
95
|
3
|
tn <- sum(controls < threshold)
|
96
|
|
}
|
97
|
|
# return(c(sp, se))
|
98
|
3
|
return(c(sp=tn/length(controls), se=tp/length(cases)))
|
99
|
|
}
|
100
|
|
|
101
|
|
# as roc.utils.perfs, but for densities
|
102
|
|
roc.utils.perfs.dens <- function(threshold, x, dens.controls, dens.cases, direction) {
|
103
|
3
|
if (direction == '>') {
|
104
|
3
|
tp <- sum(dens.cases[x <= threshold])
|
105
|
3
|
tn <- sum(dens.controls[x > threshold])
|
106
|
|
}
|
107
|
3
|
else if (direction == '<') {
|
108
|
3
|
tp <- sum(dens.cases[x >= threshold])
|
109
|
3
|
tn <- sum(dens.controls[x < threshold])
|
110
|
|
}
|
111
|
|
# return(c(sp, se))
|
112
|
3
|
return(c(sp=tn/sum(dens.controls), se=tp/sum(dens.cases)))
|
113
|
|
}
|
114
|
|
|
115
|
|
# return the thresholds to evaluate in the ROC curve, given the 'predictor' values. Returns all unique values of 'predictor' plus 2 extreme values
|
116
|
|
roc.utils.thresholds <- function(predictor, direction) {
|
117
|
3
|
unique.candidates <- sort(unique(predictor))
|
118
|
3
|
thresholds1 <- (c(-Inf, unique.candidates) + c(unique.candidates, +Inf))/2
|
119
|
3
|
thresholds2 <- (c(-Inf, unique.candidates)/2 + c(unique.candidates, +Inf)/2)
|
120
|
3
|
thresholds <- ifelse(abs(thresholds1) > 1e100, thresholds2, thresholds1)
|
121
|
3
|
if (any(ties <- thresholds %in% predictor)) {
|
122
|
|
# If we get here, some thresholds are identical to the predictor
|
123
|
|
# This is caused by near numeric ties that caused the mean to equal
|
124
|
|
# one of the candidate
|
125
|
|
# We need to make sure we select the right threshold more carefully
|
126
|
3
|
if (direction == '>') {
|
127
|
|
# We have:
|
128
|
|
# tp <- sum(cases <= threshold)
|
129
|
|
# tn <- sum(controls > threshold)
|
130
|
|
# We need to make sure the selected threshold
|
131
|
|
# Corresponds to the lowest observation of the predictor
|
132
|
|
# Identify problematic thresholds
|
133
|
|
# rows <- which(ties)
|
134
|
3
|
for (tie.idx in which(ties)) {
|
135
|
3
|
if (thresholds[tie.idx] == unique.candidates[tie.idx - 1]) {
|
136
|
|
# We're already good, nothing to do
|
137
|
|
}
|
138
|
3
|
else if (thresholds[tie.idx] == unique.candidates[tie.idx]) {
|
139
|
3
|
thresholds[tie.idx] <- unique.candidates[tie.idx - 1]
|
140
|
|
}
|
141
|
|
else {
|
142
|
0
|
sessionInfo <- sessionInfo()
|
143
|
0
|
save(predictor, direction, sessionInfo, file="pROC_bug.RData")
|
144
|
0
|
stop(sprintf("Couldn't fix near ties in thresholds: %s, %s, %s, %s. Diagnostic data saved in pROC_bug.RData. Please report this bug to <%s>.", thresholds[tie.idx], unique.candidates[tie.idx - 1], unique.candidates[tie.idx], direction, utils::packageDescription("pROC")$BugReports))
|
145
|
|
}
|
146
|
|
}
|
147
|
|
}
|
148
|
3
|
else if (direction == '<') {
|
149
|
|
# We have:
|
150
|
|
# tp <- sum(cases >= threshold)
|
151
|
|
# tn <- sum(controls < threshold)
|
152
|
|
# We need to make sure the selected threshold
|
153
|
|
# Corresponds to the highest observation of the predictor
|
154
|
|
# Identify the problematic thresholds:
|
155
|
|
# rows <- which(apply(o, 1, any))
|
156
|
3
|
for (tie.idx in which(ties)) {
|
157
|
3
|
if (thresholds[tie.idx] == unique.candidates[tie.idx - 1]) {
|
158
|
|
# Easy to fix: should be unique.candidates[tie.idx]
|
159
|
3
|
thresholds[tie.idx] <- unique.candidates[tie.idx]
|
160
|
3
|
} else if (thresholds[tie.idx] == unique.candidates[tie.idx]) {
|
161
|
|
# We're already good, nothing to do
|
162
|
|
}
|
163
|
|
else {
|
164
|
0
|
sessionInfo <- sessionInfo()
|
165
|
0
|
save(predictor, direction, sessionInfo, file="pROC_bug.RData")
|
166
|
0
|
stop(sprintf("Couldn't fix near ties in thresholds: %s, %s, %s, %s. Diagnostic data saved in pROC_bug.RData. Please report this bug to <%s>.", thresholds[tie.idx], unique.candidates[tie.idx - 1], unique.candidates[tie.idx], direction, utils::packageDescription("pROC")$BugReports))
|
167
|
|
}
|
168
|
|
}
|
169
|
|
}
|
170
|
|
}
|
171
|
3
|
return(thresholds)
|
172
|
|
}
|
173
|
|
|
174
|
|
# Find all the local maximas of the ROC curve. Returns a logical vector
|
175
|
|
roc.utils.max.thresholds.idx <- function(thresholds, sp, se) {
|
176
|
3
|
reversed <- FALSE
|
177
|
3
|
if (is.unsorted(sp)) {
|
178
|
|
# make sure SP is sorted increasingly, and sort thresholds accordingly
|
179
|
0
|
thresholds <- rev(thresholds)
|
180
|
0
|
sp <- rev(sp)
|
181
|
0
|
se <- rev(se)
|
182
|
0
|
reversed <- TRUE
|
183
|
|
}
|
184
|
|
# TODO: find whether the duplicate check is still needed.
|
185
|
|
# Should have been fixed by passing only c(controls, cases)
|
186
|
|
# instead of whole 'predictor' to roc.utils.thresholds in roc.default
|
187
|
|
# but are there other potential issues like that?
|
188
|
3
|
dup <- duplicated(data.frame(sp, se))
|
189
|
3
|
thresholds <- thresholds[!dup]
|
190
|
3
|
sp <- sp[!dup]
|
191
|
3
|
se <- se[!dup]
|
192
|
|
# Find the local maximas
|
193
|
3
|
if (length(thresholds) == 1) {
|
194
|
0
|
local.maximas <- TRUE # let's consider that if there is only 1 point, we should print it.
|
195
|
|
}
|
196
|
3
|
else if (length(thresholds) == 2) {
|
197
|
0
|
local.maximas <- c(se[1] > se[2], sp[2] > sp[1])
|
198
|
|
}
|
199
|
|
else {
|
200
|
3
|
local.maximas <- se[1] > se[2]
|
201
|
3
|
for (i in 2:(length(thresholds)-1)) {
|
202
|
3
|
if (sp[i] > sp[i-1] & se[i] > se[i+1])
|
203
|
3
|
local.maximas <- c(local.maximas, TRUE)
|
204
|
3
|
else if (sp[i] > sp[i-1] & se[i] == 0)
|
205
|
0
|
local.maximas <- c(local.maximas, TRUE)
|
206
|
3
|
else if (se[i] > se[i-1] & sp[i] == 1)
|
207
|
0
|
local.maximas <- c(local.maximas, TRUE)
|
208
|
|
else
|
209
|
3
|
local.maximas <- c(local.maximas, FALSE)
|
210
|
|
}
|
211
|
3
|
local.maximas <- c(local.maximas, sp[length(thresholds)] > sp[length(thresholds)-1])
|
212
|
|
}
|
213
|
3
|
if (any(dup)) {
|
214
|
0
|
lms <- rep(FALSE, length(dup))
|
215
|
0
|
lms[!dup] <- local.maximas
|
216
|
0
|
local.maximas <- lms
|
217
|
|
}
|
218
|
3
|
if (reversed)
|
219
|
0
|
rev(local.maximas)
|
220
|
|
|
221
|
|
# Remove +-Inf at the limits of the curve
|
222
|
|
#local.maximas <- local.maximas & is.finite(thresholds)
|
223
|
|
# Question: why did I do that? It breaks coords.roc when partial.auc contains only the extreme point
|
224
|
|
|
225
|
3
|
return(local.maximas)
|
226
|
|
}
|
227
|
|
|
228
|
|
# Define which progress bar to use
|
229
|
|
roc.utils.get.progress.bar <- function(name = getOption("pROCProgress")$name, title = "Bootstrap", label = "", width = getOption("pROCProgress")$width, char = getOption("pROCProgress")$char, style = getOption("pROCProgress")$style, ...) {
|
230
|
3
|
if (name == "tk") { # load tcltk if possible
|
231
|
0
|
if (!requireNamespace("tcltk")) {
|
232
|
|
# If tcltk cannot be loaded fall back to default text progress bar
|
233
|
0
|
name <- "text"
|
234
|
0
|
style <- 3
|
235
|
0
|
char <- "="
|
236
|
0
|
width <- NA
|
237
|
0
|
warning("Package tcltk required with progress='tk' but could not be loaded. Falling back to text progress bar.")
|
238
|
|
}
|
239
|
|
}
|
240
|
3
|
if (name == "none")
|
241
|
3
|
progress_none()
|
242
|
0
|
else if (name == "text") {
|
243
|
|
# Put some default values if user only passed a name
|
244
|
0
|
if (missing(style) && missing(char) && missing(width) && getOption("pROCProgress")$name != "text") {
|
245
|
0
|
style <- 3
|
246
|
0
|
char <- "="
|
247
|
0
|
width <- NA
|
248
|
|
}
|
249
|
0
|
progress_text(char=char, style=style, width=width)
|
250
|
|
}
|
251
|
0
|
else if (name == "tk" || name == "win")
|
252
|
0
|
match.fun(paste("progress", name, sep = "_"))(title=title, label=label, width=width)
|
253
|
|
else # in the special case someone made a progress_other function
|
254
|
0
|
match.fun(paste("progress", name, sep = "_"))(title=title, label=label, width=width, char=char, style=style)
|
255
|
|
}
|
256
|
|
|
257
|
|
# sort roc curve. Make sure specificities are increasing.
|
258
|
|
sort.roc <- function(roc) {
|
259
|
3
|
if (is.unsorted(roc$specificities)) {
|
260
|
3
|
roc$sensitivities <- rev(roc$sensitivities)
|
261
|
3
|
roc$specificities <- rev(roc$specificities)
|
262
|
3
|
roc$thresholds <- rev(roc$thresholds)
|
263
|
|
}
|
264
|
3
|
return(roc)
|
265
|
|
}
|
266
|
|
|
267
|
|
# sort smoothed roc curve. Make sure specificities are increasing.
|
268
|
|
sort.smooth.roc <- function(roc) {
|
269
|
3
|
if (is.unsorted(roc$specificities)) {
|
270
|
3
|
roc$sensitivities <- rev(roc$sensitivities)
|
271
|
3
|
roc$specificities <- rev(roc$specificities)
|
272
|
|
}
|
273
|
3
|
return(roc)
|
274
|
|
}
|
275
|
|
|
276
|
|
# The list of valid coordinate arguments, without 'thresholds'
|
277
|
|
roc.utils.valid.coords <- c("specificity", "sensitivity", "accuracy",
|
278
|
|
"tn", "tp", "fn", "fp",
|
279
|
|
"npv", "ppv", "fdr",
|
280
|
|
"fpr", "tpr", "tnr", "fnr",
|
281
|
|
"1-specificity", "1-sensitivity", "1-accuracy", "1-npv", "1-ppv",
|
282
|
|
"precision", "recall",
|
283
|
|
"youden", "closest.topleft")
|
284
|
|
|
285
|
|
# Arguments which can be returned by coords
|
286
|
|
# @param threshold: FALSE for smooth.roc where threshold isn't valid
|
287
|
|
roc.utils.match.coords.ret.args <- function(x, threshold = TRUE) {
|
288
|
3
|
valid.ret.args <- roc.utils.valid.coords
|
289
|
3
|
if (threshold) {
|
290
|
3
|
valid.ret.args <- c("threshold", valid.ret.args)
|
291
|
|
}
|
292
|
|
|
293
|
3
|
if ("all" %in% x) {
|
294
|
3
|
if (length(x) > 1) {
|
295
|
3
|
stop("ret='all' can't be used with other 'ret' options.")
|
296
|
|
}
|
297
|
3
|
x <- valid.ret.args
|
298
|
|
}
|
299
|
3
|
x <- replace(x, x=="topleft", "closest.topleft")
|
300
|
3
|
x <- replace(x, x=="t", "threshold")
|
301
|
3
|
x <- replace(x, x=="npe", "1-npv")
|
302
|
3
|
x <- replace(x, x=="ppe", "1-ppv")
|
303
|
3
|
return(match.arg(x, valid.ret.args, several.ok=TRUE))
|
304
|
|
}
|
305
|
|
|
306
|
|
# Arguments which can be used as input for coords
|
307
|
|
# @param threshold: FALSE for smooth.roc where threshold isn't valid
|
308
|
|
roc.utils.match.coords.input.args <- function(x, threshold = TRUE) {
|
309
|
3
|
valid.args <- roc.utils.valid.coords
|
310
|
3
|
if (threshold) {
|
311
|
3
|
valid.args <- c("threshold", valid.args)
|
312
|
|
}
|
313
|
3
|
x <- replace(x, x=="topleft", "closest.topleft")
|
314
|
3
|
x <- replace(x, x=="t", "threshold")
|
315
|
3
|
x <- replace(x, x=="npe", "1-npv")
|
316
|
3
|
x <- replace(x, x=="ppe", "1-ppv")
|
317
|
3
|
matched <- match.arg(x, valid.args, several.ok=FALSE)
|
318
|
|
# We only handle monotone coords
|
319
|
3
|
if (! coord.is.monotone[matched]) {
|
320
|
3
|
stop(sprintf("Coordinate '%s' is not monotone and cannot be used as input.", matched))
|
321
|
|
}
|
322
|
3
|
return(matched)
|
323
|
|
}
|
324
|
|
|
325
|
|
# Compute the min/max for partial AUC
|
326
|
|
# ... with an auc
|
327
|
|
roc.utils.min.partial.auc.auc <- function(auc) {
|
328
|
3
|
roc.utils.min.partial.auc(attr(auc, "partial.auc"), attr(auc, "percent"))
|
329
|
|
}
|
330
|
|
|
331
|
|
roc.utils.max.partial.auc.auc <- function(roc) {
|
332
|
0
|
roc.utils.max.partial.auc(attr(auc, "partial.auc"), attr(auc, "percent"))
|
333
|
|
}
|
334
|
|
|
335
|
|
# ... with partial.auc/percent
|
336
|
|
roc.utils.min.partial.auc <- function(partial.auc, percent) {
|
337
|
3
|
if (!identical(partial.auc, FALSE)) {
|
338
|
3
|
min <- sum(ifelse(percent, 100, 1)-partial.auc)*abs(diff(partial.auc))/2/ifelse(percent, 100, 1)
|
339
|
|
}
|
340
|
|
else {
|
341
|
3
|
min <- 0.5 * ifelse(percent, 100, 1)
|
342
|
|
}
|
343
|
3
|
return(min)
|
344
|
|
}
|
345
|
|
|
346
|
|
roc.utils.max.partial.auc <- function(partial.auc, percent) {
|
347
|
3
|
if (!identical(partial.auc, FALSE)) {
|
348
|
3
|
max <- abs(diff(partial.auc))
|
349
|
|
}
|
350
|
|
else {
|
351
|
0
|
max <- 1 * ifelse(percent, 100, 1)
|
352
|
|
}
|
353
|
3
|
return(max)
|
354
|
|
}
|
355
|
|
|
356
|
|
# Checks if the
|
357
|
|
# Input: roc object
|
358
|
|
# Output: boolean, true the curve reaches 100%/100%, false otherwise
|
359
|
|
roc.utils.is.perfect.curve <- function(roc) {
|
360
|
3
|
best.point <- max(roc$sensitivities + roc$specificities) / ifelse(roc$percent, 100, 1)
|
361
|
3
|
return(abs(best.point - 2) < .Machine$double.eps ^ 0.5) # or best.point == 2, with numerical tolerance
|
362
|
|
}
|
363
|
|
|
364
|
|
# Load package namespace 'pkg'.
|
365
|
|
# Input: package name
|
366
|
|
# Returns: TRUE upon success (or if the package was already loaded)
|
367
|
|
# Stops if the package can't be loaded
|
368
|
|
load.suggested.package <- function(pkg) {
|
369
|
3
|
if (requireNamespace(pkg)) {
|
370
|
3
|
return(TRUE)
|
371
|
|
}
|
372
|
3
|
else if (interactive()) {
|
373
|
0
|
if (getRversion() < "3.5.0") { # utils::askYesNo not available
|
374
|
0
|
message(sprintf("Package %s not available, do you want to install it now?", pkg))
|
375
|
0
|
auto.install <- utils::menu(c("Yes", "No")) == 1
|
376
|
|
}
|
377
|
|
else {
|
378
|
0
|
auto.install <- utils::askYesNo(sprintf("Package %s not available, do you want to install it now?", pkg))
|
379
|
|
}
|
380
|
0
|
if (isTRUE(auto.install)) {
|
381
|
0
|
utils::install.packages(pkg)
|
382
|
0
|
if (requireNamespace(pkg)) {
|
383
|
0
|
return(TRUE)
|
384
|
|
}
|
385
|
|
else {
|
386
|
0
|
stop(sprintf("Installation of %s failed!", pkg))
|
387
|
|
}
|
388
|
|
}
|
389
|
|
}
|
390
|
0
|
stop(sprintf("Package '%s' not available.", pkg))
|
391
|
|
}
|
392
|
|
|
393
|
|
|
394
|
|
# Calculate coordinates
|
395
|
|
# @param roc: the roc curve, used to guess if data is in percent and number of cases and controls.
|
396
|
|
# @param thr, se, sp
|
397
|
|
# @param best.weights: see coords
|
398
|
|
# @return data.frame
|
399
|
|
roc.utils.calc.coords <- function(roc, thr, se, sp, best.weights) {
|
400
|
3
|
ncases <- ifelse(methods::is(roc, "smooth.roc"), length(attr(roc, "roc")$cases), length(roc$cases))
|
401
|
3
|
ncontrols <- ifelse(methods::is(roc, "smooth.roc"), length(attr(roc, "roc")$controls), length(roc$controls))
|
402
|
3
|
substr.percent <- ifelse(roc$percent, 100, 1)
|
403
|
|
|
404
|
3
|
tp <- se * ncases / substr.percent
|
405
|
3
|
fn <- ncases - tp
|
406
|
3
|
tn <- sp * ncontrols / substr.percent
|
407
|
3
|
fp <- ncontrols - tn
|
408
|
3
|
npv <- substr.percent * tn / (tn + fn)
|
409
|
3
|
ppv <- substr.percent * tp / (tp + fp)
|
410
|
|
#res <- matrix(NA, nrow = length(ret), ncol = length(se))
|
411
|
|
#if ("tp" %in% ret) {}
|
412
|
3
|
accuracy <- substr.percent * (tp + tn) / (ncases + ncontrols)
|
413
|
3
|
precision <- ppv
|
414
|
3
|
recall <- tpr <- se
|
415
|
3
|
fpr <- substr.percent - sp
|
416
|
3
|
tnr <- sp
|
417
|
3
|
fnr <- substr.percent * fn / (tp + fn)
|
418
|
3
|
fdr <- substr.percent * fp / (tp + fp)
|
419
|
3
|
youden <- roc.utils.optim.crit(se, sp, substr.percent, best.weights, "youden")
|
420
|
3
|
closest.topleft <- - roc.utils.optim.crit(se, sp, substr.percent, best.weights, "closest.topleft") / substr.percent
|
421
|
|
|
422
|
3
|
return(cbind(
|
423
|
3
|
threshold=thr,
|
424
|
3
|
sensitivity=se,
|
425
|
3
|
specificity=sp,
|
426
|
3
|
accuracy=accuracy,
|
427
|
3
|
tn=tn,
|
428
|
3
|
tp=tp,
|
429
|
3
|
fn=fn,
|
430
|
3
|
fp=fp,
|
431
|
3
|
npv=npv,
|
432
|
3
|
ppv=ppv,
|
433
|
3
|
tpr=tpr,
|
434
|
3
|
tnr=tnr,
|
435
|
3
|
fpr=fpr,
|
436
|
3
|
fnr=fnr,
|
437
|
3
|
fdr=fdr,
|
438
|
3
|
"1-specificity"=substr.percent-sp,
|
439
|
3
|
"1-sensitivity"=substr.percent-se,
|
440
|
3
|
"1-accuracy"=substr.percent-accuracy,
|
441
|
3
|
"1-npv"=substr.percent-npv,
|
442
|
3
|
"1-ppv"=substr.percent-ppv,
|
443
|
3
|
precision=precision,
|
444
|
3
|
recall=recall,
|
445
|
3
|
youden=youden,
|
446
|
3
|
closest.topleft=closest.topleft
|
447
|
|
))
|
448
|
|
}
|
449
|
|
|
450
|
|
# Match arbitrary user-supplied thresholds to the threshold of the ROC curve.
|
451
|
|
# We need to be careful to assign x to the right thresholds around exact data point
|
452
|
|
# values. This means this function cannot look at the ROC thresholds themselves
|
453
|
|
# but must instead use the predictor values to assess the thresholds exactly.
|
454
|
|
# Returns the indices of the thresholds x.
|
455
|
|
# @param roc: the roc curve
|
456
|
|
# @param x: the threshold to determine indices
|
457
|
|
# @return integer vector of indices along roc$thresholds/roc$se/roc$sp.
|
458
|
|
roc.utils.thr.idx <- function(roc, x) {
|
459
|
3
|
cut_points <- sort(unique(roc$predictor))
|
460
|
3
|
thr_idx <- rep(NA_integer_, length(x))
|
461
|
3
|
if (roc$direction == "<") {
|
462
|
3
|
cut_points <- c(cut_points, Inf)
|
463
|
3
|
j <- 1
|
464
|
3
|
o <- order(x)
|
465
|
3
|
for (i in seq_along(x)) {
|
466
|
3
|
t <- x[o[i]]
|
467
|
3
|
while (cut_points[j] < t) {
|
468
|
3
|
j <- j + 1
|
469
|
|
}
|
470
|
3
|
thr_idx[o[i]] <- j
|
471
|
|
}
|
472
|
|
}
|
473
|
|
else {
|
474
|
3
|
cut_points <- c(rev(cut_points), -Inf)
|
475
|
3
|
j <- 1
|
476
|
3
|
o <- order(x, decreasing = TRUE)
|
477
|
3
|
for (i in seq_along(x)) {
|
478
|
3
|
t <- x[o[i]]
|
479
|
3
|
while (cut_points[j] > t) {
|
480
|
3
|
j <- j + 1
|
481
|
|
}
|
482
|
3
|
thr_idx[o[i]] <- j
|
483
|
|
}
|
484
|
|
}
|
485
|
3
|
return(thr_idx)
|
486
|
|
}
|
487
|
|
|
488
|
|
|
489
|
|
# Get optimal criteria Youden or Closest Topleft
|
490
|
|
# @param se, sp: the roc curve
|
491
|
|
# @param max: the maximum value, 1 or 100, based on percent. Namely ifelse(percent, 100, 1)
|
492
|
|
# @param weights: see coords(best.weights=)
|
493
|
|
# @param method: youden or closest.topleft coords(best.method=)
|
494
|
|
# @return numeric vector along roc$thresholds/roc$se/roc$sp.
|
495
|
|
roc.utils.optim.crit <- function(se, sp, max, weights, method) {
|
496
|
3
|
if (is.numeric(weights) && length(weights) == 2) {
|
497
|
3
|
r <- (1 - weights[2]) / (weights[1] * weights[2]) # r should be 1 by default
|
498
|
|
}
|
499
|
|
else {
|
500
|
3
|
stop("'best.weights' must be a numeric vector of length 2")
|
501
|
|
}
|
502
|
3
|
if (weights[2] <= 0 || weights[2] >= 1) {
|
503
|
3
|
stop("prevalence ('best.weights[2]') must be in the interval ]0,1[.")
|
504
|
|
}
|
505
|
|
|
506
|
3
|
if (method == "youden") {
|
507
|
3
|
optim.crit <- se + r * sp
|
508
|
|
}
|
509
|
3
|
else if (method == "closest.topleft" || method == "topleft") {
|
510
|
3
|
optim.crit <- - ((max - se)^2 + r * (max - sp)^2)
|
511
|
|
}
|
512
|
3
|
return(optim.crit)
|
513
|
|
}
|
514
|
|
|
515
|
|
coord.is.monotone <- c(
|
516
|
|
"threshold"=TRUE,
|
517
|
|
"sensitivity"=TRUE,
|
518
|
|
"specificity"=TRUE,
|
519
|
|
"accuracy"=FALSE,
|
520
|
|
"tn"=TRUE,
|
521
|
|
"tp"=TRUE,
|
522
|
|
"fn"=TRUE,
|
523
|
|
"fp"=TRUE,
|
524
|
|
"npv"=FALSE,
|
525
|
|
"ppv"=FALSE,
|
526
|
|
"tpr"=TRUE,
|
527
|
|
"tnr"=TRUE,
|
528
|
|
"fpr"=TRUE,
|
529
|
|
"fnr"=TRUE,
|
530
|
|
"fdr"=FALSE,
|
531
|
|
"1-specificity"=TRUE,
|
532
|
|
"1-sensitivity"=TRUE,
|
533
|
|
"1-accuracy"=FALSE,
|
534
|
|
"1-npv"=FALSE,
|
535
|
|
"1-ppv"=FALSE,
|
536
|
|
"precision"=FALSE,
|
537
|
|
"recall"=TRUE,
|
538
|
|
"youden"=FALSE,
|
539
|
|
"closest.topleft"=FALSE
|
540
|
|
)
|
541
|
|
|
542
|
|
coord.is.decreasing <- c(
|
543
|
|
"threshold"=NA, # Depends on direction
|
544
|
|
"sensitivity"=TRUE,
|
545
|
|
"specificity"=FALSE,
|
546
|
|
"accuracy"=NA,
|
547
|
|
"tn"=FALSE,
|
548
|
|
"tp"=TRUE,
|
549
|
|
"fn"=FALSE,
|
550
|
|
"fp"=TRUE,
|
551
|
|
"npv"=NA,
|
552
|
|
"ppv"=NA,
|
553
|
|
"tpr"=TRUE,
|
554
|
|
"tnr"=FALSE,
|
555
|
|
"fpr"=TRUE,
|
556
|
|
"fnr"=FALSE,
|
557
|
|
"fdr"=NA,
|
558
|
|
"1-specificity"=TRUE,
|
559
|
|
"1-sensitivity"=FALSE,
|
560
|
|
"1-accuracy"=NA,
|
561
|
|
"1-npv"=NA,
|
562
|
|
"1-ppv"=NA,
|
563
|
|
"precision"=NA,
|
564
|
|
"recall"=TRUE,
|
565
|
|
"youden"=NA,
|
566
|
|
"closest.topleft"=NA
|
567
|
|
)
|
568
|
|
|
569
|
|
# Get response and predictor(s) from a formula.
|
570
|
|
# This function takes care of all the logic to handle
|
571
|
|
# weights, subset, na.action etc. It handles formulas with
|
572
|
|
# and without data. It rejects weights and certain na.actions.
|
573
|
|
# @param formula
|
574
|
|
# @param data
|
575
|
|
# @param data.missing
|
576
|
|
# @param call the call from the parent function
|
577
|
|
# @param ... the ... from the parent function
|
578
|
|
# @return a list with 3 elements: response (vector), predictor.names (character),
|
579
|
|
# predictors (data.frame).
|
580
|
|
roc.utils.extract.formula <- function(formula, data, data.missing, call, ...) {
|
581
|
|
# Get predictors (easy)
|
582
|
3
|
if (data.missing) {
|
583
|
3
|
predictors <- attr(terms(formula), "term.labels")
|
584
|
|
}
|
585
|
|
else {
|
586
|
3
|
predictors <- attr(terms(formula, data = data), "term.labels")
|
587
|
|
}
|
588
|
|
|
589
|
3
|
indx <- match(c("formula", "data", "weights", "subset", "na.action"), names(call), nomatch=0)
|
590
|
3
|
if (indx[1] == 0) {
|
591
|
0
|
stop("A formula argument is required")
|
592
|
|
}
|
593
|
|
# Keep the standard arguments and run them in model.frame
|
594
|
3
|
temp <- call[c(1,indx)]
|
595
|
3
|
temp[[1]] <- as.name("model.frame")
|
596
|
|
# Only na.pass and na.fail should be used
|
597
|
3
|
if (indx[5] != 0) {
|
598
|
3
|
na.action.value = as.character(call[indx[5]])
|
599
|
3
|
if (! na.action.value %in% c("na.pass", "na.fail")) {
|
600
|
3
|
warning(paste0(sprintf("Value %s of na.action is not supported ", na.action.value),
|
601
|
3
|
"and will break pairing in roc.test and are.paired. ",
|
602
|
3
|
"Please use 'na.rm = TRUE' instead."))
|
603
|
|
}
|
604
|
|
}
|
605
|
|
else {
|
606
|
3
|
temp$na.action = "na.pass"
|
607
|
|
}
|
608
|
|
# Adjust call with data from caller
|
609
|
3
|
if (data.missing) {
|
610
|
3
|
temp$data <- NULL
|
611
|
|
}
|
612
|
|
|
613
|
|
# Run model.frame in the parent
|
614
|
3
|
m <- eval.parent(temp, n = 2)
|
615
|
|
|
616
|
3
|
if (!is.null(model.weights(m))) stop("weights are not supported")
|
617
|
|
|
618
|
3
|
return(list(response.name = names(m)[1],
|
619
|
3
|
response = model.response(m),
|
620
|
3
|
predictor.names = predictors,
|
621
|
3
|
predictors = m[predictors]))
|
622
|
|
}
|