diegozea / ROC.jl
Showing 3 of 6 files from the diff.

@@ -1,6 +1,5 @@
Loading
1 1
## ROC Analysis
2 2
3 -
4 3
function AUC(roc::ROCData)
5 4
	auc=zero(Float64)
6 5
	for i in 2:length(roc.thresholds)
@@ -35,4 +34,4 @@
Loading
35 34
# Positive predictive value for every threshold index `i`, computed as `roc.TP[i] / i`.
# The range is parenthesized on purpose: `:` binds more loosely than `./`, so the
# unparenthesized form parsed as `(roc.TP ./ 1):length(roc.thresholds)` instead of an
# elementwise division of `roc.TP` by `1:length(roc.thresholds)`.
# NOTE(review): the denominator is the threshold index, not `TP + FP` — confirm this
# is still the intended definition when thresholds are the unique scores.
PPV(roc::ROCData) = roc.TP ./ (1:length(roc.thresholds))
36 35
# Positive predictive value at threshold index `n`: the entries of `roc.TP` whose
# position equals `n`, divided by `n`. The result is a vector (empty when `n` is not
# a valid threshold index), not a scalar.
function PPV(roc::ROCData, n::Int)
	at_n = (1:length(roc.thresholds)) .== n
	return roc.TP[at_n] / n
end
37 36
38 -
cutoffs(roc::ROCData) = roc.scores[1:length(roc.thresholds)]
37 +
# Return the thresholds stored in `roc` (the same vector, not a copy).
function cutoffs(roc::ROCData)
	return roc.thresholds
end

@@ -1,54 +1,8 @@
Loading
1 1
## Data Preparation for ROC Analysis
2 2
3 -
struct _PreparedROCData{T<:Real}
4 -
	scores::Vector{T}
5 -
	labels::Vector{Bool}
3 +
struct ROCData{T <: Real}
6 4
	thresholds::Vector{T}
7 -
	distances::Bool
8 -
end
9 -
10 -
function _create_preparedrocdata(scores::AbstractVector{T},
11 -
								 labels,
12 -
								 distances::Bool) where {T <: Real}
13 -
	thresholds = unique(scores)
14 -
	push!(thresholds, distances ? typemin(T) : typemax(T))
15 -
	sort!(thresholds, rev=!distances)
16 -
	_PreparedROCData(
17 -
		convert(Vector{T}, scores),
18 -
		convert(Vector{Bool}, labels),
19 -
		convert(Vector{T}, thresholds),
20 -
		distances)
21 -
end
22 -
23 -
function _vector2labels(labels::AbstractVector{T}, truelabel::T) where T
24 -
	binary = Vector{Bool}(undef, length(labels))
25 -
	unique_labels = Set{T}()
26 -
	for (i, label) in enumerate(labels)
27 -
		push!(unique_labels, label)
28 -
		if length(unique_labels) > 2
29 -
			error("There is more than two labels.")
30 -
		end
31 -
		binary[i] = label == truelabel
32 -
	end
33 -
	if !(truelabel in unique_labels)
34 -
		error("The truelabel is not in labels.")
35 -
	end
36 -
	binary
37 -
end
38 -
39 -
function _preparedrocdata(scores, labels, distances)
40 -
	if length(scores) == length(labels)
41 -
		_create_preparedrocdata(scores,labels,distances)
42 -
	else
43 -
		error("scores and labels should have the same length")
44 -
	end
45 -
end
46 -
47 -
struct ROCData{T<:Real}
48 -
	scores::Vector{T}
49 -
	labels::Union{Vector{Bool},BitVector}
50 -
	thresholds::Vector{T}
51 -
	P::T
5 +
	P::Int
52 6
	N::Int
53 7
	TP::Vector{Int}
54 8
	TN::Vector{Int}
@@ -58,24 +12,59 @@
Loading
58 12
	TPR::Vector{Float64}
59 13
end
60 14
61 -
function roc(data::_PreparedROCData)
62 -
	P = sum(data.labels)
63 -
	N = length(data.labels) - P
64 -
	n_thresholds = length(data.thresholds)
15 +
# Build the sorted vector of thresholds from the scores in use.
#
# The unique scores are extended with one infinite sentinel and then sorted:
# - `distances == true`:  `-∞` is added and the result is sorted ascending;
# - `distances == false`: `∞` is added and the result is sorted descending.
# In both cases the sentinel therefore ends up as the first threshold.
function _thresholds(used_scores, distances::Bool)
	thresholds = unique(used_scores)
	push!(thresholds, distances ? -∞ : ∞)
	return sort!(thresholds, rev=!distances)
end
25 +
26 +
# A score is usable only when it is a `Number` that is not NaN; anything else
# (e.g. `missing`, `nothing`, strings) is rejected.
_is_valid_score(score::Number) = !isnan(score)
_is_valid_score(score) = false
27 +
28 +
# Filter and normalise the raw inputs before the ROC statistics are computed.
#
# Arguments:
# - `scores`, `labels`: collections of equal length, paired element by element.
# - `distances`: forwarded to `_thresholds` to choose the threshold ordering.
# - `is_positive`: called on each kept label; its result is stored as a Bool.
#
# Pairs are dropped when the score fails `_is_valid_score` or the label is `missing`.
# Returns the named tuple `(scores, labels, thresholds)` holding only the kept pairs.
# Throws `ArgumentError` when `scores` and `labels` differ in length.
function _prepare_data(scores, labels, distances::Bool, is_positive::Function)
	n_labels = length(labels)
	length(scores) != n_labels &&
		throw(ArgumentError("scores and labels should have the same length."))
	# Promote the non-missing score type with `Infinite`, so the infinite sentinel
	# pushed by `_thresholds` fits the element type.
	# NOTE(review): `Missings.T` may be deprecated in newer Missings.jl releases in
	# favour of `nonmissingtype` — confirm against the supported versions.
	score_type = promote_type(Missings.T(eltype(scores)), Infinite)
	kept_scores = Vector{score_type}(undef, n_labels)
	kept_labels = falses(n_labels)
	n_kept = 0
	for (score, label) in zip(scores, labels)
		# Skip pairs that cannot take part in the analysis.
		(!_is_valid_score(score) || ismissing(label)) && continue
		n_kept += 1
		@inbounds kept_labels[n_kept] = is_positive(label)
		@inbounds kept_scores[n_kept] = score
	end
	# Both buffers were sized for the worst case; trim them to what was kept.
	resize!(kept_labels, n_kept)
	resize!(kept_scores, n_kept)
	kept_thresholds = _thresholds(kept_scores, distances)
	return (scores=kept_scores, labels=kept_labels, thresholds=kept_thresholds)
end
49 +
50 +
function _roc(scores, labels, thresholds, distances)
51 +
	P = sum(labels)
52 +
	N = length(labels) - P
53 +
	n_thresholds = length(thresholds)
65 54
	TP = Array{Int}(undef, n_thresholds)
66 55
	TN = Array{Int}(undef, n_thresholds)
67 56
	FP = Array{Int}(undef, n_thresholds)
68 57
	FN = Array{Int}(undef, n_thresholds)
69 58
	FPR = Array{Float64}(undef, n_thresholds)
70 59
	TPR = Array{Float64}(undef, n_thresholds)
71 -
	for (i, threshold) in enumerate(data.thresholds)
72 -
		if data.distances
73 -
			mask = data.scores .<= threshold
60 +
	for (i, threshold) in enumerate(thresholds)
61 +
		if distances
62 +
			mask = scores .<= threshold
74 63
		else
75 -
			mask = data.scores .>= threshold
64 +
			mask = scores .>= threshold
76 65
		end
77 -
		predicted_positive = data.labels[mask]
78 -
		predicted_negative = data.labels[.!mask]
66 +
		predicted_positive = labels[mask]
67 +
		predicted_negative = labels[.!mask]
79 68
		TPi = sum(predicted_positive)
80 69
		TNi = sum(.!predicted_negative)
81 70
        TP[i] = TPi
@@ -85,49 +74,47 @@
Loading
85 74
		FPR[i] = FP[i] / (FP[i] + TNi)
86 75
		TPR[i] = TPi / (TPi + FN[i])
87 76
	end
88 -
	ROCData{eltype(data.scores)}(
89 -
		data.scores,
90 -
		data.labels,
91 -
		data.thresholds, P, N, TP, TN, FP, FN, FPR, TPR)
92 -
end
93 -
94 -
# no missing values and AbstractVector{Bool} labels:
95 -
function roc(scores::AbstractVector{T}, labels::AbstractVector{Bool};
96 -
             distances::Bool=false) where T <: Real
97 -
    return roc( _preparedrocdata(scores, labels, distances) )
77 +
	ROCData{eltype(thresholds)}(thresholds, P, N, TP, TN, FP, FN, FPR, TPR)
98 78
end
99 79
100 -
# no missing values (but labels not AbstractVector{Bool}):
101 -
function roc(scores::AbstractVector{T}, labels::AbstractVector{L},
102 -
             truelabel::L; distances::Bool=false) where {T<:Real, L}
103 -
    bit_labels = _vector2labels(labels, truelabel)
104 -
    return roc( _preparedrocdata(scores, bit_labels, distances) )
80 +
# Compute the ROC data for `scores` and `labels`, where `is_positive(label)` decides
# whether a label counts as positive. `distances` is passed on to both
# `_prepare_data` and `_roc`.
function roc(scores, labels, is_positive::Function; distances::Bool=false)
	prepared = _prepare_data(scores, labels, distances, is_positive)
	return _roc(prepared.scores, prepared.labels, prepared.thresholds, distances)
end
106 84
107 -
# missing labels:
108 -
function roc(scores::AbstractVector{T}, labels::AbstractVector{Union{L, Missing}},
109 -
             truelabel::L; distances::Bool=false) where {T<:Real, L}
110 -
    good_indices = .!(ismissing.(labels))
111 -
    bit_labels = _vector2labels(labels[good_indices], truelabel)
112 -
    return roc( _preparedrocdata(scores[good_indices],
113 -
                                 bit_labels, distances) )
85 +
# Compute the ROC data treating every label that is `==` to `positive_label` as
# positive; delegates to the predicate-based `roc` method.
function roc(scores, labels, positive_label; distances::Bool=false)
	is_positive = ==(positive_label)
	return roc(scores, labels, is_positive; distances=distances)
end
115 88
116 -
# missing scores:
117 -
function roc(scores::AbstractVector{Union{T,Missing}}, labels::AbstractVector{L},
118 -
             truelabel::L; distances::Bool=false) where {T<:Real, L}
119 -
    good_indices = .!(ismissing.(scores))
120 -
    bit_labels = _vector2labels(labels[good_indices], truelabel)
121 -
    return roc( _preparedrocdata([scores[good_indices]...],
122 -
                                 bit_labels, distances) )
89 +
# Pick the positive label when the caller did not name one.
#
# The unique non-missing labels are sorted and the last one is returned. If the
# labels cannot be sorted (`MethodError`), a warning is logged and the last element
# of the collected labels is used instead; any other error is rethrown.
# A warning is also logged when there is exactly one or more than two unique labels.
# Throws `ArgumentError` when there is no non-missing label at all.
function _get_positive_label(labels)
	candidates = unique(skipmissing(labels))
	try
		sort!(candidates)
	catch err
		isa(err, MethodError) || rethrow(err)
		@warn "$candidates cannot be sorted. Positive: The last element."
	end
	isempty(candidates) && throw(ArgumentError("There are not unique labels."))
	positive = last(candidates)
	n_candidates = length(candidates)
	if n_candidates == 1
		@warn "There is only one unique label. Positive: $positive"
	elseif n_candidates > 2
		@warn "There are more than two unique labels. Positive: $positive"
	end
	return positive
end
124 112
125 -
# missing labels and missing scores:
126 -
function roc(scores::AbstractVector{Union{T,Missing}},
127 -
             labels::AbstractVector{Union{L,Missing}},
128 -
             truelabel::L; distances::Bool=false) where {T<:Real, L}
129 -
    good_indices = .!( ismissing.(scores) .| ismissing.(labels) )
130 -
    bit_labels = _vector2labels(labels[good_indices], truelabel)
131 -
    return roc( _preparedrocdata([scores[good_indices]...],
132 -
                                 bit_labels, distances) )
113 +
# Compute the ROC data without an explicit positive label.
#
# When the non-missing element type of `labels` is `Bool`, the labels themselves are
# used as the positive flag (`identity`). Otherwise the positive label is chosen by
# `_get_positive_label` and labels are compared to it with `==`.
function roc(scores, labels; distances::Bool=false)
	is_positive = if Missings.T(eltype(labels)) === Bool
		identity
	else
		==(_get_positive_label(labels))
	end
	return roc(scores, labels, is_positive; distances=distances)
end
134 121
similarity index 100%
135 122
rename from test/ROCRdata.csv
136 123
rename to test/data/ROCRdata.csv

@@ -1,8 +1,8 @@
Loading
1 -
__precompile__()
2 1
module ROC
3 2
4 -
using 	Missings,
5 -
		RecipesBase # for creating a Plots.jl recipe
3 +
using Infinity
4 +
using Missings # to use Missings.T
5 +
using RecipesBase # for creating a Plots.jl recipe
6 6
7 7
export	ROCData,
8 8
		roc,
Files Coverage
src 78.10%
Project Totals (4 files) 78.10%
ti7t2gi7lgtt1dsa
q0439qlsemlynhyl
22ub664p375p902c
tg6wgyj9bbt3jp7o

No yaml found.

Create your codecov.yml to customize your Codecov experience

Sunburst
The innermost circle is the entire project; moving away from the center are folders and then, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, followed by folders and, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block represent the number of statements and the coverage, respectively.
Loading