# Fast Parallel ROC AUC Calculation

I’m trying to calculate the receiver operating characteristic (ROC) area under the curve (AUC) for large amounts of data. I’ve parallelized the sorting, but I’ve got too much SARS-CoV-2 in me at the moment to figure out the math to parallelize the integration. I did a bit of searching, but could only find single-threaded implementations. Is anyone aware of a highly parallel Julia-language ROC AUC calculation?

``````
# Weighted area under the ROC curve (true-positive rate vs. false-positive rate).
#
# Arguments:
#   ŷ       - predicted scores; higher scores should indicate the positive class
#   y       - labels; an entry counts as positive when it is > 0.5
#   weights - per-example weights
#
# Keywords (computed from the inputs when omitted):
#   sort_perm       - indices of ŷ in ascending score order
#   total_weight    - sum of all weights
#   positive_weight - sum of the weights of the positive examples
#
# Returns the AUC as a Float64. Empty input yields 0.0; a non-empty input with zero
# positive weight or zero negative weight yields NaN (the rates are 0/0).
#
# Examples that share the same score are removed from the "predicted positive" set as
# one group and contribute a trapezoid, so the result does not depend on the arbitrary
# order of tied examples. For untied scores this equals the plain rectangle sum.
function roc_auc(ŷ, y, weights; sort_perm = parallel_sort_perm(ŷ), total_weight = parallel_float64_sum(weights), positive_weight = parallel_float64_sum(y .* weights))
    total_pos = Float64(positive_weight)
    total_neg = Float64(total_weight - positive_weight)

    # Start with the threshold below every score: everything is predicted positive.
    remaining_pos = total_pos
    remaining_neg = total_neg

    # tpr = true_pos/total_pos
    # fpr = false_pos/total_neg
    # ROC is tpr vs fpr
    last_fpr = remaining_neg / total_neg # = 1.0
    last_tpr = remaining_pos / total_pos # = 1.0

    auc = 0.0
    prev_score = nothing

    for i in sort_perm
        score = ŷ[i]

        # The score changed, so the previous group of tied examples is complete:
        # emit the ROC segment from the last emitted point to the current point.
        if prev_score !== nothing && score != prev_score
            fpr = remaining_neg / total_neg
            tpr = remaining_pos / total_pos
            auc += (last_fpr - fpr) * (last_tpr + tpr) / 2
            last_fpr = fpr
            last_tpr = tpr
        end
        prev_score = score

        # Raise the threshold past example i: it is no longer predicted positive.
        w = Float64(weights[i])
        if y[i] > 0.5f0
            remaining_pos -= w
        else
            remaining_neg -= w
        end
    end

    # Emit the segment for the final group (skipped for empty input).
    if prev_score !== nothing
        fpr = remaining_neg / total_neg
        tpr = remaining_pos / total_pos
        auc += (last_fpr - fpr) * (last_tpr + tpr) / 2
    end

    return auc
end
``````

Supporting functions:

``````
# Sample sort
# Return the permutation of indices that puts `arr` in ascending order.
# Sorting uses Base's merge sort, so equal elements keep their original relative order.
function parallel_sort_perm(arr)
    stable_alg = Base.Sort.MergeSort
    order = sortperm(arr; alg = stable_alg)
    return order
end

rng = MersenneTwister(1234);

samples = sort(map(_ -> arr[rand(rng, 1:length(arr))], 1:sample_count))

x = arr[i]
for k in 1:length(bin_splits)
if bin_splits[k] > x
bin_i = k
break
end
end

end

my_i = 1
if length(bin) > 0
my_out[my_i:(my_i + length(bin)-1)] = bin
my_i += length(bin)
end
end

sort!(my_out; alg = Base.Sort.MergeSort, by = (i -> arr[i]))
end

out = Vector{Int64}(undef, length(arr))

out[start_i:(start_i+length(my_out)-1)] = my_out
end

out
end

# f should be a function that takes an indices_range and returns a tuple of reduction values
#
# parallel_iterate will unzip those tuples into a tuple of arrays of reduction values and return that.
function parallel_iterate(f, count)
# NOTE(review): no statements are present in this body, so as written a call returns `nothing`.
# The per-range calls to `f` and the unzipping of the returned tuples described in the
# comment above are not shown here — confirm against the full source before relying on it.

end

# Mangling so you get a tuple of arrays.
else
end
end

# Sum the elements of `arr`, converting each element to Float64 before adding.
#
# The index range is split into at most `Threads.nthreads()` contiguous chunks; each
# chunk is summed in its own task and the partial sums are then added together.
# Always returns a Float64; an empty `arr` yields 0.0.
#
# Because the chunking depends on the thread count, the floating-point rounding of the
# result can differ slightly between runs with different numbers of threads.
function parallel_float64_sum(arr)
    n = length(arr)
    n == 0 && return 0.0 # partition needs a positive chunk length

    chunk_len = cld(n, Threads.nthreads())
    tasks = map(Iterators.partition(eachindex(arr), chunk_len)) do index_chunk
        Threads.@spawn mapreduce(i -> Float64(arr[i]), +, index_chunk; init = 0.0)
    end

    # `fetch` is not inferrable, so assert the known result type of each partial sum.
    return mapreduce(t -> fetch(t)::Float64, +, tasks; init = 0.0)
end
``````

Never mind, I figured it out.

``````
function roc_auc(ŷ, y, weights; sort_perm = parallel_sort_perm(ŷ))
y       = parallel_apply_sort_perm(y, sort_perm)
weights = parallel_apply_sort_perm(weights, sort_perm)

# tpr = true_pos/total_pos
# fpr = false_pos/total_neg
# ROC is tpr vs fpr

pos_weight = 0.0
neg_weight = 0.0
if y[i] > 0.5f0
pos_weight += Float64(weights[i])
else
neg_weight += Float64(weights[i])
end
end
pos_weight, neg_weight
end

auc = 0.0

last_fpr = false_pos_weight / total_neg_weight
if y[i] > 0.5f0
true_pos_weight -= Float64(weights[i])
else
false_pos_weight -= Float64(weights[i])
end
fpr = false_pos_weight / total_neg_weight
tpr = true_pos_weight  / total_pos_weight
if fpr != last_fpr
auc += (last_fpr - fpr) * tpr
end
last_fpr = fpr
end

auc
end