Hi all, thanks for your answers. partialsort
is a good idea, unfortunately it is a bit slower than my original function. I provide a benchmark with what I understood was the use of partialsort
and my sketch function (which is really slower than the other ones)
function foo(A::Vector{<:Number}, n::Int)
L = length(A)
B = zeros(Float64, L)
d = fill(1e5, n+1)
@inbounds for i=1:L
fill!(d, 1e5)
ix_max = 1
for j=1:L
dij = abs2(A[i] - A[j])
if dij < d[ix_max]
d[ix_max] = dij
ix_max = argmax(d)
end
end
sort!(d)
d1 = sqrt(d[2])
dn = sqrt(d[n+1])
B[i] = d1/dn*sqrt(n/pi)
end
return B
end
function foo2(A::Vector{<:Number}, n::Int)
L = length(A)
B = zeros(Float64, L)
d = fill(1e5, n+1)
@inbounds for i=1:L
fill!(d, 1e5)
for j=1:L
dij = abs2(A[i] - A[j])
if dij < d[end]
d[end] = dij
partialsort!(d, n+1)
end
end
d1 = sqrt(d[2])
dn = sqrt(d[n+1])
B[i] = d1/dn*sqrt(n/pi)
end
return B
end
function foo3(A::Vector{<:Number}, n::Int)
L = length(A)
B = zeros(L)
d = zeros(L)
for i=1:L
@. d = abs2(A[i] - A)
sort!(d)
B[i] = sqrt(d[2]*n/(d[n+1]*pi))
end
return B
end
function main(L)
A = randn(ComplexF64, L)
n = 20
# Check that results are equal.
S1 = foo(A, n)
S2 = foo2(A, n)
S3 = foo3(A, n)
display(norm(S1-S2))
display(norm(S1-S3))
# Benchmark.
@btime foo($A, $n)
@btime foo2($A, $n)
@btime foo3($A, $n)
return
end
main(3000)
Output:
0.0
4.914452931421627e-15
43.340 ms (3 allocations: 23.77 KiB)
55.410 ms (3 allocations: 23.77 KiB)
393.345 ms (4 allocations: 47.03 KiB)
I also used TimerOutputs.jl (really useful one, thanks, Kristoffer) to check that the final sorting is not the problem
function foo_timeit(A::Vector{<:Number}, n::Int)
to = TimerOutput()
L = length(A)
B = zeros(Float64, L)
d = fill(1e5, n+1)
@timeit to "for: i" @inbounds for i=1:L
fill!(d, 1e5)
ix_max = 1
@timeit to "for: j" for j=1:L
dij = abs2(A[i] - A[j])
if dij < d[ix_max]
@timeit to "place dij" d[ix_max] = dij
@timeit to "argmax" ix_max = argmax(d)
end
end
@timeit to "sort" sort!(d)
d1 = sqrt(d[2])
dn = sqrt(d[n+1])
B[i] = d1/dn
end
println(to)
return B
end
foo_timeit(randn(ComplexF64, 3000), 20);
Output:
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
Time Allocations
ββββββββββββββββββββββ βββββββββββββββββββββββ
Tot / % measured: 156ms / 100% 28.8KiB / 11.5%
Section ncalls time %tot avg alloc %tot avg
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
for: i 1 156ms 100% 156ms 3.31KiB 100% 3.31KiB
for: j 3.00k 154ms 98.9% 51.4ΞΌs 1.66KiB 50.0% 0.57B
argmax 383k 49.5ms 31.7% 129ns 0.00B 0.00% 0.00B
place dij 383k 20.3ms 13.0% 53.0ns 0.00B 0.00% 0.00B
sort 3.00k 1.10ms 0.71% 368ns 0.00B 0.00% 0.00B
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
@kristoffer.carlsson, with respect to the data it is very heterogeneous. It comes from general random complex matrices, random symmetric complex matrices and dissipative forms of quantum systems. A general form to get some input data for my function would be:
M = randn(ComplexF64, L, L)
A = eigvals(M)
With respect to inspecting only over for i in 1:L, j in (i+1):L
it might be a good idea, but I would have to find a nice way to store intermediate results that is not too memory consuming. Anyways, I didnβt know about RegionTrees.jl, itβs a very good idea to preprocess input data in this form! Iβll try to implement it and give you feedback if I get anything.
Sorry for the long answer. Thank you all again for those that answered.