Hi,
I have a performance regression when including the power of a function as an input argument. I have boiled it down to something like this:
function f1(X)
n,d = size(X)
dist = 0.0
for i = 2:n
for j = 1:i-1
dist_tmp = 0.0
for k = 1:d
@inbounds dist_comp = X[i,k]-X[j,k]
dist_tmp += dist_comp^2
end
dist += 1/dist_tmp
end
end
output = 1/dist
return output
end
function f2(X,ae_power)
n,d = size(X)
dist = 0.0
for i = 2:n
for j = 1:i-1
dist_tmp = 0.0
for k = 1:d
@inbounds dist_comp = X[i,k]-X[j,k]
dist_tmp += dist_comp^ae_power
end
dist += 1/dist_tmp
end
end
output = 1/dist
return output
end
function f3(X,::Val{ae_power}) where ae_power
n,d = size(X)
dist = 0.0
for i = 2:n
for j = 1:i-1
dist_tmp = 0.0
for k = 1:d
@inbounds dist_comp = X[i,k]-X[j,k]
dist_tmp += dist_comp^ae_power
end
dist += 1/dist_tmp
end
end
output = 1/dist
return output
end
X=rand(1:500,500,500);
@btime f1(X)
@btime f2(X,2)
@btime f3(X,Val(2))
102.178 ms (1 allocation: 16 bytes)
181.348 ms (1 allocation: 16 bytes)
175.748 ms (1 allocation: 16 bytes)
Using Val
gives me a small improvement but not close enough to the original implementation.
Strangely @code_warntype
only shows an output for function f1
.
Could someone point me in a direction to improve this? I am using Julia 1.3.1 built from source.