You have some unnecessary cruft in your code. For example, there seems to be no point to defining ri
, you can just write
rij[k] = W.r[j, k, iw] - W.r[i, k, iw]
without wasting time on ri
. Also I don’t quite understand your code, so there might be algorithmic speedups possible, beyond the pure implemetation speedups.
Anyway, here are three versions of your code. The first one is basically your own version. The second is a cleaned-up version. The third one assumes that dim
is always equal to 3
, and uses StaticArrays
:
using StaticArrays, BenchmarkTools
function Drift_Force1(W::Array{Float64, 3})
Np,dim,NW = size(W);
F_drift = zeros(Np,dim,NW);
ri = zeros(dim,1);
rj = zeros(dim,1);
rij = zeros(dim,1);
aux = zeros(dim,1);
for iw in 1:NW
for i in 1:Np-1
for k in 1:dim
ri[k] = W[i,k,iw];
end
for j in i+1:Np
for k in 1:dim
rij[k] = W[j,k,iw]-ri[k]
end
norm = sqrt(dot(rij,rij));
aux = rij/norm;
F_drift[i,:,iw] += aux;
F_drift[j,:,iw] -= aux;
end
end
end
return F_drift
end
function Drift_Force2(W::Array{Float64, 3})
dim = size(W, 2)
F_drift = zeros(W)
rij = zeros(dim)
@inbounds for iw in 1:size(W, 3)
for i in 1:size(W, 1)-1
for j in i+1:size(W, 1)
for k in 1:dim
rij[k] = W[j, k, iw] - W[i, k, iw]
end
normalize!(rij)
for k in 1:dim
F_drift[i, k, iw] += rij[k]
F_drift[j, k, iw] -= rij[k]
end
end
end
end
return F_drift
end
function Drift_Force3(W::Array{Float64, 3})
dim = 3
F_drift = zeros(W)
@inbounds for iw in 1:size(W, 3)
for i in 1:size(W, 1)-1
for j in i+1:size(W, 1)
rij = SVector(W[j, 1, iw] - W[i, 1, iw],
W[j, 2, iw] - W[i, 2, iw],
W[j, 3, iw] - W[i, 3, iw])
rij = normalize(rij)
for k in 1:dim
F_drift[i, k, iw] += rij[k]
F_drift[j, k, iw] -= rij[k]
end
end
end
end
return F_drift
end
WW = rand(64,3,10)
maximum(Drift_Force1(WW) .- Drift_Force2(WW))
maximum(Drift_Force1(WW) .- Drift_Force3(WW))
@benchmark Drift_Force1(WW)
@benchmark Drift_Force2(WW)
@benchmark Drift_Force3(WW)
On my computer version 1 runs in 26ms, version 2 in 2.2ms, and version 3 in 190μs, so that’s well over 100x speed-up. If you re-arrange the input matrix so that the size is 3 along the first dimension, there’s an extra ~10% to gain.