I generally don’t expect to be faster, but identical. Yet, in this particular case, the SVector
version is turning out to be slower (~30%), and I don’t understand why. Do you see the difference? Here is a copy/paste runnable code of both versions. Just run main()
.
I get:
julia> main()
66.126 μs (0 allocations: 0 bytes)
nothing
101.776 μs (0 allocations: 0 bytes)
nothing
import Pkg
Pkg.activate(;temp=true)
Pkg.add("StaticArrays")
using Test
using StaticArrays
# With simple 1D arrays
function XYDOT2!(X,Y,X_DOT,Y_DOT,EPS)
fill!(X_DOT,0.0); fill!(Y_DOT,0.0)
@inbounds for i in eachindex(X,Y,X_DOT,Y_DOT)
x_i, y_i = X[i], Y[i]
acc_x, acc_y = 0.0, 0.0
@fastmath for j=(i+1):lastindex(X)
x_j, y_j = X[j], Y[j]
diff_x = x_i - x_j; diff_y = y_i - y_j
inv_d = inv(diff_x^2 + diff_y^2 + EPS)
dxdt = diff_x * inv_d; dydt = diff_y * inv_d
acc_x += dxdt; acc_y += dydt
X_DOT[j] -= dxdt; Y_DOT[j] -= dydt
end
X_DOT[i] += acc_x; Y_DOT[i] += acc_y
end
return nothing
end
# with vectors of SVectors
function XYDOT_SV!(P::Vector{T}, XYDOT::Vector{T}, EPS) where T<:SVector
fill!(XYDOT, zero(T))
@inbounds for i in eachindex(P,XYDOT)
p_i = P[i]
acc = zero(T)
@fastmath for j=(i+1):lastindex(P)
diff = p_i - P[j]
invd = inv(diff[1]^2 + diff[2]^2 + EPS)
dpdt = diff * invd
acc += dpdt
XYDOT[j] -= dpdt
end
XYDOT[i] += acc
end
return nothing
end
function main()
N = 512
EPS = 0.1
P = rand(SVector{2,Float64},N)
XYDOT = similar(P)
X = [ x[1] for x in P ];
Y = [ x[2] for x in P ];
X_DOT = similar(X)
Y_DOT = similar(Y)
XYDOT2!(X,Y,X_DOT,Y_DOT,EPS)
XYDOT_SV!(P,XYDOT,EPS)
@test all(isapprox.([SVector(x,y) for (x,y) in zip(X_DOT,Y_DOT)], XYDOT))
display(@btime XYDOT2!($X,$Y,$X_DOT,$Y_DOT,$EPS))
display(@btime XYDOT_SV!($P,$XYDOT,$EPS))
return nothing
end