I have prepared the following code, which might help us identify the problem.
When I run @btime NbodyODE!(dU, U, Gm, 0.)
on my computer, the CPU times are as follows:
- Version 1.10.7: 180.753 ns (0 allocations: 0 bytes)
- Version 1.11.2: 1.290 Ī¼s (0 allocations: 0 bytes)
This means version 1.11.2 is approximately 7.13 times slower, (1.290e-6/1.80753e-7=7.13).
I greatly appreciate your interest and assistance.
Thank you!
using BenchmarkTools
using SIMD
struct VecArray{s_,T,dim}
data::Array{T,dim}
end
Base.eltype(::Type{VecArray{s,T,dim}}) where {s,T,dim} = T
@inline function Base.getindex(v::VecArray{s,T,dim},k...) where {s,T,dim}
Vec{s,T}(NTuple{s,T}(@inbounds v.data[is,k...] for is=1:s))
end
@inline function Base.setindex!(v::VecArray{s,T,dim},vi::Vec{s,T},k...) where {s,T,dim}
@inbounds for is in 1:s
v.data[is,k...] = vi[is]
end
return nothing
end
@inline function Base.setindex!(v::VecArray{s,T,dim},vk::T2,k...) where {s,T,T2,dim}
vk_ = convert(T,vk)
@inbounds for is in 1:s
v.data[is,k...] = vk_
end
return nothing
end
function NbodyODE!(F,u,Gm,t)
N = length(Gm)
for i in 1:N
for k in 1:3
F[k, i, 2] = 0
end
end
for i in 1:N
xi = u[1,i,1]
yi = u[2,i,1]
zi = u[3,i,1]
Gmi = Gm[i]
for j in i+1:N
xij = xi - u[1,j,1]
yij = yi - u[2,j,1]
zij = zi - u[3,j,1]
Gmj = Gm[j]
dotij = (xij*xij+yij*yij+zij*zij)
auxij = 1/(sqrt(dotij)*dotij)
Gmjauxij = Gmj*auxij
F[1,i,2] -= Gmjauxij*xij
F[2,i,2] -= Gmjauxij*yij
F[3,i,2] -= Gmjauxij*zij
Gmiauxij = Gmi*auxij
F[1,j,2] += Gmiauxij*xij
F[2,j,2] += Gmiauxij*yij
F[3,j,2] += Gmiauxij*zij
end
end
for i in 1:3, j in 1:N
F[i,j,1] = u[i,j,2]
end
return nothing
end
s=8
N=5
u0=rand(3,N,2)
Gm=rand(N)
dims=size(u0)
zz=zeros(Float64, s, dims...)
U=VecArray{s,Float64,length(dims)+1}(zz)
dU=deepcopy(U)
indices=eachindex(u0)
for k in indices
U[k]=u0[k]
end
@btime NbodyODE!(dU,U,Gm,0.)