When I solve large scale ODEs (136 equations), I try to extract some common factor and calculate them before using them in the equations, however that will take me a 3D array, which will take me quite a lot of allocations. And the program do become 10 times faster with this way although there are more allocations. For my real program initially, there are 53 allocations with a run time of 10s, with this method, there are 70K allocations with a run time of 1s. And I am sure this allocation is from the 3D array, which can not be set to be static array, I am just wondering how to make this 3D array not cause allocation.
Here I just take 4 equations from my real program, but still can be run…
I have to add one point, I already know the size of the 3D array, which is (Len, 12, 4)
function Test3DMolasses(dy, y, p, t)
para01, delta, s, Polar, a, b, c, Len, ft, ftc = p
vx, vy, vz, rx, ry, rz, theta1, theta2 = para01
# s1 s2 are sqrt(Ik/Isat)
# sigma+ means the laser component polarization from the positive direction is +
aplus = -1.0im * cis(theta1) * cos(k * rx + k * vx * t / Gamma) + cis(theta2) * sin(k * ry + k * vy * t / Gamma) + cos(k * rz + k * vz * t / Gamma) - 1.0im * sin(k * rz + k * vz * t / Gamma)
bplus = sqrt(2.0) * cis(theta1) * sin(k * rx + k * vx * t / Gamma) + sqrt(2.0) * cis(theta2) * cos(k * ry + k * vy * t / Gamma)
cplus = -1.0im * cis(theta1) * cos(k * rx + k * vx * t / Gamma) - cis(theta2) * sin(k * ry + k * vy * t / Gamma) - cos(k * rz + k * vz * t / Gamma) - 1.0im * sin(k * rz + k * vz * t / Gamma) #Circularly��?1 0 -1 in epision��?-1 0 +1 in f_q
# sigma- means the laser component polarization from the positive direction is +
aminus = -1.0im * cis(theta1) * cos(k * rx + k * vx * t / Gamma) - cis(theta2) * sin(k * ry + k * vy * t / Gamma) + cos(k * rz + k * vz * t / Gamma) + 1.0im * sin(k * rz + k * vz * t / Gamma)
bminus = -sqrt(2.0) * cis(theta1) * sin(k * rx + k * vx * t / Gamma) + sqrt(2.0) * cis(theta2) * cos(k * ry + k * vy * t / Gamma)
cminus = -1.0im * cis(theta1) * cos(k * rx + k * vx * t / Gamma) + cis(theta2) * sin(k * ry + k * vy * t / Gamma) - cos(k * rz + k * vz * t / Gamma) + 1.0im * sin(k * rz + k * vz * t / Gamma) #Circularly��?1 0 -1 in epision��?-1 0 +1 in f_q
for ii = 1:Len
if Polar[ii] == 1.0
a[ii] = aplus
b[ii] = bplus
c[ii] = cplus
elseif Polar[ii] == -1.0
a[ii] = aminus
b[ii] = bminus
c[ii] = cminus
end
end
# g g 1
#dy .= 0.0
fill!(dy, false) # both are OK
factor = 1.0im / 2.0sqrt(2.0)
#Pre-calculation of the common factors
@inbounds for jj = 1:Len
for i = 1:12, j = 1:4
if el[j] - gl[i] == -1
ft[jj, i, j] = f[i, j] * a[jj] * cis((delta[jj, i] + ZB[j]) * t)
elseif el[j] - gl[i] == 0
ft[jj, i, j] = f[i, j] * b[jj] * cis((delta[jj, i] + ZB[j]) * t)
elseif el[j] - gl[i] == 1
ft[jj, i, j] = f[i, j] * c[jj] * cis((delta[jj, i] + ZB[j]) * t)
end
end
end
ftc = conj(ft)
@inbounds for jj = 1:Len
dy[1] += -factor * s[jj] * (ft[jj, 1, 1] * conj(y[1]) + ft[jj, 1, 2] * conj(y[2]) + ft[jj, 1, 3] * conj(y[2]) - ftc[jj, 1, 1] * y[4] - ftc[jj, 1, 2] * y[1] - ftc[jj, 1, 3] * y[4])
dy[2] += -factor * s[jj] * (ft[jj, 1, 1] * conj(y[2]) + ft[jj, 1, 2] * conj(y[1]) + ft[jj, 1, 3] * conj(y[4]) - ftc[jj, 2, 1] * y[1] - ftc[jj, 2, 2] * y[1] - ftc[jj, 2, 3] * y[4] - ftc[jj, 2, 4] * y[2])
dy[3] += -factor * s[jj] * (ft[jj, 1, 1] * conj(y[3]) + ft[jj, 1, 2] * conj(y[4]) + ft[jj, 1, 3] * conj(y[1]) - ftc[jj, 3, 1] * y[2] - ftc[jj, 3, 3] * y[4] - ftc[jj, 3, 4] * y[2])
dy[4] += -factor * s[jj] * (ft[jj, 1, 1] * conj(y[4]) + ft[jj, 1, 2] * conj(y[3]) + ft[jj, 1, 3] * conj(y[3]) - ftc[jj, 4, 1] * y[3] - ftc[jj, 4, 2] * y[2] - ftc[jj, 4, 3] * y[1] - ftc[jj, 4, 4] * y[2])
end
end
using DifferentialEquations, LinearAlgebra, StaticArrays
using Random, Statistics
include("Constants.jl")
# parameters defination
vx, vy, vz = rand(3)
rx, ry, rz = rand(3)
theta1, theta2 = 2pi * rand(2)
para01 = @SVector [vx, vy, vz, rx, ry, rz, theta1, theta2]
delta = 20rand(4, 12)
delta = SMatrix{4,12,Float64}(delta)
s = @SVector [1.0, 1.0, 1.0, 1.0]
Polar = @SVector [1.0, 1.0, 1.0, -1.0]
Len = length(Polar)
a = MVector{Len,ComplexF64}(complex(zeros(Len)))
b = similar(a)
c = similar(a)
# the 3D array defined here and will be transferred to ODEs
ft = complex(zeros(Len, 12, 4))
ftc=similar(ft)
p = (para01, delta, s, Polar, a, b, c, Len,ft,ftc)
# initial value
II = zeros(4)
II[1] = 1.0
u0 = complex(II)
tend = 1e-5
tstart = 0
tspan = (tstart * Gamma, tend * Gamma)
prob = ODEProblem(Test3DMolasses, u0, tspan, p)
@time y = solve(prob, Tsit5(), save_everystep=false) #20.92K allocations
Here is the Constants.jl
## Physical constants
const h = 6.626e-34
Mu = 1.660538e-27
Mn = 59
const Mm = Mu * Mn
const lamda = 606.3e-9
const k = 2pi / lamda
const Gamma = 2pi * 8.3e6
const velfact = Gamma / k
ZXtemp = [0, 0, 0, 76.2939, 122.9512, 122.9512, 122.9512, 147.8177, 147.8177, 147.8177, 147.8177, 147.8177] # importdata('X_ZeemanCaF.mat'); # 0:1:1000G
const ZX = round.((ZXtemp .- ZXtemp[1]) / 8.3, digits=2) # unit Gamma relative to the lowest level round to 0.01 Gamma
ZBtemp = [-5 * 2 * pi * 1e6, 0, 0, 0] / Gamma
const ZB = round.(ZBtemp, digits=2) # round(ZB*100)/100; # round to 0.01Gamma
#=
#Fp 0 1 1 1
# 13 14 15 16
#mFp 0 1 0 -1
ftemp = [-0.1355 0.4649 -0.4645 0.0000 # 1 1 1
-0.1355 0.4650 -0.0001 -0.4641 # 0 2 1
-0.1354 -0.0000 0.4646 -0.4642 # -1 3 1
-0.0020 -0.4705 0.4714 -0.4722 # 0 4 0
0.5614 0.0000 -0.1853 0.1846 # -1 5 1
-0.5612 0.1865 -0.0010 -0.1832 # 0 6 1
-0.5610 0.1851 -0.1844 0.0000 # 1 7 1
-0.0000 0.0000 -0.0000 0.4083 # -2 8 2
0.0009 -0.0000 0.2880 0.2894 # -1 9 2
-0.0011 -0.1659 -0.3334 -0.1675 # 0 10 2
-0.0009 -0.2880 -0.2894 0.0000 # 1 11 2
0.0000 0.4083 0.0000 0.0000] # 2 12 2
=#
ftemp = [-0.1354 0.4646 0.4646 0
0.1354 -0.4646 0 0.4646
-0.1354 0 -0.4646 -0.4646
0 0.4714 0.4714 0.4714
0.5612 0 0.1849 0.1849
-0.5612 0.1849 0 -0.1849
0.5612 -0.1849 -0.1849 0
0 0 0 0.4082
0 0 0.2887 -0.2887
0 0.1667 -0.3333 0.1667
0 -0.2887 0.2887 0
0 0.4082 0 0] # from Yang's method
for i = 1:4
ftemp[:, i] = ftemp[:, i] ./ sqrt(sum(ftemp[:, i] .^ 2))
end
const f = ftemp
const r = ftemp # the branching ratio
const gl = [1, 0, -1, 0, -1, 0, 1, -2, -1, 0, 1, 2]
const el = [0, 1, 0, -1]