When I solve the ODEs, I try to write the 136 equations with a for loop, which makes the program more clean. However, compare with the initial program (in which I expand each equation completely), the new program needs twice the time to run, the allocation are the same for both editions, and with save_everystep=false
both have 53 allocations, so I think the allocation shouldn’t be a probrem. I am wondering how to increase its running speed, any suggestion is welcome.
Here, ee, gg, ge are
defined to be const arrays to locate the y[Number]
, which should also not be a problem
using DifferentialEquations, Plots, LinearAlgebra, StaticArrays, Dates
using Random, LoopVectorization, Statistics, Distributed, Distributions
include("Test3DMolassesTest.jl")
include("Constants.jl")
const wmin = 1e-2 # the common factor
const Power = 200.0 # mW
const beamsize = 10.0 # 10 mm radius
const Is = 4.86 # saturation intensity, X-A 4.86 mW/cm2; X - B 5.5 mW/cm2
const Ip = 2Power / pi / (beamsize / 10)^2 # laser intensity
vp = 1
para = [0.15 69 74.5 34.83 73 1 2 2 1 7.56 6.56 -5.25 2.22]
P = zeros(5)
Polar = ones(5)
Delta = zeros(1, 5)
s = zeros(5)
P[1:4] = para[2:5]
P[5] = 0.0 # 1500.0 mW
Polar[1:4] = Polartemp[Int.(para[6:9])] # Polartemp is constant array defined in Constants.jl
Polar[5] = -1.0
Delta[1:4] = para[10:13]
Delta[5] = 100.0 # 100Gamma relative to the F = 2 state
Delta = round.(Delta, digits=2) #round(Delta*100)/100; # normalized to 0.01 Gamma
s[1:4] = Ip / Is * P[1:4] / sum(P[1:4])
s[5] = 2P[5] / pi / (beamsize / 10)^2 / Is # from the bottom up, F = 1 0 1 2, 2
s = sqrt.(s)
delta = deltatemp .- Delta
delta = delta'
delta = SMatrix{5,12,Float64}(delta)
s = SVector{5,Float64}(s)
Polar = SVector{5,Float64}(Polar) # turn into Static arrays
Len = length(Polar)
atemp = MVector{Len,ComplexF64}(complex(zeros(Len)))
btemp = similar(atemp)
ctemp = similar(atemp)
ft = complex(zeros(Len, 12, 4))
ftc = similar(ft)
u0 = IIF # IIF is a constant array defined in Constants.jl
tend = 2pi / wmin / Gamma
tstart = -tend
tspan = (tstart * Gamma, tend * Gamma)
vx, vy, vz = [1 2 3]
rx, ry, rz = [1 2 3] * lamda
theta1, theta2 = [pi / 2 pi]
para01 = @SVector [vx, vy, vz, rx, ry, rz, theta1, theta2]
p = (para01, delta, s, Polar, Len, atemp, btemp, ctemp, ft, ftc) #Len cannot be put into para01, that will make Len Float64 type
prob = ODEProblem(Test3DMolasses, u0, tspan, p)
@time y = solve(prob, Tsit5(), save_everystep=false)
And the Constants.jl
here
## Physical constants
const h = 6.626e-34
Mu = 1.660538e-27
Mn = 59.0
const Mm = Mu * Mn
const lamda = 606.3e-9
const k = 2pi / lamda
const Gamma = 2pi * 8.3e6
const velfact = Gamma / k
ZXtemp = [0, 0, 0, 76.2939, 122.9512, 122.9512, 122.9512, 147.8177, 147.8177, 147.8177, 147.8177, 147.8177] # importdata('X_ZeemanCaF.mat'); # 0:1:1000G
const ZX = round.((ZXtemp .- ZXtemp[1]) / 8.3, digits=2) # unit Gamma relative to the lowest level round to 0.01 Gamma
ZBtemp = [-5 * 2 * pi * 1e6, 0, 0, 0] / Gamma
const ZB = round.(ZBtemp, digits=2) # round(ZB*100)/100; # round to 0.01Gamma
#=
#Fp 0 1 1 1
# 13 14 15 16
#mFp 0 1 0 -1
ftemp = [-0.1355 0.4649 -0.4645 0.0000 # 1 1 1
-0.1355 0.4650 -0.0001 -0.4641 # 0 2 1
-0.1354 -0.0000 0.4646 -0.4642 # -1 3 1
-0.0020 -0.4705 0.4714 -0.4722 # 0 4 0
0.5614 0.0000 -0.1853 0.1846 # -1 5 1
-0.5612 0.1865 -0.0010 -0.1832 # 0 6 1
-0.5610 0.1851 -0.1844 0.0000 # 1 7 1
-0.0000 0.0000 -0.0000 0.4083 # -2 8 2
0.0009 -0.0000 0.2880 0.2894 # -1 9 2
-0.0011 -0.1659 -0.3334 -0.1675 # 0 10 2
-0.0009 -0.2880 -0.2894 0.0000 # 1 11 2
0.0000 0.4083 0.0000 0.0000] # 2 12 2
=#
ftemp = [-0.1354 0.4646 0.4646 0
0.1354 -0.4646 0 0.4646
-0.1354 0 -0.4646 -0.4646
0 0.4714 0.4714 0.4714
0.5612 0 0.1849 0.1849
-0.5612 0.1849 0 -0.1849
0.5612 -0.1849 -0.1849 0
0 0 0 0.4082
0 0 0.2887 -0.2887
0 0.1667 -0.3333 0.1667
0 -0.2887 0.2887 0
0 0.4082 0 0] # from Yang's method
for i = 1:4
ftemp[:, i] = ftemp[:, i] ./ sqrt(sum(ftemp[:, i] .^ 2))
end
const f = ftemp
const r = ftemp # the branching ratio
const gl = [1, 0, -1, 0, -1, 0, 1, -2, -1, 0, 1, 2]
const el = [0, 1, 0, -1]
II = zeros(136)
II[1] = 1.0 / 12.0
II[13] = 1.0 / 12.0
II[24] = 1.0 / 12.0
II[34] = 1.0 / 12.0
II[43] = 1.0 / 12.0
II[51] = 1.0 / 12.0
II[58] = 1.0 / 12.0
II[64] = 1.0 / 12.0
II[69] = 1.0 / 12.0
II[73] = 1.0 / 12.0
II[76] = 1.0 / 12.0
II[78] = 1.0 / 12.0
const IIF = complex(II)
const Polartemp = [1.0, -1.0] # use Float64 types can reduce a lot of problems
#laser 1 2 3 4 Level
const deltatemp = -[ZX[1]-ZX[2] ZX[1]-ZX[4] ZX[1]-ZX[6] ZX[1]-ZX[10] ZX[1]-ZX[10] # 1 1 1
ZX[2]-ZX[2] ZX[2]-ZX[4] ZX[2]-ZX[6] ZX[2]-ZX[10] ZX[2]-ZX[10] # 0 2 1
ZX[3]-ZX[2] ZX[3]-ZX[4] ZX[3]-ZX[6] ZX[3]-ZX[10] ZX[3]-ZX[10] # -1 3 1
ZX[4]-ZX[2] ZX[4]-ZX[4] ZX[4]-ZX[6] ZX[4]-ZX[10] ZX[4]-ZX[10] # 0 4 0
ZX[5]-ZX[2] ZX[5]-ZX[4] ZX[5]-ZX[6] ZX[5]-ZX[10] ZX[5]-ZX[10] # -1 5 1
ZX[6]-ZX[2] ZX[6]-ZX[4] ZX[6]-ZX[6] ZX[6]-ZX[10] ZX[6]-ZX[10] # 0 6 1
ZX[7]-ZX[2] ZX[7]-ZX[4] ZX[7]-ZX[6] ZX[7]-ZX[10] ZX[7]-ZX[10] # 1 7 1
ZX[8]-ZX[2] ZX[8]-ZX[4] ZX[8]-ZX[6] ZX[8]-ZX[10] ZX[8]-ZX[10] # -2 8 2
ZX[9]-ZX[2] ZX[9]-ZX[4] ZX[9]-ZX[6] ZX[9]-ZX[10] ZX[9]-ZX[10] # -1 9 2
ZX[10]-ZX[2] ZX[10]-ZX[4] ZX[10]-ZX[6] ZX[10]-ZX[10] ZX[10]-ZX[10] # 0 10 2
ZX[11]-ZX[2] ZX[11]-ZX[4] ZX[11]-ZX[6] ZX[11]-ZX[10] ZX[11]-ZX[10] # 1 11 2
ZX[12]-ZX[2] ZX[12]-ZX[4] ZX[12]-ZX[6] ZX[12]-ZX[10] ZX[12]-ZX[10]] # 2 12 2
const q = [-1, 0, 1] # define spherical vector component
ggtemp = zeros(12, 12)
eetemp = zeros(4, 4)
getemp = zeros(12, 4)
icount = 0
for ga = 1:12, gb = ga:12
global icount += 1
ggtemp[ga, gb] = icount
end
for ea = 1:4, eb = ea:4
global icount += 1
eetemp[ea, eb] = icount
end
for ga = 1:12, eb = 1:4
global icount += 1
getemp[ga, eb] = icount
end
const gg=Int.(ggtemp)
const ee=Int.(eetemp)
const ge=Int.(getemp)
function Test3DMolasses(dy, y, p, t)
para01, delta, s, Polar, Len, a, b, c, ft, ftc = p
vx, vy, vz, rx, ry, rz, theta1, theta2 = para01
sx, cx = sincos(k * (rx + vx * (t / Gamma)))
sy, cy = sincos(k * (ry + vy * (t / Gamma)))
sz, cz = sincos(k * (rz + vz * (t / Gamma)))
cisth1 = cis(theta1)
cisth2 = cis(theta2)
aplus = -1.0im * cisth1 * cx + cisth2 * sy + cz - 1.0im * sz
bplus = sqrt(2.0) * cisth1 * sx + sqrt(2.0) * cisth2 * cy
cplus = -1.0im * cisth1 * cx - cisth2 * sy - cz - 1.0im * sz #Circularly��?1 0 -1 in epision��?-1 0 +1 in f_q
# sigma- means the laser component polarization from the positive direction is +
aminus = -1.0im * cisth1 * cx - cisth2 * sy + cz + 1.0im * sz
bminus = -sqrt(2.0) * cisth1 * sx + sqrt(2.0) * cisth2 * cy
cminus = -1.0im * cisth1 * cx + cisth2 * sy - cz + 1.0im * sz #Circularly��?1 0 -1 in epision��?-1 0 +1 in f_q
@inbounds for ii in 1:Len
if Polar[ii] == 1.0
a[ii] = aplus
b[ii] = bplus
c[ii] = cplus
elseif Polar[ii] == -1.0
a[ii] = aminus
b[ii] = bminus
c[ii] = cminus
end
end
fill!(dy, false) # both are OK
factor = 1.0im / 2.0sqrt(2.0)
@inbounds for jj in 1:Len
for i = 1:12, j = 1:4
if el[j] - gl[i] == -1
ft[jj, i, j] = f[i, j] * a[jj] * cis((delta[jj, i] + ZB[j]) * t)
elseif el[j] - gl[i] == 0
ft[jj, i, j] = f[i, j] * b[jj] * cis((delta[jj, i] + ZB[j]) * t)
elseif el[j] - gl[i] == 1
ft[jj, i, j] = f[i, j] * c[jj] * cis((delta[jj, i] + ZB[j]) * t)
end
end
end
@. ftc = conj(ft)
## Main part of the ODEs
@inbounds for jj in 1:Len
icount = 0
@inbounds for ga in 1:12, gb in ga:12
icount += 1
@inbounds for ecp in 1:4
dy[icount] += -factor * s[jj] * (ft[jj, ga, ecp] * conj(y[ge[gb, ecp]]) - ftc[jj, gb, ecp] * y[ge[ga, ecp]])
end
end
@inbounds for ea in 1:4, eb in ea:4
icount += 1
@inbounds for gc in 1:12
dy[icount] += factor * s[jj] * (ft[jj, gc, eb] * conj(y[ge[gc, ea]]) - ftc[jj, gc, ea] * y[ge[gc, eb]])
end
end
@inbounds for ga in 1:12, eb in 1:4
icount += 1
@inbounds for gc in 1:12
if ga <= gc
dy[icount] += factor * s[jj] * ft[jj, gc, eb] * y[gg[ga, gc]]
else
dy[icount] += factor * s[jj] * ft[jj, gc, eb] * conj(y[gg[gc, ga]])
end
end
@inbounds for ecp in 1:4
if ecp <= eb
dy[icount] -= factor * s[jj] * ft[jj, ga, ecp] * y[ee[ecp, eb]]
else
dy[icount] -= factor * s[jj] * ft[jj, ga, ecp] * conj(y[ee[eb, ecp]])
end
end
end
end
icount = 0
@inbounds for ga in 1:12, gb in ga:12
icount += 1
@inbounds for ecp in 1:4, ecpp in 1:4
if r[ga, ecp] != 0 && r[gb, ecpp] != 0 && el[ecp] - gl[ga] == el[ecpp] - gl[gb] && ecp <= ecpp
dy[icount] += r[ga, ecp] * r[gb, ecpp] * cis((ZB[ecp] - ZB[ecpp] + ZX[gb] - ZX[ga]) * t) * y[ee[ecp, ecpp]]
elseif r[ga, ecp] != 0 && r[gb, ecpp] != 0 && el[ecp] - gl[ga] == el[ecpp] - gl[gb] && ecp > ecpp
dy[icount] += r[ga, ecp] * r[gb, ecpp] * cis((ZB[ecp] - ZB[ecpp] + ZX[gb] - ZX[ga]) * t) * conj(y[ee[ecpp, ecp]])
end
end
end
@inbounds for jj in 79:88
dy[jj] -= y[jj]
end
@inbounds for jj in 89:136
dy[jj] -= y[jj] / 2.0
end
end
The initial program looks like this
function Test3DMolasses(dy, y, p, t)
para01, delta, s, Polar, Len, a, b, c, ft, ftc = p
vx, vy, vz, rx, ry, rz, theta1, theta2 = para01
sx, cx = sincos(k * (rx + vx * (t / Gamma)))
sy, cy = sincos(k * (ry + vy * (t / Gamma)))
sz, cz = sincos(k * (rz + vz * (t / Gamma)))
cisth1 = cis(theta1)
cisth2 = cis(theta2)
aplus = -1.0im * cisth1 * cx + cisth2 * sy + cz - 1.0im * sz
bplus = sqrt(2.0) * cisth1 * sx + sqrt(2.0) * cisth2 * cy
cplus = -1.0im * cisth1 * cx - cisth2 * sy - cz - 1.0im * sz #Circularly��?1 0 -1 in epision��?-1 0 +1 in f_q
# sigma- means the laser component polarization from the positive direction is +
aminus = -1.0im * cisth1 * cx - cisth2 * sy + cz + 1.0im * sz
bminus = -sqrt(2.0) * cisth1 * sx + sqrt(2.0) * cisth2 * cy
cminus = -1.0im * cisth1 * cx + cisth2 * sy - cz + 1.0im * sz #Circularly��?1 0 -1 in epision��?-1 0 +1 in f_q
for ii = 1:Len
if Polar[ii] == 1.0
a[ii] = aplus
b[ii] = bplus
c[ii] = cplus
elseif Polar[ii] == -1.0
a[ii] = aminus
b[ii] = bminus
c[ii] = cminus
end
end
# g g 1
#dy .= 0.0
fill!(dy, false) # both are OK
factor = 1.0im / 2.0sqrt(2.0)
@inbounds for jj = 1:Len
for i = 1:12, j = 1:4
if el[j] - gl[i] == -1
ft[jj, i, j] = f[i, j] * a[jj] * cis((delta[jj, i] + ZB[j]) * t)
elseif el[j] - gl[i] == 0
ft[jj, i, j] = f[i, j] * b[jj] * cis((delta[jj, i] + ZB[j]) * t)
elseif el[j] - gl[i] == 1
ft[jj, i, j] = f[i, j] * c[jj] * cis((delta[jj, i] + ZB[j]) * t)
end
end
end
@. ftc = conj(ft)
@inbounds for jj = 1:Len
dy[1] += -factor * s[jj] * (ft[jj, 1, 1] * conj(y[89]) + ft[jj, 1, 2] * conj(y[90]) + ft[jj, 1, 3] * conj(y[91]) - ftc[jj, 1, 1] * y[89] - ftc[jj, 1, 2] * y[90] - ftc[jj, 1, 3] * y[91])
dy[2] += -factor * s[jj] * (ft[jj, 1, 1] * conj(y[93]) + ft[jj, 1, 2] * conj(y[94]) + ft[jj, 1, 3] * conj(y[95]) - ftc[jj, 2, 1] * y[89] - ftc[jj, 2, 2] * y[90] - ftc[jj, 2, 3] * y[91] - ftc[jj, 2, 4] * y[92])
dy[3] += -factor * s[jj] * (ft[jj, 1, 1] * conj(y[97]) + ft[jj, 1, 2] * conj(y[98]) + ft[jj, 1, 3] * conj(y[99]) - ftc[jj, 3, 1] * y[89] - ftc[jj, 3, 3] * y[91] - ftc[jj, 3, 4] * y[92])
dy[4] += -factor * s[jj] * (ft[jj, 1, 1] * conj(y[101]) + ft[jj, 1, 2] * conj(y[102]) + ft[jj, 1, 3] * conj(y[103]) - ftc[jj, 4, 1] * y[89] - ftc[jj, 4, 2] * y[90] - ftc[jj, 4, 3] * y[91] - ftc[jj, 4, 4] * y[92])
dy[5] += -factor * s[jj] * (ft[jj, 1, 1] * conj(y[105]) + ft[jj, 1, 2] * conj(y[106]) + ft[jj, 1, 3] * conj(y[107]) - ftc[jj, 5, 1] * y[89] - ftc[jj, 5, 3] * y[91] - ftc[jj, 5, 4] * y[92])
dy[6] += -factor * s[jj] * (ft[jj, 1, 1] * conj(y[109]) + ft[jj, 1, 2] * conj(y[110]) + ft[jj, 1, 3] * conj(y[111]) - ftc[jj, 6, 1] * y[89] - ftc[jj, 6, 2] * y[90] - ftc[jj, 6, 3] * y[91] - ftc[jj, 6, 4] * y[92])
dy[7] += -factor * s[jj] * (ft[jj, 1, 1] * conj(y[113]) + ft[jj, 1, 2] * conj(y[114]) + ft[jj, 1, 3] * conj(y[115]) - ftc[jj, 7, 1] * y[89] - ftc[jj, 7, 2] * y[90] - ftc[jj, 7, 3] * y[91])
dy[8] += -factor * s[jj] * (ft[jj, 1, 1] * conj(y[117]) + ft[jj, 1, 2] * conj(y[118]) + ft[jj, 1, 3] * conj(y[119]) - ftc[jj, 8, 4] * y[92])
I cannot show all of it since it’s too long