Hi, could you help me see another case, which is more close to my real program though it’s still a simlified version. But it already had all the problems of my final program. Even I set the save_everystep = false, there is still a lot of allocations. That means it has the possibility of improvement. I used the .+,.-,.*,./ to vectorizatied the calculation and use static arrays for all the constant parameters. The idea is from your blog and apply this to my final program improve the speed by one order of magnitude. I just wondering whether I can do more…
https://tutorials.sciml.ai/html/introduction/03-optimizing_diffeq_code.html
using DifferentialEquations, LinearAlgebra, StaticArrays
include("TestSimple02.jl")
s = [1.0 1.0]
s = sqrt.(s)
s = SMatrix{1,2,Float64}(s) # turn into Static arrays
Polar = [-1 1]
Polar = SMatrix{1,2,Int64}(Polar) # turn into Static arrays
delta = [2.5 ;-2.5]
delta = SVector{2,Float64}(delta) # turn into Static arrays
h = 6.626e-34
hbar = h / 2pi
torr = 26.63e-9
Gamma = 1.0 / torr
lamda = 780e-9
k = 2.0pi / lamda
forfact = hbar * k * Gamma
velfact = Gamma / k
# -1 0 +1
f = [0.1826 0 0
-0.1291 0.1291 0
0.0745 -0.1491 0.0745
0 0.1291 -0.1291
0 0 0.1826]
for i = 1:3
f[:, i] = f[:, i] ./ sqrt(sum(f[:, i] .^ 2))
end
f = SMatrix{5,3,Float64}(f) # turn into Static arrays
r = f
theta1 = 0
theta2 = 0
u0 = complex([1 / 5 0 0 0 0 1 / 5 0 0 0 1 / 5 0 0 1 / 5 0 1 / 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0])
wmin = 1e-2
vround = wmin * velfact
tend = 1e-5
tstart = -tend
tspan = (tstart * Gamma, tend * Gamma)
vx = 0.1
vy = 0.1
vz = 0.1
rx = 0.5lamda
ry = 0.5lamda
rz = 0.5lamda
para01=@SVector [vx, vy, vz, rx, ry, rz, theta1, theta2, k, Gamma]
#p = (vx, vy, vz, rx, ry, rz, theta1, theta2, k, Gamma, r, f, s, delta, Polar)
p=(para01,r, f, s, delta, Polar)
prob = ODEProblem(TestSimple02, u0, tspan, p)
#tinf = @snoopi_deep y = solve(prob, Tsit5())
#@benchmark y = solve(prob, Tsit5())
@time y = solve(prob, Tsit5())
#@timev y = solve(prob, Tsit5())
#@profile y = solve(prob,Tsit5())
#plot!(y.t, real(y[1, :]))
#end
function TestSimple02(dy, y, p, t)
# s1 s2 are sqrt(Ik./Isat)
#vx, vy, vz, rx, ry, rz, theta1, theta2, k, Gamma, r, f, s, delta, Polar = p
para01,r, f, s, delta, Polar = p
vx, vy, vz, rx, ry, rz, theta1, theta2, k, Gamma=para01
#a, b, c = AssCal(p, t)
# sigma.+ means the laser component polarization from the positive direction is .+
aplus = -1.0im .* exp(1.0im .* theta1) .* cos(k .* rx .+ k .* vx .* t ./ Gamma) .+ exp(1.0im .* theta2) .* sin(k .* ry .+ k .* vy .* t ./ Gamma) .+ cos(k .* rz .+ k .* vz .* t ./ Gamma) .- 1.0im .* sin(k .* rz .+ k .* vz .* t ./ Gamma)
bplus = sqrt(2.0) .* exp(1.0im .* theta1) .* sin(k .* rx .+ k .* vx .* t ./ Gamma) .+ sqrt(2.0) .* exp(1.0im .* theta2) .* cos(k .* ry .+ k .* vy .* t ./ Gamma)
cplus = -1.0im .* exp(1.0im .* theta1) .* cos(k .* rx .+ k .* vx .* t ./ Gamma) .- exp(1.0im .* theta2) .* sin(k .* ry .+ k .* vy .* t ./ Gamma) .- cos(k .* rz .+ k .* vz .* t ./ Gamma) .- 1.0im .* sin(k .* rz .+ k .* vz .* t ./ Gamma)
#sigma.- means the laser component polarization from the positive direction is .+
aminus = -1.0im .* exp(1.0im .* theta1) .* cos(k .* rx .+ k .* vx .* t ./ Gamma) .- exp(1.0im .* theta2) .* sin(k .* ry .+ k .* vy .* t ./ Gamma) .+ cos(k .* rz .+ k .* vz .* t ./ Gamma) .+ 1.0im .* sin(k .* rz .+ k .* vz .* t ./ Gamma)
bminus = -sqrt(2.0) .* exp(1.0im .* theta1) .* sin(k .* rx .+ k .* vx .* t ./ Gamma) .+ sqrt(2.0) .* exp(1.0im .* theta2) .* cos(k .* ry .+ k .* vy .* t ./ Gamma)
cminus = -1.0im .* exp(1.0im .* theta1) .* cos(k .* rx .+ k .* vx .* t ./ Gamma) .+ exp(1.0im .* theta2) .* sin(k .* ry .+ k .* vy .* t ./ Gamma) .- cos(k .* rz .+ k .* vz .* t ./ Gamma) .+ 1.0im .* sin(k .* rz .+ k .* vz .* t ./ Gamma)
Len = length(Polar)
a = complex(zeros(Len))
b = complex(zeros(Len))
c = complex(zeros(Len))
for ii = 1:Len
if Polar[ii] == 1
a[ii] = aplus
b[ii] = bplus
c[ii] = cplus
elseif Polar[ii] == -1
a[ii] = aminus
b[ii] = bminus
c[ii] = cminus
end
end
factor = 1.0im .* s ./ 2.0sqrt(2.0)
dy[1] = -dot(factor, f[1, 1] .* c .* exp.(1.0im .* delta .* t) .* conj(y[22]) .- conj(f[1, 1] .* c) .* exp.(.-1.0im .* delta .* t) .* y[22]) .+
.+r[1, 1] .* r[1, 1] .* y[16]
dy[2] = -dot(factor, f[1, 1] .* c .* exp.(1.0im .* delta .* t) .* conj(y[25]) .- conj(f[2, 1] .* b) .* exp.(.-1.0im .* delta .* t) .* y[22] .- conj(f[2, 2] .* c) .* exp.(.-1.0im .* delta .* t) .* y[23]) .+
.+r[1, 1] .* r[2, 2] .* y[17]
dy[3] = -dot(factor, f[1, 1] .* c .* exp.(1.0im .* delta .* t) .* conj(y[28]) .- conj(f[3, 1] .* a) .* exp.(.-1.0im .* delta .* t) .* y[22] .- conj(f[3, 2] .* b) .* exp.(.-1.0im .* delta .* t) .* y[23] .- conj(f[3, 3] .* c) .* exp.(.-1.0im .* delta .* t) .* y[24]) .+
.+r[1, 1] .* r[3, 3] .* y[18]
dy[4] = -dot(factor, f[1, 1] .* c .* exp.(1.0im .* delta .* t) .* conj(y[31]) .- conj(f[4, 2] .* a) .* exp.(.-1.0im .* delta .* t) .* y[23] .- conj(f[4, 3] .* b) .* exp.(.-1.0im .* delta .* t) .* y[24])
dy[5] = -dot(factor, f[1, 1] .* c .* exp.(1.0im .* delta .* t) .* conj(y[34]) .- conj(f[5, 3] .* a) .* exp.(.-1.0im .* delta .* t) .* y[24])
dy[6] = -dot(factor, f[2, 1] .* b .* exp.(1.0im .* delta .* t) .* conj(y[25]) .+ f[2, 2] .* c .* exp.(1.0im .* delta .* t) .* conj(y[26]) .- conj(f[2, 1] .* b) .* exp.(.-1.0im .* delta .* t) .* y[25] .- conj(f[2, 2] .* c) .* exp.(.-1.0im .* delta .* t) .* y[26]) .+
.+r[2, 1] .* r[2, 1] .* y[16] .+ r[2, 2] .* r[2, 2] .* y[19]
dy[7] = -dot(factor, f[2, 1] .* b .* exp.(1.0im .* delta .* t) .* conj(y[28]) .+ f[2, 2] .* c .* exp.(1.0im .* delta .* t) .* conj(y[29]) .- conj(f[3, 1] .* a) .* exp.(.-1.0im .* delta .* t) .* y[25] .- conj(f[3, 2] .* b) .* exp.(.-1.0im .* delta .* t) .* y[26] .- conj(f[3, 3] .* c) .* exp.(.-1.0im .* delta .* t) .* y[27]) .+
.+r[2, 1] .* r[3, 2] .* y[17] .+ r[2, 2] .* r[3, 3] .* y[20]
dy[8] = -dot(factor, f[2, 1] .* b .* exp.(1.0im .* delta .* t) .* conj(y[31]) .+ f[2, 2] .* c .* exp.(1.0im .* delta .* t) .* conj(y[32]) .- conj(f[4, 2] .* a) .* exp.(.-1.0im .* delta .* t) .* y[26] .- conj(f[4, 3] .* b) .* exp.(.-1.0im .* delta .* t) .* y[27]) .+
.+r[2, 1] .* r[4, 3] .* y[18]
dy[9] = -dot(factor, f[2, 1] .* b .* exp.(1.0im .* delta .* t) .* conj(y[34]) .+ f[2, 2] .* c .* exp.(1.0im .* delta .* t) .* conj(y[35]) .- conj(f[5, 3] .* a) .* exp.(.-1.0im .* delta .* t) .* y[27])
dy[10] = -dot(factor, f[3, 1] .* a .* exp.(1.0im .* delta .* t) .* conj(y[28]) .+ f[3, 2] .* b .* exp.(1.0im .* delta .* t) .* conj(y[29]) .+ f[3, 3] .* c .* exp.(1.0im .* delta .* t) .* conj(y[30]) .- conj(f[3, 1] .* a) .* exp.(.-1.0im .* delta .* t) .* y[28] .- conj(f[3, 2] .* b) .* exp.(.-1.0im .* delta .* t) .* y[29] .- conj(f[3, 3] .* c) .* exp.(.-1.0im .* delta .* t) .* y[30]) .+
.+r[3, 1] .* r[3, 1] .* y[16] .+ r[3, 2] .* r[3, 2] .* y[19] .+ r[3, 3] .* r[3, 3] .* y[21]
dy[11] = -dot(factor, f[3, 1] .* a .* exp.(1.0im .* delta .* t) .* conj(y[31]) .+ f[3, 2] .* b .* exp.(1.0im .* delta .* t) .* conj(y[32]) .+ f[3, 3] .* c .* exp.(1.0im .* delta .* t) .* conj(y[33]) .- conj(f[4, 2] .* a) .* exp.(.-1.0im .* delta .* t) .* y[29] .- conj(f[4, 3] .* b) .* exp.(.-1.0im .* delta .* t) .* y[30]) .+
.+r[3, 1] .* r[4, 2] .* y[17] .+ r[3, 2] .* r[4, 3] .* y[20]
dy[12] = -dot(factor, f[3, 1] .* a .* exp.(1.0im .* delta .* t) .* conj(y[34]) .+ f[3, 2] .* b .* exp.(1.0im .* delta .* t) .* conj(y[35]) .+ f[3, 3] .* c .* exp.(1.0im .* delta .* t) .* conj(y[36]) .- conj(f[5, 3] .* a) .* exp.(.-1.0im .* delta .* t) .* y[30]) .+
.+r[3, 1] .* r[5, 3] .* y[18]
dy[13] = -dot(factor, f[4, 2] .* a .* exp.(1.0im .* delta .* t) .* conj(y[32]) .+ f[4, 3] .* b .* exp.(1.0im .* delta .* t) .* conj(y[33]) .- conj(f[4, 2] .* a) .* exp.(.-1.0im .* delta .* t) .* y[32] .- conj(f[4, 3] .* b) .* exp.(.-1.0im .* delta .* t) .* y[33]) .+
.+r[4, 2] .* r[4, 2] .* y[19] .+ r[4, 3] .* r[4, 3] .* y[21]
dy[14] = -dot(factor, f[4, 2] .* a .* exp.(1.0im .* delta .* t) .* conj(y[35]) .+ f[4, 3] .* b .* exp.(1.0im .* delta .* t) .* conj(y[36]) .- conj(f[5, 3] .* a) .* exp.(.-1.0im .* delta .* t) .* y[33]) .+
.+r[4, 2] .* r[5, 3] .* y[20]
dy[15] = -dot(factor, f[5, 3] .* a .* exp.(1.0im .* delta .* t) .* conj(y[36]) .- conj(f[5, 3] .* a) .* exp.(.-1.0im .* delta .* t) .* y[36]) .+
.+r[5, 3] .* r[5, 3] .* y[21]
dy[16] = dot(factor, f[1, 1] .* c .* exp.(1.0im .* delta .* t) .* conj(y[22]) .+ f[2, 1] .* b .* exp.(1.0im .* delta .* t) .* conj(y[25]) .+ f[3, 1] .* a .* exp.(1.0im .* delta .* t) .* conj(y[28]) .- conj(f[1, 1] .* c) .* exp.(.-1.0im .* delta .* t) .* y[22] .- conj(f[2, 1] .* b) .* exp.(.-1.0im .* delta .* t) .* y[25] .- conj(f[3, 1] .* a) .* exp.(.-1.0im .* delta .* t) .* y[28]) .+
.-y[16]
dy[17] = dot(factor, f[2, 2] .* c .* exp.(1.0im .* delta .* t) .* conj(y[25]) .+ f[3, 2] .* b .* exp.(1.0im .* delta .* t) .* conj(y[28]) .+ f[4, 2] .* a .* exp.(1.0im .* delta .* t) .* conj(y[31]) .- conj(f[1, 1] .* c) .* exp.(.-1.0im .* delta .* t) .* y[23] .- conj(f[2, 1] .* b) .* exp.(.-1.0im .* delta .* t) .* y[26] .- conj(f[3, 1] .* a) .* exp.(.-1.0im .* delta .* t) .* y[29]) .+
.-y[17]
dy[18] = dot(factor, f[3, 3] .* c .* exp.(1.0im .* delta .* t) .* conj(y[28]) .+ f[4, 3] .* b .* exp.(1.0im .* delta .* t) .* conj(y[31]) .+ f[5, 3] .* a .* exp.(1.0im .* delta .* t) .* conj(y[34]) .- conj(f[1, 1] .* c) .* exp.(.-1.0im .* delta .* t) .* y[24] .- conj(f[2, 1] .* b) .* exp.(.-1.0im .* delta .* t) .* y[27] .- conj(f[3, 1] .* a) .* exp.(.-1.0im .* delta .* t) .* y[30]) .+
.-y[18]
dy[19] = dot(factor, f[2, 2] .* c .* exp.(1.0im .* delta .* t) .* conj(y[26]) .+ f[3, 2] .* b .* exp.(1.0im .* delta .* t) .* conj(y[29]) .+ f[4, 2] .* a .* exp.(1.0im .* delta .* t) .* conj(y[32]) .- conj(f[2, 2] .* c) .* exp.(.-1.0im .* delta .* t) .* y[26] .- conj(f[3, 2] .* b) .* exp.(.-1.0im .* delta .* t) .* y[29] .- conj(f[4, 2] .* a) .* exp.(.-1.0im .* delta .* t) .* y[32]) .+
.-y[19]
dy[20] = dot(factor, f[3, 3] .* c .* exp.(1.0im .* delta .* t) .* conj(y[29]) .+ f[4, 3] .* b .* exp.(1.0im .* delta .* t) .* conj(y[32]) .+ f[5, 3] .* a .* exp.(1.0im .* delta .* t) .* conj(y[35]) .- conj(f[2, 2] .* c) .* exp.(.-1.0im .* delta .* t) .* y[27] .- conj(f[3, 2] .* b) .* exp.(.-1.0im .* delta .* t) .* y[30] .- conj(f[4, 2] .* a) .* exp.(.-1.0im .* delta .* t) .* y[33]) .+
.-y[20]
dy[21] = dot(factor, f[3, 3] .* c .* exp.(1.0im .* delta .* t) .* conj(y[30]) .+ f[4, 3] .* b .* exp.(1.0im .* delta .* t) .* conj(y[33]) .+ f[5, 3] .* a .* exp.(1.0im .* delta .* t) .* conj(y[36]) .- conj(f[3, 3] .* c) .* exp.(.-1.0im .* delta .* t) .* y[30] .- conj(f[4, 3] .* b) .* exp.(.-1.0im .* delta .* t) .* y[33] .- conj(f[5, 3] .* a) .* exp.(.-1.0im .* delta .* t) .* y[36]) .+
.-y[21]
dy[22] = dot(factor, f[1, 1] .* c .* exp.(1.0im .* delta .* t) .* y[1] .+ f[2, 1] .* b .* exp.(1.0im .* delta .* t) .* y[2] .+ f[3, 1] .* a .* exp.(1.0im .* delta .* t) .* y[3]) .+
-dot(factor, f[1, 1] .* c .* exp.(1.0im .* delta .* t) .* y[16]) .+
.-1 ./ 2 .* y[22]
dy[23] = dot(factor, f[2, 2] .* c .* exp.(1.0im .* delta .* t) .* y[2] .+ f[3, 2] .* b .* exp.(1.0im .* delta .* t) .* y[3] .+ f[4, 2] .* a .* exp.(1.0im .* delta .* t) .* y[4]) .+
-dot(factor, f[1, 1] .* c .* exp.(1.0im .* delta .* t) .* y[17]) .+
.-1 ./ 2 .* y[23]
dy[24] = dot(factor, f[3, 3] .* c .* exp.(1.0im .* delta .* t) .* y[3] .+ f[4, 3] .* b .* exp.(1.0im .* delta .* t) .* y[4] .+ f[5, 3] .* a .* exp.(1.0im .* delta .* t) .* y[5]) .+
-dot(factor, f[1, 1] .* c .* exp.(1.0im .* delta .* t) .* y[18]) .+
.-1 ./ 2 .* y[24]
dy[25] = dot(factor, f[1, 1] .* c .* exp.(1.0im .* delta .* t) .* conj(y[2]) .+ f[2, 1] .* b .* exp.(1.0im .* delta .* t) .* y[6] .+ f[3, 1] .* a .* exp.(1.0im .* delta .* t) .* y[7]) .+
-dot(factor, f[2, 1] .* b .* exp.(1.0im .* delta .* t) .* y[16] .+ f[2, 2] .* c .* exp.(1.0im .* delta .* t) .* conj(y[17])) .+
.-1 ./ 2 .* y[25]
dy[26] = dot(factor, f[2, 2] .* c .* exp.(1.0im .* delta .* t) .* y[6] .+ f[3, 2] .* b .* exp.(1.0im .* delta .* t) .* y[7] .+ f[4, 2] .* a .* exp.(1.0im .* delta .* t) .* y[8]) .+
-dot(factor, f[2, 1] .* b .* exp.(1.0im .* delta .* t) .* y[17] .+ f[2, 2] .* c .* exp.(1.0im .* delta .* t) .* y[19]) .+
.-1 ./ 2 .* y[26]
dy[27] = dot(factor, f[3, 3] .* c .* exp.(1.0im .* delta .* t) .* y[7] .+ f[4, 3] .* b .* exp.(1.0im .* delta .* t) .* y[8] .+ f[5, 3] .* a .* exp.(1.0im .* delta .* t) .* y[9]) .+
-dot(factor, f[2, 1] .* b .* exp.(1.0im .* delta .* t) .* y[18] .+ f[2, 2] .* c .* exp.(1.0im .* delta .* t) .* y[20]) .+
.-1 ./ 2 .* y[27]
dy[28] = dot(factor, f[1, 1] .* c .* exp.(1.0im .* delta .* t) .* conj(y[3]) .+ f[2, 1] .* b .* exp.(1.0im .* delta .* t) .* conj(y[7]) .+ f[3, 1] .* a .* exp.(1.0im .* delta .* t) .* y[10]) .+
-dot(factor, f[3, 1] .* a .* exp.(1.0im .* delta .* t) .* y[16] .+ f[3, 2] .* b .* exp.(1.0im .* delta .* t) .* conj(y[17]) .+ f[3, 3] .* c .* exp.(1.0im .* delta .* t) .* conj(y[18])) .+
.-1 ./ 2 .* y[28]
dy[29] = dot(factor, f[2, 2] .* c .* exp.(1.0im .* delta .* t) .* conj(y[7]) .+ f[3, 2] .* b .* exp.(1.0im .* delta .* t) .* y[10] .+ f[4, 2] .* a .* exp.(1.0im .* delta .* t) .* y[11]) .+
-dot(factor, f[3, 1] .* a .* exp.(1.0im .* delta .* t) .* y[17] .+ f[3, 2] .* b .* exp.(1.0im .* delta .* t) .* y[19] .+ f[3, 3] .* c .* exp.(1.0im .* delta .* t) .* conj(y[20])) .+
.-1 ./ 2 .* y[29]
dy[30] = dot(factor, f[3, 3] .* c .* exp.(1.0im .* delta .* t) .* y[10] .+ f[4, 3] .* b .* exp.(1.0im .* delta .* t) .* y[11] .+ f[5, 3] .* a .* exp.(1.0im .* delta .* t) .* y[12]) .+
-dot(factor, f[3, 1] .* a .* exp.(1.0im .* delta .* t) .* y[18] .+ f[3, 2] .* b .* exp.(1.0im .* delta .* t) .* y[20] .+ f[3, 3] .* c .* exp.(1.0im .* delta .* t) .* y[21]) .+
.-1 ./ 2 .* y[30]
dy[31] = dot(factor, f[1, 1] .* c .* exp.(1.0im .* delta .* t) .* conj(y[4]) .+ f[2, 1] .* b .* exp.(1.0im .* delta .* t) .* conj(y[8]) .+ f[3, 1] .* a .* exp.(1.0im .* delta .* t) .* conj(y[11])) .+
-dot(factor, f[4, 2] .* a .* exp.(1.0im .* delta .* t) .* conj(y[17]) .+ f[4, 3] .* b .* exp.(1.0im .* delta .* t) .* conj(y[18])) .+
.-1 ./ 2 .* y[31]
dy[32] = dot(factor, f[2, 2] .* c .* exp.(1.0im .* delta .* t) .* conj(y[8]) .+ f[3, 2] .* b .* exp.(1.0im .* delta .* t) .* conj(y[11]) .+ f[4, 2] .* a .* exp.(1.0im .* delta .* t) .* y[13]) .+
-dot(factor, f[4, 2] .* a .* exp.(1.0im .* delta .* t) .* y[19] .+ f[4, 3] .* b .* exp.(1.0im .* delta .* t) .* conj(y[20])) .+
.-1 ./ 2 .* y[32]
dy[33] = dot(factor, f[3, 3] .* c .* exp.(1.0im .* delta .* t) .* conj(y[11]) .+ f[4, 3] .* b .* exp.(1.0im .* delta .* t) .* y[13] .+ f[5, 3] .* a .* exp.(1.0im .* delta .* t) .* y[14]) .+
-dot(factor, f[4, 2] .* a .* exp.(1.0im .* delta .* t) .* y[20] .+ f[4, 3] .* b .* exp.(1.0im .* delta .* t) .* y[21]) .+
.-1 ./ 2 .* y[33]
dy[34] = dot(factor, f[1, 1] .* c .* exp.(1.0im .* delta .* t) .* conj(y[5]) .+ f[2, 1] .* b .* exp.(1.0im .* delta .* t) .* conj(y[9]) .+ f[3, 1] .* a .* exp.(1.0im .* delta .* t) .* conj(y[12])) .+
-dot(factor, f[5, 3] .* a .* exp.(1.0im .* delta .* t) .* conj(y[18])) .+
.-1 ./ 2 .* y[34]
dy[35] = dot(factor, f[2, 2] .* c .* exp.(1.0im .* delta .* t) .* conj(y[9]) .+ f[3, 2] .* b .* exp.(1.0im .* delta .* t) .* conj(y[12]) .+ f[4, 2] .* a .* exp.(1.0im .* delta .* t) .* conj(y[14])) .+
-dot(factor, f[5, 3] .* a .* exp.(1.0im .* delta .* t) .* conj(y[20])) .+
.-1 ./ 2 .* y[35]
dy[36] = dot(factor, f[3, 3] .* c .* exp.(1.0im .* delta .* t) .* conj(y[12]) .+ f[4, 3] .* b .* exp.(1.0im .* delta .* t) .* conj(y[14]) .+ f[5, 3] .* a .* exp.(1.0im .* delta .* t) .* y[15]) .+
-dot(factor, f[5, 3] .* a .* exp.(1.0im .* delta .* t) .* y[21]) .+
.-1 ./ 2 .* y[36]
end