I’ve written a long function, and according to https://docs.julialang.org/en/latest/manual/performance-tips/, I rewrote it with some functions to avoid the global variables. The performance is nicer now. My questions are, does the cost of function call matter in Julia? Is writing so many function necessary? how can I optimize my code (I found the line @. prop_e = cis( ele * (xplusy * dt/2.0)) and the “get momentum” part might be the bottleneck and the @threading strategy seems unsuitable because the gc time is very large)?

The code is

```
function evolution(u::Array{Complex128})
t = Array((1:nt) * dt)
ele = map(GetEleField, t)
save("elefield.jld", "t", t, "ele", ele)
FFTW.set_num_threads(Sys.CPU_CORES)
x, p = xpgrid()
########### common part
p_fft! = plan_fft!( similar(u), flags=FFTW.MEASURE )
prop_x, prop_p = operation_real_evolution(x, p)
prop_e = similar( prop_x )
r_a = 750 #ngrid * dx /2.0 - 80.0
δ = 15.0
r_sp = 120.0
δ_sp = 10.0
absorb = Array(Float64, ngrid, ngrid)
splitter = Array(Float64, ngrid, ngrid)
xplusy = Array(Float64, ngrid, ngrid)
for j in 1:ngrid
for i in 1:ngrid
xplusy[i, j] = x[i] + x[j]
absorb[i, j] = (1.0 - out(x[i], r_a, δ ))* (1.0 - out(x[j], r_a, δ))
splitter[i, j] = out(x[i], r_sp, δ_sp) * out(x[j], r_sp, δ_sp)
end
end
##### splitting
pp = similar(p)
A = -cumsum(ele) * dt
A² = A.*A
uo = zeros(Complex128, ngrid, ngrid)
up = zeros(Complex128, ngrid, ngrid)
#####
for i in 1:nt
step_evolution_real!(u, p_fft!, prop_x, prop_p, ele[i], xplusy, absorb, prop_e)
########## get momentum
if i % 3 == 0
@. uo = (u * splitter) * cis(A[i] * xplusy)
p_fft! * uo
pp .= (p.^2/2.0) * (nt-i) .+ p*sum(@view A[i:nt]) + sum(@view A²[i:nt])/2.0
for j2 in 1:ngrid
for j1 in 1:ngrid
uo[j1, j2] = uo[j1, j2] * cis( -dt * (pp[j1] + pp[j2]) )
end
end
up .+= uo
end
########### end get momentum
if (mod(i, 400) == 0)
println(100*i/real(nt),"%, time = ", now())
end
end
nothing
end
out(x, r_a, δ) = 1.0 ./ (1.0 + exp( -(abs(x) - r_a) ./δ ))
function step_evolution_real!(u::Array{Complex128, 2},
p_fft!::Base.DFT.FFTW.cFFTWPlan{Complex128, -1, true, 2},
prop_x::Array{Complex128, 2}, prop_p::Array{Complex128, 2},
ele::Float64, xplusy::Array{Float64, 2},
absorb::Array{Float64, 2}, prop_e::Array{Complex128, 2})
@. prop_e = cis( ele * (xplusy * dt/2.0))
u .*= prop_x .* prop_e
p_fft! * u
u .*= prop_p
p_fft! \ u
@. u *= prop_x * prop_e
u .*= absorb
nothing
end
function operation_real_evolution(x::Array{Float64, 1}, p::Array{Float64, 1})
prop_x = Array{Complex128}(ngrid , ngrid)
prop_p = Array{Complex128}(ngrid , ngrid)
for j in 1:ngrid
for i in 1:ngrid
prop_x[i, j] = cis( -(dt / 2) * Potential(x[i], x[j]) )
prop_p[i, j] = cis( -(dt / 2) * (p[i]^2 + p[j]^2) )
end
end
prop_x, prop_p
end
```