I’m using Zygote to auto-differentiate the (small) output of a numerical loop. It is quite slow, probably due to the naive way I’ve implemented it. I’m interested in advice on (a) vectorizing the loop to get better performance or (b) using foldl to achieve the same. I’m also open to using other packages if something is better suited to this.
# Try to speed up the core loop differentiation with either vectorization or a fold
begin # Packages
using SpecialFunctions
using BenchmarkTools
using Zygote
using SphericalHarmonics
end
begin # Functions
function ZygoteSL(U, L, μ, k, r, Ecm)
dr = r[2] - r[1]
len = size(r)[1]-1
ur = Zygote.bufferfrom(zeros(3))
ui = Zygote.bufferfrom(zeros(3))
dur = Zygote.bufferfrom(zeros(3))
dui = Zygote.bufferfrom(zeros(3))
a = r[end-2]
ur[2] = 1e-6
ui[1] = 1e-12
ui[2] = 1e-6
for i in 3:len
vreal = Ecm - U[i,1]
vimag = -U[i,2]
w = 2*μ/ħ^2*complex(vreal, vimag) - L*(L+1)/r[i]^2
vreal = Ecm -U[i-1,1]
vimag = -U[i-1,2]
wmo = 2*μ/ħ^2*complex(vreal, vimag) - L*(L+1)/r[i]^2
vreal = Ecm - U[i+1,1]
vimag = -U[i+1,2]
wpo = 2*μ/ħ^2*complex(vreal, vimag) - L*(L+1)/r[i]^2
uval = (2*complex(ur[2],ui[2])-complex(ur[1],ui[1])-(dr^2/12.)*(10*w*complex(ur[2],ui[2])+wmo*complex(ur[1],ui[1])))/(1+(dr^2/12)*wpo)
ur[3] = real.(uval)
dur[3] = 0.5*(ur[3]-ur[1])/dr
ui[3] = imag.(uval)
dui[3] = 0.5*(ui[3]-ui[1])/dr
ur[1:2] = ur[2:3]
dur[1:2] = dur[2:3]
ui[1:2] = ui[2:3]
dui[1:2] = dui[2:3]
end
ua = complex(ur[2],ui[2])
dua = complex(dur[3],dui[3])
RL = ua / dua
SLtop = Hminus(k, a, L) - RL*Hminusprime(k, a, L)
SLbot = Hplus(k, a, L) - RL*Hplusprime(k, a, L)
SL = SLtop/SLbot
return [copy(real(SL)), copy(imag(SL))]
end
# Coulomb funcs
function GL(k, r, L)
return -k*r*sphericalbessely(L, k*r)
end
function FL(k, r, L)
return k*r*sphericalbesselj(L, k*r)
end
# Spherical Hankel functions
function Hminus(k, r, L)
return complex(GL(k, r, L), -FL(k, r, L))
end
function Hplus(k, r, L)
return complex(GL(k, r, L), FL(k, r, L))
end
# Derivatives
function Hminusprime(k, r, L)
return complex(Zygote.gradient(x -> GL(k, x, L), r)[1], -Zygote.gradient(x -> FL(k, x, L), r)[1])
end
function Hplusprime(k, r, L)
return complex(Zygote.gradient(x -> GL(k, x, L), r)[1], Zygote.gradient(x -> FL(k, x, L), r)[1])
end
# Koning-Delaroche Potential
function kd_params(A, Z, E)
N = A - Z
Ef = -11.23814 + 0.02646*A
v1 = 59.3 - 21.0*(N-Z)/A - 0.024*A
v2 = 0.007228 - (1.48e-6)*A
v3 = 1.994e-5 - (2.e-8)*A
v4 = 7.e-9
Vo = v1*(1-v2*(E-Ef)+v3*(E-Ef)^2-v4*(E-Ef)^3)
ro = (1.3039 - 0.4054/A^(1/3))*A^(1/3)
ao = 0.6778 - (1.487e-4)*A
w1 = 12.195 + 0.0167*A
w2 = 73.55 + 0.0795*A
Wro = w1*(E-Ef)^2/((E-Ef)^2+w2^2)
d1 = 16 - 16*(N-Z)/A
d2 = 0.0180 + 0.003802/(1+exp((A-156.)/8.))
d3 = 11.5
rw = (1.3424 - 0.01585*A^(1/3))*A^(1/3)
aw = 0.5446 - (1.656e-4)*A
Wso = d1*(E-Ef)^2*exp(-d2*(E-Ef))/((E-Ef)^2+d3^2)
return Vo, ro, ao, Wro, Wso, rw, aw
end
function kd_pots(A, Z, E, r)
Vo, ro, ao, Wro, Wso, rw, aw = kd_params(A, Z, E)
Vr = -Vo ./(1 .+ exp.(-(ro.-r)./ao))
W = -Wro ./(1 .+ exp.(-(ro.-r)./ao))
Ws = -4 .* Wso .* exp.(-(rw.-r)./aw) ./(1 .+exp.(-(rw.-r)./aw)).^2
return Vr, W, Ws
end
end
# Set up particular scattering problem
A = 65
Z = 29
N = A - Z
E = 10
L = 14
Ecm = 9.848393154293218
μ = 925.3211722114523
k = 0.6841596644044445
r = Vector(LinRange(0, 20, 2000))
dr = r[2] - r[1]
const global ħ = 197.3269804
# General a potential from K-D
Vreal, Wv, Ws = kd_pots(A, Z, E, r);
U = Float32.(hcat(Vreal, Wv + Ws);)
begin # Zygote test
fz(U) = ZygoteSL(U, L, μ, k, r, Ecm)
# Try to calculate gradient
@btime global fz_val = fz(U)
@btime global dfz_val = Zygote.jacobian(U -> fz(U), U)
end
This gives me the following output:
401.193 μs (8223 allocations: 632.94 KiB)
326.888 ms (2576395 allocations: 210.58 MiB)
The differentiation is 1000x slower than the actual calculation!
Note: I’m on Julia 1.9.4 and Zygote v0.6.72 for compatibility reasons (maybe worth a separate post for those, as I’d love to work with newer versions).