I’m trying to speed up this version of the IPFP algorithm, which alternates between solving 2 systems of nonlinear equations until convergence:
using NLsolve
using LinearAlgebra
const szAge = 64 - 19 + 1
const szE = 2
const β = 0.92
const δ = 0.10
maxT(am, af; Z=realage(szAge)) = Z - max(am, af) + 1
realage(x) = x + 18
realageinv(x) = x - 18
const IDX_ae = CartesianIndices((1:szAge, 1:szE))
@views function insideproduct(am, em, af, ef; μ0m=μ0m, μ0f=μ0f, Sm=Sm, Sf=Sf)
prod(abs((μ0m[am + k, em] * μ0f[af + k, ef])/(Sm[am + k, em] * Sf[af + k, ef]))^(0.5*(β*(1 - δ))^k) for k in 0:maxT(realage(am), realage(af))-1)
end
@views function MMsysM!(res, μ0m; Π::F=UMM, Sm=Sm, Sf=Sf, μ0f=μ0f) where F
for idx_ae in IDX_ae
am, em = Tuple(idx_ae)
res[am, em] = Sm[am, em] - μ0m[am, em] -
sum( Π(realage(am), em, realage(idx_fi[1]), idx_fi[2]) * sqrt(Sm[am, em]*Sf[idx_fi[1], idx_fi[2]]) *
insideproduct(am, em, idx_fi[1], idx_fi[2]; μ0m=μ0m, μ0f=μ0f, Sm=Sm, Sf=Sf) for idx_fi in IDX_ae )
end
return res
end
@views function MMsysF!(res, μ0f; Π::F=UMM, Sm=Sm, Sf=Sf, μ0m=μ0m) where F
for idx_ae in IDX_ae
af, ef = Tuple(idx_ae)
res[af, ef] = Sf[af, ef] - μ0f[af, ef] -
sum( Π(realage(idx_mi[1]), idx_mi[2], realage(af), ef) * sqrt(Sm[idx_mi[1], idx_mi[2]]*Sf[af, ef]) *
insideproduct(idx_mi[1], idx_mi[2], af, ef; μ0m=μ0m, μ0f=μ0f, Sm=Sm, Sf=Sf) for idx_mi in IDX_ae )
end
return res
end
function ipfp(Π::F, Sm, Sf; initμ0=initμ0, tol=1e-8, method=:trust_region, iterations=1_000, autodiff=:forward) where F
i = 0
μ0f1 = Sf
μ0m = initμ0
d = Inf
while d > tol && i <= iterations
solM = NLsolve.nlsolve((res, μ0m) -> MMsysM!(res, μ0m; Π=Π, Sm=Sm, Sf=Sf, μ0f=μ0f1), initμ0; method=method, autodiff=autodiff)
μ0m = solM.zero
solF = NLsolve.nlsolve((res, μ0f) -> MMsysF!(res, μ0f; Π=Π, Sm=Sm, Sf=Sf, μ0m=μ0m), initμ0; method=method, autodiff=autodiff)
μ0f2 = solF.zero
d = norm(μ0f1 - μ0f2, Inf)
μ0f1 = μ0f2
i += 1
end
return abs.(μ0m), abs.(μ0f1)
end
@views Π(am, em, af, ef; Πmat=Πmat) = Πmat[realageinv(am), em, realageinv(af), ef]
Πmat = 1000 .* rand(szAge, szE, szAge, szE)
Sm = 1000 .* rand(szAge, szE)
Sf = 1000 .* rand(szAge, szE)
initμ0 = ones(size(Sf))
Right now it takes 5.6 seconds on my computer:
julia> @benchmark ipfp((am, em, af, ef; Πmat=$Πmat) -> Π(am, em, af, ef; Πmat=$Πmat), $Sm, $Sf; method=:trust_region, autodiff=:finite)
BenchmarkTools.Trial: 1 sample with 1 evaluation.
Single result which took 5.585 s (0.00% GC) to evaluate,
with a memory estimate of 24.29 MiB, over 78419 allocations.