I have a system of equations:
for a_m, a_f \in \{1, \ldots, 46\} and e_m, e_f \in \{1, 2\}, where T(a_m, a_f) = 46 - \max\{a_m, a_f\} + 1. S^m, S^f, \Pi, \mu^{0m}, \mu^{0f} are matrices. I need to solve that system for each of \mu^{0m}_{a_m, e_m}, \mu^{0f}_{a_f, e_f}.
I have a script that solves that system using a generalization of the IPFP algorithm:
using NLsolve
using LinearAlgebra
const Z = 46
const Ī“ = 0.1
const β = 0.95
T(am, af) = Z - max(am, af) + 1
const IDX_ae = CartesianIndices((1:46, 1:2))
function insideproduct_num(am, em, af, ef; μ0m=μ0m, μ0f=μ0f)
exponent = β*(1 - Γ)
power = 1.0
p = zero(promote_type(eltype(μ0m), eltype(μ0f)))
for k in 0:T(am, af)-1
p += 0.5 * power * log(abs.(μ0m[am + k, em] * μ0f[af + k, ef]))
power *= exponent
end
return exp(p)
end
function insideproduct_den(am, em, af, ef; Sm=Sm, Sf=Sf)
exponent = β*(1 - Γ)
power = 1.0
p = zero(promote_type(eltype(Sm), eltype(Sf)))
for k in 0:T(am, af)-1
p += 0.5 * power * log(abs.(Sm[am + k, em] * Sf[af + k, ef]))
power *= exponent
end
return exp(p)
end
function fillauxmat!(auxmat; Π=Π, Sm=Sm, Sf=Sf, μ0m=μ0m, μ0f=μ0f)
for idx in CartesianIndices(auxmat)
am, em, af, ef = Tuple(idx)
auxmat[am, em, af, ef] = Ī [am, em, af, ef] * sqrt(Sm[am, em]*Sf[af, ef]) /
insideproduct_den(am, em, af, ef; Sm=Sm, Sf=Sf)
end
return auxmat
end
@views function MMsysM!(res, μ0m; auxmat=auxmat, Sm=Sm, μ0f=μ0f)
for idx_ae in IDX_ae
am, em = Tuple(idx_ae)
s = zero(eltype(μ0m))
for idx_fi in IDX_ae
s += auxmat[am, em, idx_fi[1], idx_fi[2]] * insideproduct_num(am, em, idx_fi[1], idx_fi[2]; μ0m=μ0m, μ0f=μ0f)
end
res[am, em] = Sm[am, em] - μ0m[am, em] - s
end
return res
end
@views function MMsysF!(res, μ0f; auxmat=auxmat, Sf=Sf, μ0m=μ0m)
for idx_ae in IDX_ae
af, ef = Tuple(idx_ae)
s = zero(eltype(μ0f))
for idx_mi in IDX_ae
s += auxmat[idx_mi[1], idx_mi[2], af, ef] * insideproduct_num(idx_mi[1], idx_mi[2], af, ef; μ0m=μ0m, μ0f=μ0f)
end
res[af, ef] = Sf[af, ef] - μ0f[af, ef] - s
end
return res
end
function ipfp(Π, Sm, Sf; initμ0=initμ0, auxmat=auxmat, tol=1, method=:trust_region, autodiff=:forward, iterations=1_000,
innerxtol=1, innerftol=1)
i = 0
μ0f1 = Sf
μ0m = initμ0
d = Inf
while d > tol && i <= iterations
fillauxmat!(auxmat; Π=Π, Sm=Sm, Sf=Sf, μ0m=μ0m, μ0f=μ0f1)
solM = NLsolve.nlsolve((res, μ0m) -> MMsysM!(res, μ0m; auxmat=auxmat, Sm=Sm, μ0f=μ0f1), initμ0; method=method, autodiff=autodiff,
xtol=innerxtol, ftol=innerftol, iterations=iterations)
μ0m = solM.zero
solF = NLsolve.nlsolve((res, μ0f) -> MMsysF!(res, μ0f; auxmat=auxmat, Sf=Sf, μ0m=μ0m), initμ0; method=method, autodiff=autodiff,
xtol=innerxtol, ftol=innerftol, iterations=iterations)
μ0f2 = solF.zero
d = norm(μ0f1 - μ0f2, Inf)
μ0f1 = μ0f2
i += 1
end
println("i = ", i, ", d = ", d)
return abs.(μ0m), abs.(μ0f1)
end
Initialize with
Ī = 5 .* rand(46, 2, 46, 2)
Sm = 5 .* rand(46, 2)
Sf = 5 .* rand(46, 2)
μ0m = similar(Sm)
μ0f = similar(Sf)
auxmat = similar(Ī )
initμ0 = 5 .* ones(size(Sm))
Now run it
ipfp(Π, Sm, Sf; initμ0= initμ0)
I would like to speed that up as much as possible. Right now it takes over 2 seconds to solve this toy example. Are there any obvious speed ups Iām missing? Perhaps multithreading?
Note: If I scale the matrices by a factor larger than 5, the algorithm slows down even more.