I have a system of equations:
for a_m, a_f \in \{1, \ldots, 46\} and e_m, e_f \in \{1, 2\}, where T(a_m, a_f) = 46 - \max\{a_m, a_f\} + 1. S^m, S^f, \Pi, \mu^{0m}, \mu^{0f} are matrices. I need to solve that system for each of \mu^{0m}_{a_m, e_m}, \mu^{0f}_{a_f, e_f}.
I have a script that solves that system using a generalization of the IPFP algorithm:
using NLsolve
using LinearAlgebra
const Z = 46
const Ī“ = 0.1
const Ī² = 0.95
T(am, af) = Z - max(am, af) + 1
const IDX_ae = CartesianIndices((1:46, 1:2))
function insideproduct_num(am, em, af, ef; Ī¼0m=Ī¼0m, Ī¼0f=Ī¼0f)
exponent = Ī²*(1 - Ī“)
power = 1.0
p = zero(promote_type(eltype(Ī¼0m), eltype(Ī¼0f)))
for k in 0:T(am, af)-1
p += 0.5 * power * log(abs.(Ī¼0m[am + k, em] * Ī¼0f[af + k, ef]))
power *= exponent
end
return exp(p)
end
function insideproduct_den(am, em, af, ef; Sm=Sm, Sf=Sf)
exponent = Ī²*(1 - Ī“)
power = 1.0
p = zero(promote_type(eltype(Sm), eltype(Sf)))
for k in 0:T(am, af)-1
p += 0.5 * power * log(abs.(Sm[am + k, em] * Sf[af + k, ef]))
power *= exponent
end
return exp(p)
end
function fillauxmat!(auxmat; Ī =Ī , Sm=Sm, Sf=Sf, Ī¼0m=Ī¼0m, Ī¼0f=Ī¼0f)
for idx in CartesianIndices(auxmat)
am, em, af, ef = Tuple(idx)
auxmat[am, em, af, ef] = Ī [am, em, af, ef] * sqrt(Sm[am, em]*Sf[af, ef]) /
insideproduct_den(am, em, af, ef; Sm=Sm, Sf=Sf)
end
return auxmat
end
@views function MMsysM!(res, Ī¼0m; auxmat=auxmat, Sm=Sm, Ī¼0f=Ī¼0f)
for idx_ae in IDX_ae
am, em = Tuple(idx_ae)
s = zero(eltype(Ī¼0m))
for idx_fi in IDX_ae
s += auxmat[am, em, idx_fi[1], idx_fi[2]] * insideproduct_num(am, em, idx_fi[1], idx_fi[2]; Ī¼0m=Ī¼0m, Ī¼0f=Ī¼0f)
end
res[am, em] = Sm[am, em] - Ī¼0m[am, em] - s
end
return res
end
@views function MMsysF!(res, Ī¼0f; auxmat=auxmat, Sf=Sf, Ī¼0m=Ī¼0m)
for idx_ae in IDX_ae
af, ef = Tuple(idx_ae)
s = zero(eltype(Ī¼0f))
for idx_mi in IDX_ae
s += auxmat[idx_mi[1], idx_mi[2], af, ef] * insideproduct_num(idx_mi[1], idx_mi[2], af, ef; Ī¼0m=Ī¼0m, Ī¼0f=Ī¼0f)
end
res[af, ef] = Sf[af, ef] - Ī¼0f[af, ef] - s
end
return res
end
function ipfp(Ī , Sm, Sf; initĪ¼0=initĪ¼0, auxmat=auxmat, tol=1, method=:trust_region, autodiff=:forward, iterations=1_000,
innerxtol=1, innerftol=1)
i = 0
Ī¼0f1 = Sf
Ī¼0m = initĪ¼0
d = Inf
while d > tol && i <= iterations
fillauxmat!(auxmat; Ī =Ī , Sm=Sm, Sf=Sf, Ī¼0m=Ī¼0m, Ī¼0f=Ī¼0f1)
solM = NLsolve.nlsolve((res, Ī¼0m) -> MMsysM!(res, Ī¼0m; auxmat=auxmat, Sm=Sm, Ī¼0f=Ī¼0f1), initĪ¼0; method=method, autodiff=autodiff,
xtol=innerxtol, ftol=innerftol, iterations=iterations)
Ī¼0m = solM.zero
solF = NLsolve.nlsolve((res, Ī¼0f) -> MMsysF!(res, Ī¼0f; auxmat=auxmat, Sf=Sf, Ī¼0m=Ī¼0m), initĪ¼0; method=method, autodiff=autodiff,
xtol=innerxtol, ftol=innerftol, iterations=iterations)
Ī¼0f2 = solF.zero
d = norm(Ī¼0f1 - Ī¼0f2, Inf)
Ī¼0f1 = Ī¼0f2
i += 1
end
println("i = ", i, ", d = ", d)
return abs.(Ī¼0m), abs.(Ī¼0f1)
end
Initialize with
Ī = 5 .* rand(46, 2, 46, 2)
Sm = 5 .* rand(46, 2)
Sf = 5 .* rand(46, 2)
Ī¼0m = similar(Sm)
Ī¼0f = similar(Sf)
auxmat = similar(Ī )
initĪ¼0 = 5 .* ones(size(Sm))
Now run it
ipfp(Ī , Sm, Sf; initĪ¼0= initĪ¼0)
I would like to speed that up as much as possible. Right now it takes over 2 seconds to solve this toy example. Are there any obvious speed ups Iām missing? Perhaps multithreading?
Note: If I scale the matrices by a factor larger than 5, the algorithm slows down even more.