Ultimately, I want to speed up fullsys!
, and as you can see there are functions that fill arrays. I thought those filling operations could be parallelized. How can I get started with that?
const szAge = 64 - 19 + 1
const szE = 2
const β = 0.92
const δ = 0.10
maxT(am, af; Z=realage(szAge)) = Z - max(am, af) + 1
realage(x) = x + 18
realageinv(x) = x - 18
function insideproduct_num(am, em, af, ef; μ0m=μ0m, μ0f=μ0f)
exponent = 0.5*(β*(1 - δ))
power = 1.0
p = one(promote_type(eltype(μ0m), eltype(μ0f)))
for k in 0:maxT(realage(am), realage(af))-1
p *= abs(μ0m[am + k, em] * μ0f[af + k, ef])^power
power *= exponent
end
return p
end
function insideproduct_den(am, em, af, ef; Sm=Sm, Sf=Sf)
exponent = 0.5*(β*(1 - δ))
power = 1.0
p = one(promote_type(eltype(Sm), eltype(Sf)))
for k in 0:maxT(realage(am), realage(af))-1
p *= abs(Sm[am + k, em] * Sf[af + k, ef])^power
power *= exponent
end
return p
end
function fillauxmat!(auxmat; Π=Π, Sm=Sm, Sf=Sf, μ0m=μ0m, μ0f=μ0f)
for ef in 1:szE, af in 1:szAge, em in 1:szE, am in 1:szAge
auxmat[am, em, af, ef] = Π[am, em, af, ef] * sqrt(Sm[am, em]*Sf[af, ef]) /
insideproduct_den(am, em, af, ef; Sm=Sm, Sf=Sf)
end
return auxmat
end
function MMsysM!(res, μ0m; auxmat=auxmat, Sm=Sm, μ0f=μ0f)
for em in 1:szE, am in 1:szAge
s = zero(eltype(μ0m))
for ef in 1:szE, af in 1:szAge
s += auxmat[am, em, af, ef] * insideproduct_num(am, em, af, ef; μ0m=μ0m, μ0f=μ0f)
end
res[am, em] = Sm[am, em] - μ0m[am, em] - s
end
return res
end
function MMsysF!(res, μ0f; auxmat=auxmat, Sf=Sf, μ0m=μ0m)
for ef in 1:szE, af in 1:szAge
s = zero(eltype(μ0f))
for em in 1:szE, am in 1:szAge
s += auxmat[am, em, af, ef] * insideproduct_num(am, em, af, ef; μ0m=μ0m, μ0f=μ0f)
end
res[af, ef] = Sf[af, ef] - μ0f[af, ef] - s
end
return res
end
function fullsys!(resm, resf; auxmat=auxmat, Sm=Sm, Sf=Sf, μ0m=μ0m, μ0f=μ0f)
fillauxmat!(auxmat; Π=Π, Sm=Sm, Sf=Sf, μ0m=μ0m, μ0f=μ0f)
MMsysM!(resm, μ0m; auxmat=auxmat, Sm=Sm, μ0f=μ0f)
MMsysF!(resf, μ0f; auxmat=auxmat, Sf=Sf, μ0m=μ0m)
return resm, resf
end
Π = 1000 .* rand(szAge, szE, szAge, szE)
auxmat = similar(Π)
Sm = 1000 .* rand(szAge, szE)
Sf = 1000 .* rand(szAge, szE)
μ0m = rand(szAge, szE)
μ0f = rand(szAge, szE)
resm = similar(μ0m)
resf = similar(μ0f)
for reference, here’s the benchmark for this version:
julia> @benchmark fullsys!($resm, $resf; auxmat=$auxmat, Sm=$Sm, Sf=$Sf, μ0m=$μ0m, μ0f=$μ0f)
BenchmarkTools.Trial: 625 samples with 1 evaluation.
Range (min … max): 6.852 ms … 13.761 ms ┊ GC (min … max): 0.00% … 0.00%
Time (median): 7.932 ms ┊ GC (median): 0.00%
Time (mean ± σ): 7.996 ms ± 515.543 μs ┊ GC (mean ± σ): 0.00% ± 0.00%
▄▂▃▄█▁▃▅▅▂▅▃▂ ▁ ▂
▂▂▂▂▃▃▃▄▄▃▄▇▆▄▆█████████████████▇█▇▆▅▅▅▆▅▃▃▃▃▃▃▂▃▂▄▃▃▁▁▂▁▁▂ ▄
6.85 ms Histogram: frequency by time 9.47 ms <
Memory estimate: 96 bytes, allocs estimate: 2.