Speed up solution of large system of equations

amrods · August 10, 2023, 2:25am

I have a system of equations:

\begin{aligned} & S^m_{a_m, e_m}-\mu^{0m}_{a_m, e_m}=\sum_{a_m=1}^{46} {\Pi}_{a_m, e_m, a_f, e_f}\sqrt{S^m_{a_m, e_m} S^f_{a_f, e_f}} \prod_{k=0}^{T(a_m, a_f)-1}\left(\frac{\mu^{0m}_{a_m+k, e_m} \mu^{0f}_{a_f+k, e_f}}{S^m_{a_m+k, e_m} S^f_{a_f+k, e_f}}\right)^{(1 / 2)(\beta(1-\delta))^k}, \\ & S^f_{a_f, e_f}-\mu^{0f}_{a_f, e_f}=\sum_{a_f=1}^{46} {\Pi}_{a_m, e_m, a_f, e_f}\sqrt{S^m_{a_m, e_m} S^f_{a_f, e_f}} \prod_{k=0}^{T(a_m, a_f)-1}\left(\frac{\mu^{0m}_{a_m+k, e_m} \mu^{0f}_{a_f+k, e_f}}{S^m_{a_m+k, e_m} S^f_{a_f+k, e_f}}\right)^{(1 / 2)(\beta(1-\delta))^k}. \end{aligned}

for a_m, a_f \in \{1, \ldots, 46\} and e_m, e_f \in \{1, 2\}, where T(a_m, a_f) = 46 - \max\{a_m, a_f\} + 1. S^m, S^f, \Pi, \mu^{0m}, \mu^{0f} are matrices. I need to solve that system for each of \mu^{0m}_{a_m, e_m}, \mu^{0f}_{a_f, e_f}.

I have a script that solves that system using a generalization of the IPFP algorithm:

using NLsolve
using LinearAlgebra

const Z = 46
const δ = 0.1
const β = 0.95

T(am, af) = Z - max(am, af) + 1

const IDX_ae = CartesianIndices((1:46, 1:2))

function insideproduct_num(am, em, af, ef; μ0m=μ0m, μ0f=μ0f)
    exponent = β*(1 - δ)
    power = 1.0
    p = zero(promote_type(eltype(μ0m), eltype(μ0f)))
    for k in 0:T(am, af)-1
        p += 0.5 * power * log(abs.(μ0m[am + k, em] * μ0f[af + k, ef]))
        power *= exponent
    end
    return exp(p)
end

function insideproduct_den(am, em, af, ef; Sm=Sm, Sf=Sf)
    exponent = β*(1 - δ)
    power = 1.0
    p = zero(promote_type(eltype(Sm), eltype(Sf)))
    for k in 0:T(am, af)-1
        p += 0.5 * power * log(abs.(Sm[am + k, em] * Sf[af + k, ef]))
        power *= exponent
    end
    return exp(p)
end

function fillauxmat!(auxmat; Π=Π, Sm=Sm, Sf=Sf, μ0m=μ0m, μ0f=μ0f)
    for idx in CartesianIndices(auxmat)
        am, em, af, ef = Tuple(idx)
        auxmat[am, em, af, ef] = Π[am, em, af, ef] * sqrt(Sm[am, em]*Sf[af, ef]) /
                insideproduct_den(am, em, af, ef; Sm=Sm, Sf=Sf)
    end
    return auxmat
end

@views function MMsysM!(res, μ0m; auxmat=auxmat, Sm=Sm, μ0f=μ0f)
    for idx_ae in IDX_ae
        am, em = Tuple(idx_ae)
        s = zero(eltype(μ0m))
        for idx_fi in IDX_ae
            s += auxmat[am, em, idx_fi[1], idx_fi[2]] * insideproduct_num(am, em, idx_fi[1], idx_fi[2]; μ0m=μ0m, μ0f=μ0f)
        end
        res[am, em] = Sm[am, em] - μ0m[am, em] - s
    end
    return res
end

@views function MMsysF!(res, μ0f; auxmat=auxmat, Sf=Sf, μ0m=μ0m)
    for idx_ae in IDX_ae
        af, ef = Tuple(idx_ae)
        s = zero(eltype(μ0f))
        for idx_mi in IDX_ae
            s += auxmat[idx_mi[1], idx_mi[2], af, ef] * insideproduct_num(idx_mi[1], idx_mi[2], af, ef; μ0m=μ0m, μ0f=μ0f)
        end
        res[af, ef] = Sf[af, ef] - μ0f[af, ef] - s
    end
    return res
end

function ipfp(Π, Sm, Sf; initμ0=initμ0, auxmat=auxmat, tol=1, method=:trust_region, autodiff=:forward, iterations=1_000,
        innerxtol=1, innerftol=1)
    i = 0
    μ0f1 = Sf
    μ0m = initμ0
    d = Inf
    while d > tol && i <= iterations
        fillauxmat!(auxmat; Π=Π, Sm=Sm, Sf=Sf, μ0m=μ0m, μ0f=μ0f1)
        solM = NLsolve.nlsolve((res, μ0m) -> MMsysM!(res, μ0m; auxmat=auxmat, Sm=Sm, μ0f=μ0f1), initμ0; method=method, autodiff=autodiff,
                    xtol=innerxtol, ftol=innerftol, iterations=iterations)
        μ0m = solM.zero
        solF = NLsolve.nlsolve((res, μ0f) -> MMsysF!(res, μ0f; auxmat=auxmat, Sf=Sf, μ0m=μ0m), initμ0; method=method, autodiff=autodiff,
                    xtol=innerxtol, ftol=innerftol, iterations=iterations)
        μ0f2 = solF.zero
        d = norm(μ0f1 - μ0f2, Inf)
        μ0f1 = μ0f2
        i += 1
    end
    println("i = ", i, ", d = ", d)
    return abs.(μ0m), abs.(μ0f1)
end

Initialize with

Π = 5 .* rand(46, 2, 46, 2)
Sm = 5 .* rand(46, 2)
Sf = 5 .* rand(46, 2)
μ0m = similar(Sm)
μ0f = similar(Sf)
auxmat = similar(Π)
initμ0 = 5 .* ones(size(Sm))

Now run it

ipfp(Π, Sm, Sf; initμ0= initμ0)

I would like to speed that up as much as possible. Right now it takes over 2 seconds to solve this toy example. Are there any obvious speed ups I’m missing? Perhaps multithreading?
Note: If I scale the matrices by a factor larger than 5, the algorithm slows down even more.

RobertGregg · August 10, 2023, 4:54am

So in situations like this I always check three things to see what might be slowing the code down:

@code_warntype to see if there are problems with identifying variable types
@benchmark from the BenchmarkTools.jl package to look at allocations
@profview to see where the function is spending most of its time.

When I ran @code_warntype ipfp(Π, Sm, Sf; initμ0= initμ0) I noticed that auxmat came back as type Any.

I think the issue is you have to pass the variables down through each function. When you write:

function ipfp(Π, Sm, Sf; initμ0=initμ0, auxmat=auxmat) #plus other keywords
...
end

The function ipfp doesn’t know what auxmat is because it hasn’t been defined yet. So when you call it without giving it auxmat, it has to go looking for the variable in the global scope.

You could instead write:

function ipfp(Π, Sm, Sf; initμ0=initμ0, auxmat) #no default value for auxmat
...
end

and call the function as:

ipfp(Π, Sm, Sf; initμ0=initμ0, auxmat=auxmat)

#This works too
#Julia is cool and will match keywords with variable names
ipfp(Π, Sm, Sf; initμ0, auxmat)

This will have to be fixed for the other function definitions as well.

amrods · August 10, 2023, 5:40am

Thank you for that. I wasn’t aware of that problem. I still get some Any types. I suspect the problem is that Julia doesn’t know the type of the object that returns the zero of the system inside ipfp. Any suggestions for that?

using NLsolve
using LinearAlgebra
using Random

Random.seed!(1293804)

const Z = 46
const δ = 0.1
const β = 0.95

T(am, af) = Z - max(am, af) + 1

const IDX_ae = CartesianIndices((1:46, 1:2))

function insideproduct_num(am, em, af, ef; μ0m, μ0f)
    exponent = β*(1 - δ)
    power = 1.0
    p = zero(promote_type(eltype(μ0m), eltype(μ0f)))
    for k in 0:T(am, af)-1
        p += 0.5 * power * log(abs.(μ0m[am + k, em] * μ0f[af + k, ef]))
        power *= exponent
    end
    return exp(p)
end

function insideproduct_den(am, em, af, ef; Sm, Sf)
    exponent = β*(1 - δ)
    power = 1.0
    p = zero(promote_type(eltype(Sm), eltype(Sf)))
    for k in 0:T(am, af)-1
        p += 0.5 * power * log(abs.(Sm[am + k, em] * Sf[af + k, ef]))
        power *= exponent
    end
    return exp(p)
end

function fillauxmat!(auxmat; Π, Sm, Sf)
    for idx in CartesianIndices(auxmat)
        am, em, af, ef = Tuple(idx)
        auxmat[am, em, af, ef] = Π[am, em, af, ef] * sqrt(Sm[am, em]*Sf[af, ef]) /
                insideproduct_den(am, em, af, ef; Sm=Sm, Sf=Sf)
    end
    return auxmat
end

@views function MMsysM!(res, μ0m; auxmat, Sm, μ0f)
    for idx_ae in IDX_ae
        am, em = Tuple(idx_ae)
        s = zero(eltype(μ0m))
        for idx_fi in IDX_ae
            s += auxmat[am, em, idx_fi[1], idx_fi[2]] * insideproduct_num(am, em, idx_fi[1], idx_fi[2]; μ0m=μ0m, μ0f=μ0f)
        end
        res[am, em] = Sm[am, em] - μ0m[am, em] - s
    end
    return res
end

@views function MMsysF!(res, μ0f; auxmat, Sf, μ0m)
    for idx_ae in IDX_ae
        af, ef = Tuple(idx_ae)
        s = zero(eltype(μ0f))
        for idx_mi in IDX_ae
            s += auxmat[idx_mi[1], idx_mi[2], af, ef] * insideproduct_num(idx_mi[1], idx_mi[2], af, ef; μ0m=μ0m, μ0f=μ0f)
        end
        res[af, ef] = Sf[af, ef] - μ0f[af, ef] - s
    end
    return res
end

function ipfp(Π, Sm, Sf; initμ0, auxmat, tol=1e-3, method=:trust_region, autodiff=:forward, iterations=1_000,
        innerxtol=1e-3, innerftol=1e-3)
    i = 0
    μ0f1 = Sf
    μ0m = initμ0
    d = Inf
    fillauxmat!(auxmat; Π=Π, Sm=Sm, Sf=Sf)
    while d > tol && i <= iterations
        solM = NLsolve.nlsolve((res, μ0m) -> MMsysM!(res, μ0m; auxmat=auxmat, Sm=Sm, μ0f=μ0f1), initμ0; method=method, autodiff=autodiff,
                    xtol=innerxtol, ftol=innerftol, iterations=iterations)
        μ0m = solM.zero
        solF = NLsolve.nlsolve((res, μ0f) -> MMsysF!(res, μ0f; auxmat=auxmat, Sf=Sf, μ0m=μ0m), initμ0; method=method, autodiff=autodiff,
                    xtol=innerxtol, ftol=innerftol, iterations=iterations)
        μ0f2 = solF.zero
        d = norm(μ0f1 - μ0f2, Inf)
        μ0f1 = μ0f2
        i += 1
    end
    println("i = ", i, ", d = ", d)
    return abs.(μ0m), abs.(μ0f1)
end

RobertGregg · August 10, 2023, 6:39am

Here is a slightly better version that doesn’t given Any types for the output:

function ipfp(Π, Sm, Sf; initμ0, auxmat, tol=1, method=:trust_region, autodiff=:forward, iterations=1_000, innerxtol=1, innerftol=1)
    i = 0
    μ0f1 = Sf   #maybe μ0f1 = copy(Sf) instead?
    μ0f2 = Sf
    μ0m = initμ0
    d = Inf
    while d > tol && i <= iterations
        fillauxmat!(auxmat; Π=Π, Sm=Sm, Sf=Sf, μ0m=μ0m, μ0f=μ0f1)
        solM = NLsolve.nlsolve((res, μ0m) -> MMsysM!(res, μ0m; auxmat=auxmat, Sm=Sm, μ0f=μ0f1), initμ0; method=method, autodiff=autodiff,
                    xtol=innerxtol, ftol=innerftol, iterations=iterations)
        μ0m .= solM.zero
        
        solF = NLsolve.nlsolve((res, μ0f) -> MMsysF!(res, μ0f; auxmat=auxmat, Sf=Sf, μ0m=μ0m), initμ0; method=method, autodiff=autodiff,
                    xtol=innerxtol, ftol=innerftol, iterations=iterations)
        μ0f2 .= solF.zero
        d = norm(μ0f1 - μ0f2, Inf)
        μ0f1 .= μ0f2
        i += 1
    end
    println("i = ", i, ", d = ", d)
    return abs.(μ0m), abs.(μ0f1)
end

I think there is some confusion when you assign one variable to another. When you write μ0f1 = Sf you have two variables pointing to the same data in memory (updating μ0f1 will also update Sf). If you don’t want Sf to change you can write μ0f1 = copy(Sf). Also here I use the .= notation which takes all the values on the right and assigns them to the same positions on the left (assuming the right and left variables are already defined).

There’s still something weird happening with the @code_warntype output. It looks like a temporary variable is being defined or something?

@code_warntype ipfp(Π, Sm, Sf; initμ0, auxmat)
...
Locals
  @_7::Any
...

I think it has something to do with the nlsolve call but I’m not sure.

ChrisRackauckas · August 10, 2023, 9:04am

If you profile, what’s taking all of the time? I presume setting up multiple nonlinear solves is not the way to go. Also, generally for large problems using NonlinearSolve and specializing the linear solver towards something for large (sparse) Jacobians is required for anything large.

amrods · August 10, 2023, 10:29am

@profview doesn’t seem to be working, the pane shows up blank. I have tried using NonlinearSolve, but I can’t figure out how to specify the problem

ctkelley · August 10, 2023, 11:15am

While it would be some extra work, Newton-Krylov might help you. There’s some code for that here.

ChrisRackauckas · August 10, 2023, 11:24am

What did you try? It’s almost exactly the same as NLsolve in the high level interface, you just add p (which you can then reuse to remake).

gdalle · August 10, 2023, 5:15pm

This used to be a bug in the VSCode extension but it was fixed recently. Upgrading the extension should suffice, but if not, does the flame graph appear when you re-run the profiling command a second time?

amrods · August 10, 2023, 6:55pm

I wanted to reuse the work I have done. In particular the functions MMsysM! and MMsysF!.
Those functions are called as MMsysM!(res, μ0m; auxmat, Sm, μ0f), where the kw arguments are basically parameters, however auxmat and Sm don’t change for a particular problem.
I did this:

probM = NonlinearProblem{isinplace}((res, μ0m, μ0f) -> MMsysM!(res, μ0m; auxmat, Sm, μ0f), initμ0, μ0f)

I am not sure whether the mutating argument is supposed to be the first one. Also, I am not sure how to change the parameters after probM is defined. I couldn’t find that in the documentation.

amrods · August 10, 2023, 7:00pm

I didn’t know I had to run it twice. I was giving up too easily. How can I export the graph so I can upload it here? (And how do I read interpret it). Here is a link to the html file.

gdalle · August 10, 2023, 7:30pm

You shouldn’t have to run it twice, that’s why it’s a bug

I’m not sure how you did it, but it looks like you managed just fine by linking to the HTML. It’s much more useful than a static screenshot anyway because it’s interactive.
And in VSCode it’s even nicer because when you control + click the profile it leads you to the relevant line in your code.

https://www.julia-vscode.org/docs/stable/userguide/profiler/

Roughly speaking, you want the tiles to be:

mostly blue, because yellow = allocations (your code uses a lot of memory) and red = runtime dispatch (your code struggles to infer types)
related to the functions where you expect to be spending time (that’s problem-specific)

At first glance, your profile doesn’t scream “underoptimized” to me, because most of the time is spent in mathematical operations, and the red tiles are related to autodiff (thus sometimes unavoidable). But I don’t know the details.

To go further, check out this blog post, which is the best resource I know on optimizing Julia code. It has a profiling section and much more:

https://viralinstruction.com/posts/optimise/#how_to_optimise_julia_code_a_practical_guide

amrods · August 10, 2023, 7:36pm

It seems the fix for the bug hasn’t found its way into a release. I have version 1.47.2. I didn’t want to install a pre-release version. Thanks for the resources.

ChrisRackauckas · August 10, 2023, 8:02pm

It looks like just μ0f is your parameters? Then you can just enclose the others

probM = NonlinearProblem{isinplace}((res, μ0f) -> MMsysM!(res, μ0m; auxmat, Sm, μ0f), initμ0, μ0f)

amrods · August 10, 2023, 9:50pm

I don’t need to specify the arguments? In the example in the documentation f(u, p), u is the input of the function, and p the parameters. That gets translated to probN = NonlinearProblem(f, u0, p).
In my case, μ0m is the input, μ0f the parameters, but res is the array where the residuals of the system are stored. Just to clarify, writing isinplace takes care of all that so that I can specify the function as you did?

ChrisRackauckas · August 10, 2023, 10:05pm

Do you have an MWE? I’m trying to run the following:

using NLsolve
using LinearAlgebra
using Random

Random.seed!(1293804)

const Z = 46
const δ = 0.1
const β = 0.95

T(am, af) = Z - max(am, af) + 1

const IDX_ae = CartesianIndices((1:46, 1:2))

function insideproduct_num(am, em, af, ef; μ0m, μ0f)
    exponent = β*(1 - δ)
    power = 1.0
    p = zero(promote_type(eltype(μ0m), eltype(μ0f)))
    for k in 0:T(am, af)-1
        p += 0.5 * power * log(abs.(μ0m[am + k, em] * μ0f[af + k, ef]))
        power *= exponent
    end
    return exp(p)
end

function insideproduct_den(am, em, af, ef; Sm, Sf)
    exponent = β*(1 - δ)
    power = 1.0
    p = zero(promote_type(eltype(Sm), eltype(Sf)))
    for k in 0:T(am, af)-1
        p += 0.5 * power * log(abs.(Sm[am + k, em] * Sf[af + k, ef]))
        power *= exponent
    end
    return exp(p)
end

function fillauxmat!(auxmat; Π, Sm, Sf)
    for idx in CartesianIndices(auxmat)
        am, em, af, ef = Tuple(idx)
        auxmat[am, em, af, ef] = Π[am, em, af, ef] * sqrt(Sm[am, em]*Sf[af, ef]) /
                insideproduct_den(am, em, af, ef; Sm=Sm, Sf=Sf)
    end
    return auxmat
end

@views function MMsysM!(res, μ0m; auxmat, Sm, μ0f)
    for idx_ae in IDX_ae
        am, em = Tuple(idx_ae)
        s = zero(eltype(μ0m))
        for idx_fi in IDX_ae
            s += auxmat[am, em, idx_fi[1], idx_fi[2]] * insideproduct_num(am, em, idx_fi[1], idx_fi[2]; μ0m=μ0m, μ0f=μ0f)
        end
        res[am, em] = Sm[am, em] - μ0m[am, em] - s
    end
    return res
end

@views function MMsysF!(res, μ0f; auxmat, Sf, μ0m)
    for idx_ae in IDX_ae
        af, ef = Tuple(idx_ae)
        s = zero(eltype(μ0f))
        for idx_mi in IDX_ae
            s += auxmat[idx_mi[1], idx_mi[2], af, ef] * insideproduct_num(idx_mi[1], idx_mi[2], af, ef; μ0m=μ0m, μ0f=μ0f)
        end
        res[af, ef] = Sf[af, ef] - μ0f[af, ef] - s
    end
    return res
end

function ipfp(Π, Sm, Sf; initμ0, auxmat, tol=1e-3, method=:trust_region, autodiff=:forward, iterations=1_000,
        innerxtol=1e-3, innerftol=1e-3)
    i = 0
    μ0f1 = Sf
    μ0m = initμ0
    d = Inf
    fillauxmat!(auxmat; Π=Π, Sm=Sm, Sf=Sf)
    while d > tol && i <= iterations
        solM = NLsolve.nlsolve((res, μ0m) -> MMsysM!(res, μ0m; auxmat=auxmat, Sm=Sm, μ0f=μ0f1), initμ0; method=method, autodiff=autodiff,
                    xtol=innerxtol, ftol=innerftol, iterations=iterations)
        μ0m = solM.zero
        solF = NLsolve.nlsolve((res, μ0f) -> MMsysF!(res, μ0f; auxmat=auxmat, Sf=Sf, μ0m=μ0m), initμ0; method=method, autodiff=autodiff,
                    xtol=innerxtol, ftol=innerftol, iterations=iterations)
        μ0f2 = solF.zero
        d = norm(μ0f1 - μ0f2, Inf)
        μ0f1 = μ0f2
        i += 1
    end
    println("i = ", i, ", d = ", d)
    return abs.(μ0m), abs.(μ0f1)
end


Π = 5 .* rand(46, 2, 46, 2)
Sm = 5 .* rand(46, 2)
Sf = 5 .* rand(46, 2)
μ0m = similar(Sm)
μ0f = similar(Sf)
auxmat = similar(Π)
initμ0 = 5 .* ones(size(Sm))

ipfp(Π, Sm, Sf; initμ0, auxmat)

but it just runs forever.

amrods · August 10, 2023, 10:44pm

Sorry about that. This should work:

using NLsolve
using LinearAlgebra
using Random

Random.seed!(1293804)

const Z = 46
const δ = 0.1
const β = 0.95

T(am, af) = Z - max(am, af) + 1

const IDX_ae = CartesianIndices((1:46, 1:2))

function insideproduct_num(am, em, af, ef; μ0m, μ0f)
    exponent = β*(1 - δ)
    power = 1.0
    p = zero(promote_type(eltype(μ0m), eltype(μ0f)))
    for k in 0:T(am, af)-1
        p += 0.5 * power * log(abs.(μ0m[am + k, em] * μ0f[af + k, ef]))
        power *= exponent
    end
    return exp(p)
end

function insideproduct_den(am, em, af, ef; Sm, Sf)
    exponent = β*(1 - δ)
    power = 1.0
    p = zero(promote_type(eltype(Sm), eltype(Sf)))
    for k in 0:T(am, af)-1
        p += 0.5 * power * log(Sm[am + k, em] * Sf[af + k, ef])
        power *= exponent
    end
    return exp(p)
end

function fillauxmat!(auxmat; Π=Π, Sm, Sf)
    for idx in CartesianIndices(auxmat)
        am, em, af, ef = Tuple(idx)
        auxmat[am, em, af, ef] = Π[am, em, af, ef] * sqrt(Sm[am, em]*Sf[af, ef]) /
                insideproduct_den(am, em, af, ef; Sm=Sm, Sf=Sf)
    end
    return auxmat
end

function MMsysM!(res, μ0m; auxmat, Sm, μ0f)
    for idx_ae in IDX_ae
        am, em = Tuple(idx_ae)
        s = zero(eltype(μ0m))
        for idx_fi in IDX_ae
            s += auxmat[am, em, idx_fi[1], idx_fi[2]] * insideproduct_num(am, em, idx_fi[1], idx_fi[2]; μ0m=μ0m, μ0f=μ0f)
        end
        res[am, em] = Sm[am, em] - μ0m[am, em] - s
    end
    return res
end

function MMsysF!(res, μ0f; auxmat, Sf, μ0m)
    for idx_ae in IDX_ae
        af, ef = Tuple(idx_ae)
        s = zero(eltype(μ0f))
        for idx_mi in IDX_ae
            s += auxmat[idx_mi[1], idx_mi[2], af, ef] * insideproduct_num(idx_mi[1], idx_mi[2], af, ef; μ0m=μ0m, μ0f=μ0f)
        end
        res[af, ef] = Sf[af, ef] - μ0f[af, ef] - s
    end
    return res
end

function ipfp(Π, Sm, Sf; initμ0, auxmat, tol=1, method=:trust_region, autodiff=:forward, iterations=1_000,
        innerxtol=1, innerftol=1)
    i = 0
    μ0f1 = copy(Sf)
    μ0f2 = copy(Sf)
    μ0m = initμ0
    d = Inf
    fillauxmat!(auxmat; Π=Π, Sm=Sm, Sf=Sf)
    while d > tol && i <= iterations
        solM = NLsolve.nlsolve((res, μ0m) -> MMsysM!(res, μ0m; auxmat=auxmat, Sm=Sm, μ0f=μ0f1), initμ0; method=method, autodiff=autodiff,
                    xtol=innerxtol, ftol=innerftol, iterations=iterations)
        μ0m .= solM.zero
        solF = NLsolve.nlsolve((res, μ0f) -> MMsysF!(res, μ0f; auxmat=auxmat, Sf=Sf, μ0m=μ0m), initμ0; method=method, autodiff=autodiff,
                    xtol=innerxtol, ftol=innerftol, iterations=iterations)
        μ0f2 .= solF.zero
        d = norm(μ0f1 - μ0f2, Inf)
        μ0f1 .= μ0f2
        i += 1
    end
    print("iter = ", i, ", tol = ", d)
    return abs.(μ0m), abs.(μ0f1)
end

Π1 = rand(46, 2, 46, 2)
Sm1 = rand(46, 2)
Sf1 = rand(46, 2)
μ0m = similar(Sm1)
μ0f = similar(Sf1)
auxmat = zeros(size(Π1))
initμ0 = 1 .* ones(size(Sm1))
res1 = similar(initμ0)

@time sol1 = ipfp(Π1, Sm1, Sf1; initμ0=initμ0, auxmat=auxmat, tol=1e-4)

I’m trying to test the somewhat full system, but with toy inputs to assess speed.

amrods · August 13, 2023, 1:16am

Two further questions:

I noticed that solving the system slows down for “larger” values of S_m and S_f. For example:

Sm5 = 5 .* Sm1
Sf5 = 5 .* Sf1

Since the system is homogeneous of degree zero in S_m, S_f, \mu^{0m}, \mu^{0f}, is it ok to scale down the system, solve it (presumably quicker) and then scale it back up? I note that all those variables must be in the same units.

The way this generalization of IPFP works is by sequentially solving subsystems until convergence. I thought of having the tolerance of solving those subsystems depend on how close the global algorithm is from convergence. That way, when the global algorithm is far from convergence, the subsystems do not need to be solved to high precision (and so will solve quickly), but when the global algorithm is close to convergence, then subsystems will be solved to higher precision. I modified the ipfp function to:

function ipfp_ad(Π, Sm, Sf; initμ0, auxmat, tol=1, method=:trust_region, autodiff=:forward, iterations=1_000)
    i = 0
    μ0f1 = copy(Sf)
    μ0f2 = copy(Sf)
    μ0m = initμ0
    d = Inf
    fillauxmat!(auxmat; Π=Π, Sm=Sm, Sf=Sf)
    while d > tol && i <= iterations
        if d > 1
            solM = NLsolve.nlsolve((res, μ0m) -> MMsysM!(res, μ0m; auxmat=auxmat, Sm=Sm, μ0f=μ0f1), initμ0; method=method, autodiff=autodiff,
                        xtol=1, ftol=1, iterations=iterations)
            μ0m .= solM.zero
            solF = NLsolve.nlsolve((res, μ0f) -> MMsysF!(res, μ0f; auxmat=auxmat, Sf=Sf, μ0m=μ0m), initμ0; method=method, autodiff=autodiff,
                        xtol=1, ftol=1, iterations=iterations)
            μ0f2 .= solF.zero
            d = norm(μ0f1 - μ0f2, Inf)
            μ0f1 .= μ0f2
        else
            solM = NLsolve.nlsolve((res, μ0m) -> MMsysM!(res, μ0m; auxmat=auxmat, Sm=Sm, μ0f=μ0f1), initμ0; method=method, autodiff=autodiff,
                        xtol=d, ftol=d, iterations=iterations)
            μ0m .= solM.zero
            solF = NLsolve.nlsolve((res, μ0f) -> MMsysF!(res, μ0f; auxmat=auxmat, Sf=Sf, μ0m=μ0m), initμ0; method=method, autodiff=autodiff,
                        xtol=d, ftol=d, iterations=iterations)
            μ0f2 .= solF.zero
            d = norm(μ0f1 - μ0f2, Inf)
            μ0f1 .= μ0f2
        end
        i += 1
    end
    print("iter = ", i, ", tol = ", d)
    return abs.(μ0m), abs.(μ0f1)
end

What do people think of those 2 things?

amrods · August 19, 2023, 3:19pm

anyone?

Topic		Replies	Views
Help optimizing IPFP algorithm Performance nlsolve	6	569	January 13, 2022
Julia equivalent for python scipy.optimize.fsolve General Usage package , roots	29	4679	September 7, 2020
Domain error using nlsolve New to Julia	27	2972	May 10, 2020
Differences between NLsolve and Optim in solving system of equations Optimization (Mathematical)	24	3039	November 16, 2022
Nlsolve for find root for a system of ODE for an implicit runge kutta method New to Julia package	32	1920	July 20, 2022

Speed up solution of large system of equations

Related topics