Speeding up repeated optimizations within a large loop

Hi there! I’m trying to solve an economic model with many discrete and continuous choices and a fairly large state space. My approach essentially boils down to solving several constrained optimization problems, using Optim.optimize(), picking the choice that yields the highest objective value, and then repeating this process for each point in the state space.

I’ve posted an MWE below, which takes around 4-5 seconds to run on my laptop. (EDIT: I simplified the original MWE quite a bit, on the advice of @stillyslalom and @DNF, so hopefully now it is easier to digest.)

I’m hoping to speed this code up. In particular, I was wondering whether there are any strategies for reducing the number of allocations when using Optim.optimize() repeatedly. At this stage, I would like to avoid parallelizing the large outer loop, because eventually it will be part of an even larger outer loop, to be parallelized.

Any help with speedups would be hugely appreciated!

using Parameters,Optim
using TimerOutputs

# parameters
parameter_defaults_test = @with_kw (
        β=0.91,
        ϕ=0.12,
        σ=2.0,
        ϑ=1/1.25,
        B=100.0,
        Wlux=7.7,
        δ=0.015,
        κm=2/52,
        κh=0.07,
        κi=0.05,
        γ=0.1,
        r=0.03,
        rh=r+0.01,
        ri=rh+0.006,
        ζ=0.4,
        ω=1.0)
# grids
agrid = collect(range(0,stop=40,length=26))
mgrid = collect(range(0,stop=5.87,length=15))
hgrid =  [1.56,3.11,5.87]
zgrid = [-1.645,-0.823,0.0,0.823,1.645]
grids = (agrid=agrid,mgrid=mgrid,hgrid=hgrid,zgrid=zgrid)
# aggregate prices (tests)
prices = @with_kw (P=1.0,R=0.05*P)
############################################
# Utility function
############################################
function FlowUtility(;
    ap=0,
    hr=0,
    hp=0,
    hip=0,
    indiv_state::NamedTuple,
    prices::NamedTuple,
    parameters::NamedTuple)

    @unpack ϕ,ϑ,σ,ω,γ,r,rh,ri,δ,κm,κh,κi = parameters
    @unpack a,h,mh,hi,mi,y = indiv_state
    @unpack P,R = prices

    # consumption
    c = y + (1+r)*a + (1-γ)*R*hip +
        (1-δ-ifelse(hp!=h,κh,0))*P*h + (1-δ-ifelse(hip!=hi,κi,0))*P*hi -
        (1+rh)*mh - (1+ri)*mi - P*hp - P*hip -
        ifelse(hp!=h && hp>0,κm,0) - ifelse(hip!=hi && hi>0,κm,0) -
        ap
    if c<=0
        utility = -10000.0
    else
        utility = ((1-ϕ)*c^(1-ϑ)+ϕ*(ω*hp)^(1-ϑ))^((1-σ)/(1-ϑ))/(1-σ)
    end
    return utility
end
############################################
# Bequest motive function
############################################
function ν(;
    ap,
    hp,
    hip,
    prices,
    parameters)
    @unpack σ,r,γ,B,Wlux = parameters
    @unpack P,R = prices
    return (B/(1-σ))*((1+r)*ap+P*(hp+hip)+Wlux)^(1-σ)
end
############################################
# INVESTOR'S PROBLEM IN LAST PERIOD OF LIFE
############################################
function solveVILastPeriod(parameters::NamedTuple,prices::NamedTuple,grids::NamedTuple)

    @unpack β,ζ,r = parameters
    @unpack δ,rh,ri,γ,κm,κh,κi = parameters
    @unpack P,R = prices
    @unpack agrid,mgrid,hgrid,zgrid = grids

    na=length(agrid)
    nh=length(hgrid)
    nm=length(mgrid)
    nz=length(zgrid)

    reset_timer!()
    @timeit "prealloc" begin
        # Preallocate value function and policy function arrays
        VI= Array{Float64,6}(undef, (na,nh,nm,nh,nm,nz));
        apolI = Array{Float64,6}(undef, (na,nh,nm,nh,nm,nz));
        hpolI = Array{Int64,6}(undef, (na,nh,nm,nh,nm,nz));
    end

    # Upper bound on liquid assets
    amax=agrid[end]

    # INVESTOR'S PROBLEM: V^I
    InvestorsCartesianIndex  = CartesianIndices((1:na,1:nh,1:nm,1:nh,1:nm,1:nz));
    @inbounds for s in InvestorsCartesianIndex
        VV=-10000.0
        aopt=-1.0
        hopt=-1
        ia = s[1]
        ih = s[2]
        imh = s[3]
        ihi = s[4]
        imi = s[5]
        iz = s[6]
        y = ζ*exp(0.96+zgrid[iz])
        a = agrid[ia]
        h = hgrid[ih]
        mh= mgrid[imh]
        hi= hgrid[ihi]
        mi= mgrid[imi]
        for (ihp,hp) in enumerate(hgrid)
            apmax = y + (1+r)*a + (1-δ-κi)*P*hi + (1-δ-ifelse(ihp!=ih,κh,0))*P*h -
                    P*hp - (1+rh)*mh - (1+ri)*mi - ifelse(ihp!=ih,κm,0)
            if apmax < 0 # infeasible
                v_temp=-10000.0
            else
                @timeit "objective" obj_sellinv(ap)= FlowUtility(ap=ap,hp=hp,
                                             indiv_state=(a=a,h=h,mh=mh,hi=hi,mi=mi,y=y),
                                             prices=(P=P,R=R),
                                             parameters=parameters) +
                                             β*ν(ap=ap,hp=hp,hip=0,prices=(P=P,R=R),parameters=parameters)
                @timeit "Optim" res = maximize(obj_sellinv,0,minimum([apmax,amax]))
                v_temp = Optim.maximum(res)
                apopt_temp = Optim.maximizer(res)
            end
            if v_temp>VV
                VV=v_temp
                aopt=apopt_temp
                hopt=ihp
            end
        end
        VI[s]=VV
        apolI[s]=aopt
        hpolI[s]=hopt
    end
    print_timer()
    return VI,apolI,hpolI
end
############################################
# Tests
############################################
solveVILastPeriod(parameter_defaults_test(),prices(),grids);

Below is the output from TimerOutputs, which shows that most of the time is spent within Optim's functions.

  ──────────────────────────────────────────────────────────────────────
                               Time                   Allocations      
                       ──────────────────────   ───────────────────────
   Tot / % measured:        4.60s / 86.7%            223MiB / 93.3%    

 Section       ncalls     time   %tot     avg     alloc   %tot      avg
 ──────────────────────────────────────────────────────────────────────
 Optim           735k    3.99s   100%  5.42μs    202MiB  97.1%     288B
   objective    11.8M    2.50s  62.7%   212ns     0.00B  0.00%    0.00B
 prealloc           1   21.8μs  0.00%  21.8μs   6.03MiB  2.90%  6.03MiB
 ──────────────────────────────────────────────────────────────────────

Regarding your MWE, it seems like it could be boiled down considerably further. You’re looping over a 6-dimensional grid of conditions, and solving 12 similar optimization problems (per my count) at each grid point. Can you trim that to a single optimization problem at one set of conditions (which should require no more than a few dozen lines of code)? The repeated structure means that any improvements should be easy to generalize.

Is there a particular reason that you chose Optim.jl (which is mostly meant for unconstrained optimization) instead of JuMP? The latter may be able to handle your constraints without resorting to brute force like

    if c<=0
        utility = -10000.0

@stillyslalom – thanks for your response. Yes, there are 13 sub-problems that I’m solving at each grid point. Each sub-problem has a slightly different objective function, so I’m not sure how I could trim/combine these into a single optimization problem. Any elaboration on what you had in mind would be great.

I will certainly look into using JuMP. I had previously read that JuMP was not designed to be used as a general-purpose nonlinear optimizer, which is why I went for Optim.jl.

It seems your objective function is not allocating, what is great. I would suggest using the profiler to figure out what is the costlier step now. Are your derivatives being calculated by ForwardDiff? It might be worth checking out if there are any opportunities to make the derivatives faster, like exploiting some constraint that is somehow missed by FormardDiff. Could you approximate the Hessian, for instance? Or maybe find a better initial guess for the solution?

You don’t need to combine everything into one problem, just pick one or two of the problems that you think are representative. Fixing that one problem is likely to fix others as well, and then you can move on with those that still are slow.

Working on that big wall of code is just too overwhelming for most other posters, simplifying the problem is step number one.

1 Like

I’m using Brent’s method above, which is a derivative-free method. From my limited testing, Brent seems to be faster for my example compared to using derivative-based methods, even with ForwardDiff.

Aha, got it! I’ve considerably simplified the MWE (below).

using Parameters,Optim
using TimerOutputs

# parameters
parameter_defaults_test = @with_kw (
        β=0.91,
        ϕ=0.12,
        σ=2.0,
        ϑ=1/1.25,
        B=100.0,
        Wlux=7.7,
        δ=0.015,
        κm=2/52,
        κh=0.07,
        κi=0.05,
        r=0.03,
        rh=r+0.01,
        ri=rh+0.006,
        P=1.0)
hgrid =  [1.56,3.11,5.87]
############################################
# Utility function
############################################
function RHSLastPeriod(;ap=0,hp=0,hip=0,
    indiv_state::NamedTuple,
    parameters::NamedTuple)

    @unpack β,ϕ,ϑ,σ,γ,r,rh,ri,δ,κm,κh,κi,B,Wlux,P = parameters
    @unpack a,h,mh,hi,mi,y = indiv_state

    # consumption
    c = y + (1+r)*a + (1-δ-κi)*P*hi + (1-δ-ifelse(hp!=h,κh,0))*P*h - P*hp -
        (1+rh)*mh - (1+ri)*mi -  ifelse(hp!=h && hp>0,κm,0) - ap
    if c<=0
        utility = -10000.0
    else
        utility = ((1-ϕ)*c^(1-ϑ)+ϕ*(hp)^(1-ϑ))^((1-σ)/(1-ϑ))/(1-σ) +
                    β*(B/(1-σ))*((1+r)*ap+P*(hp+hip)+Wlux)^(1-σ)
    end
    return utility
end
################################################################################
# Optimization problem in last period of life: BASELINE
################################################################################
function vdisILastPeriod(
    parameters::NamedTuple,
    hgrid::Array{Float64,1},
    indiv_state::NamedTuple)

    @unpack β,r,δ,rh,ri,κm,κh,κi,P = parameters
    @unpack a,h,mh,hi,mi,y = indiv_state
    reset_timer!()
    # Upper bound on liquid assets
    amax=40.0
    VV=-10000.0
    aopt=-1.0
    for (ihp,hp) in enumerate(hgrid)
        apmax = y + (1+r)*a + (1-δ-κi)*P*hi + (1-δ-ifelse(hp!=h,κh,0))*P*h -
                P*hp - (1+rh)*mh - (1+ri)*mi - ifelse(hp!=h,κm,0)
        if apmax < 0 # infeasible
            v_temp=-10000.0
        else
            @timeit "objective" obj(ap) = RHSLastPeriod(ap=ap,hp=hp,
                                            indiv_state=indiv_state,
                                            parameters=parameters)
            @timeit "Optim" res = maximize(obj,0,minimum([apmax,amax]),Brent())
            v_temp = Optim.maximum(res)
            apopt_temp = Optim.maximizer(res)
        end
        if v_temp>VV
            VV=v_temp
            aopt=apopt_temp
        end
    end
    print_timer()
    return VV,aopt
end
############################################
# Tests
############################################
pt = parameter_defaults_test()
@time VV1,aopt1=vdisILastPeriod(pt,hgrid,
                (a=1.0,h=hgrid[1],mh=0.5,hi=hgrid[1],mi=0.5,y=0.4));

With timer output:

 ──────────────────────────────────────────────────────────────────────
                               Time                   Allocations      
                       ──────────────────────   ───────────────────────
   Tot / % measured:       65.8μs / 89.5%           2.28KiB / 61.0%    

 Section       ncalls     time   %tot     avg     alloc   %tot      avg
 ──────────────────────────────────────────────────────────────────────
 Optim              2   58.9μs   100%  29.4μs   1.39KiB  100%      712B
   objective       86   35.4μs  60.1%   411ns     0.00B  0.00%    0.00B

Oops, sorry! I didn’t realize this was 1-dimensional! :slight_smile: In this case I’m partial to Halley’s method, although I wouldn’t guess that would matter the most. Still would recommend looking at the result from Profiler.jl to get more information. On the numerical side you can still figure out if you can improve the initialization.