Memory allocation and performance

Hi guys,

I’m using Julia to compute a dynamic programming model. There is one particular part of the model that takes a long time to run (about 15 seconds, which is a lot, given that there is an iterative procedure involved). I ran the @time macro on it and the output is:

15.600687 seconds (75.16 M allocations: 31.067 GiB, 43.40% gc time)

This part of the code is as follows:

function solv_active(pa::Param,pr::Prices,q::Array{<:Real},vn_ret::Array{<:Real},vh_ret::Array{<:Real},vd_ret::Array{<:Real})
    # vn_ret is the value function of retired renters in the first period
    # vh_ret is the value function of retired owners in the first period
    # vd_ret is the value function of retired defaulters in the first period
    @unpack Jret, J, β, ϑ, H_num, na, nϵ, nb, b_grid, ϵ_chain, b_min, M_num, infm = pa
    @unpack q_b, w = pr
    ϵ_grid = ϵ_chain.state_values   # idiosyncratic prod states
    nw = Jret-1                     # number of working periods

    # Renters - array pre-allocation
    vn_act   = Array{Float64}(undef,nϵ,na,nb,nw)       # value function
    cnf_act  = similar(vn_act)                   # consumption function
    bnf_act  = similar(vn_act)                   # asset function
    htnf_act = similar(vn_act)                   # rent decision
    hnf_act  = similar(vn_act)                   # house decision
    mnf_act  = similar(vn_act)                   # mortgage decision
    go_act   = Array{Int}(undef,nϵ,na,nb,nw)         # decision to own

    # Homeowners - array pre-allocation
    vh_act   = Array{Float64}(undef,nϵ,na,M_num,H_num,nb,nw)          # value function
    chf_act  = similar(vh_act)                      # consumption function
    bhf_act  = similar(vh_act)                      # asset function
    hthf_act = similar(vh_act)                      # rent decision
    hhf_act  = similar(vh_act)                      # house decision
    mhf_act  = similar(vh_act)                      # mortgage decision
    πf_act   = similar(vh_act)                      # mortgage payment function
    gh_act   = Array{Int}(undef,nϵ,na,M_num,H_num,nb,nw) # decision

    # Defaulters - array pre-allocation
    vd_act   = Array{Float64}(undef,nϵ,na,M_num,nb,nw)          # value function
    cdf_act  = similar(vd_act)                   # consumption function
    bdf_act  = similar(vd_act)                   # asset function
    htdf_act = similar(vd_act)                   # rent decision
    πdf_act  = similar(vd_act)                   # mortgage balance

    # last year of active life
    Threads.@threads for ib = 1:nb
        # renter
        for ia = 1:na, iϵ = 1:nϵ
            bn,vn,cn,htn,hn,mn,gn      = solve_act_last_rent(pa,pr,ib,iϵ,ia,q[iϵ,ia,:,:,:,Jret-1],vn_ret[iϵ,ia,:],vh_ret[iϵ,ia,:,:,:])
            vn_act[iϵ,ia,ib,end]       = vn    # value function
            cnf_act[iϵ,ia,ib,end]      = cn    # consumption function
            bnf_act[iϵ,ia,ib,end]      = bn    # bond function
            htnf_act[iϵ,ia,ib,end]     = htn   # decision to rent
            hnf_act[iϵ,ia,ib,end]      = hn    # own house
            mnf_act[iϵ,ia,ib,end]      = mn    # mortage decision
            go_act[iϵ,ia,ib,end]       = gn    # decision to own
        end
    end

    Threads.@threads for ib = 1:nb
        # homeowner
        for ih = 1:H_num, im = 1:M_num, ia = 1:na, iϵ = 1:nϵ
            bh,vh,ch,hth,hh,mh,πh,gh       = solve_act_last_home(pa,pr,ib,iϵ,ih,im,ia,q[iϵ,ia,:,:,:,Jret-1],vn_ret[iϵ,ia,:],vh_ret[iϵ,ia,:,:,:],vd_ret[iϵ,ia,:,:])
            vh_act[iϵ,ia,im,ih,ib,end]     = vh   # value function
            chf_act[iϵ,ia,im,ih,ib,end]    = ch   # consumption function
            bhf_act[iϵ,ia,im,ih,ib,end]    = bh   # bond function
            hthf_act[iϵ,ia,im,ih,ib,end]   = hth  # decision to rent
            hhf_act[iϵ,ia,im,ih,ib,end]    = hh   # decision to own home
            mhf_act[iϵ,ia,im,ih,ib,end]    = mh   # mortgage decision
            πf_act[iϵ,ia,im,ih,ib,end]     = πh   # mortgage payment function
            gh_act[iϵ,ia,im,ih,ib,end]     = gh   # decision to own, sell or rent
        end
    end

    Threads.@threads for ib = 1:nb
        # defaulter
        for im = 1:M_num, ia = 1:na, iϵ = 1:nϵ
            bd,vd,cd,htd,πd                 = solve_act_last_def(pa,pr,ib,iϵ,im,ia,vn_ret[iϵ,ia,:],vd_ret[iϵ,ia,:,:])
            vd_act[iϵ,ia,im,ib,end]         = vd    # value function
            cdf_act[iϵ,ia,im,ib,end]        = cd    # consumption function
            bdf_act[iϵ,ia,im,ib,end]        = bd    # bond function
            htdf_act[iϵ,ia,im,ib,end]       = htd   # rent function
            πdf_act[iϵ,ia,im,ib,end]        = πd    # payment function
        end
    end

    # Remaining years of active life
    for ij = 1:nw-1
        Threads.@threads for ib = 1:nb
            # Renter
            for ia = 1:na, iϵ = 1:nϵ
                bn,vn,cn,htn,hn,mn,gn      = solve_act_rent(pa,pr,ib,iϵ,ia,nw-ij,q[iϵ,ia,:,:,:,Jret-1-ij],vn_act[:,ia,:,end-ij+1],vh_act[:,ia,:,:,:,end-ij+1])
                vn_act[iϵ,ia,ib,end-ij]    = vn     # value function
                cnf_act[iϵ,ia,ib,end-ij]   = cn     # consumption function
                bnf_act[iϵ,ia,ib,end-ij]   = bn     # bond function
                htnf_act[iϵ,ia,ib,end-ij]  = htn    # decision to rent
                hnf_act[iϵ,ia,ib,end-ij]   = hn     # decision to own house
                mnf_act[iϵ,ia,ib,end-ij]   = mn     # mortgage decision
                go_act[iϵ,ia,ib,end-ij]    = gn     # decision to own
            end
        end

        Threads.@threads for ib = 1:nb
            # Homeowner
            for ih = 1:H_num, im = 1:M_num, ia = 1:na, iϵ = 1:nϵ
                bh,vh,ch,hth,hh,mh,πh,gh        = solve_act_home(pa,pr,ib,iϵ,ih,im,ia,nw-ij,q[iϵ,ia,:,:,:,Jret-1-ij],vn_act[:,ia,:,end-ij+1],vh_act[:,ia,:,:,:,end-ij+1],vd_act[:,ia,:,:,end-ij+1])
                vh_act[iϵ,ia,im,ih,ib,end-ij]   = vh    # value function
                chf_act[iϵ,ia,im,ih,ib,end-ij]  = ch    # consumption function
                bhf_act[iϵ,ia,im,ih,ib,end-ij]  = bh    # bond function
                hthf_act[iϵ,ia,im,ih,ib,end-ij] = hth   # decision to rent
                hhf_act[iϵ,ia,im,ih,ib,end-ij]  = hh    # decision to own house
                mhf_act[iϵ,ia,im,ih,ib,end-ij]  = mh    # mortgage decision
                πf_act[iϵ,ia,im,ih,ib,end-ij]   = πh    # mortgage payment function
                gh_act[iϵ,ia,im,ih,ib,end-ij]   = gh    # decision to keep owning, selling and renting or buying another house
            end
        end

        Threads.@threads for ib = 1:nb
            # Defaulter
            for im = 1:M_num, ia = 1:na, iϵ = 1:nϵ
                bd,vd,cd,htd,πd                    = solve_act_def(pa,pr,ib,iϵ,im,ia,nw-ij,vn_act[:,ia,:,end-ij+1],vd_act[:,ia,:,:,end-ij+1])
                vd_act[iϵ,ia,im,ib,end-ij]         = vd    # value function
                cdf_act[iϵ,ia,im,ib,end-ij]        = cd    # consumption function
                bdf_act[iϵ,ia,im,ib,end-ij]        = bd    # bond function
                htdf_act[iϵ,ia,im,ib,end-ij]       = htd   # rent function
                πdf_act[iϵ,ia,im,ib,end-ij]        = πd    # payment function
            end
        end
    end

    return vn_act, cnf_act, bnf_act, htnf_act, hnf_act, mnf_act, go_act,            # renter functions
            vh_act, chf_act, bhf_act, hthf_act, hhf_act, mhf_act, πf_act, gh_act,   # homeowner
            vd_act, cdf_act, bdf_act, htdf_act, πdf_act                             # defaulter
end

So what I’m doing here is: for each point in the state space and for each agent type, I solve a dynamic programming problem. The solution, the maximizer and other info are stored in a matrix. This function is type stable (I ran @code_warntype on it) and all sub-functions are also type stable.

From the @time output, the biggest issue seems to be memory allocations and garbage collection, although I’m not entirely sure how I’m going to solve that. Perhaps there is a better way to organize this function that I’m overlooking?

Have you profiled the code to see where the time is spent? Profiling allocations is somewhat trickier, but I guess you’re not after the allocations for their own sake but for their potential impact on runtime so it’s probably still a good idea to check where exactly your code spends time.

Also, I might be missing something here, but it seems like most of the action in your code is in the solve_xxx functions, which presumably set up the value function, do the numerical integration etc?

This statement (and every other case where you index with : or any other range, creates a copy of that part of the array. That’s probably where all your allocations are coming from.

If you don’t actually need a copy, you can do:

@view q[iϵ,ia,:,:,:,Jret-1-ij]

which creates an extremely lightweight reference to the data in the original array, rather than copying it.

You can also use @views to convert all indexing expressions in a given block of code to views, but I would suggest using @view directly just so you can see exactly what it’s doing and how it affects your code.

There are also a lot of places where you slice your arrays along their last dimension like this. That’s the right thing to do in C/C++/numpy where arrays are row-major, but it’s the least efficient way to slice an array in Julia/Fortran/Matlab where arrays are column-major.

If you can rearrange your data structures so that you are mostly slicing along the first dimension, then you’ll get better memory locality and therefore better performance.

Thanks for your comments.

  1. I am profiling the code right now, but it is very difficult to see the profiling in the REPL, given that at some point the nested function calls are omitted and then I can’t see them. Is there a better to do this?

  2. Yeah, most of the action comes from the solve_xxx functions. I am working on those, but the reason that they take some time are the solution algorithms rather than any strict programming problem. For example, it takes a lot of iterations to solve some of the problems inside those functions. I am trying to cut back on that by changing the algorithms or providing better initial guesses, for example.

Never tried to profile anything at the REPL, but you should probably use https://github.com/timholy/ProfileView.jl or @profiler in Juno

My point about the solve_xxx functions was that most of the allocations might come from there, which would make it impossible for us to help given we don’t see what’s going on there (e.g. are you creating lots of temporary arrays in there, could you be using in-place operations, make use of @views etc)

2 Likes

I like to use PProf:
A bit more info about it in my post: https://opensourc.es/blog/constraint-solver-profiling/index.html
Make sure to skip over the constraint solver stuff :smiley:

So try

 julia --track-allocation=user

and:

julia> using Revise, Profile, PProf
julia> include("your_file")
julia> your_function()
julia> @profile your_function()
julia> pprof(;webport=58599)
1 Like

Thanks for your comments.

  1. So in case I use view, I have to redefine the argument types of the solve_xxx functions. Is it efficient to use:
function solve_xxxx (arguments, v::Union{SubArray{<:Real},Array{<:Real}})

So that I may use both types of inputs if I wish? Is there a more appropriate formulation?

  1. I’ll will reformulate the code so as to slice the first dimension rather than the last. Thanks

Even better, you can do v::AbstractArray{<:Real} which will be just as efficient and will accept any array-like container.

Or if you want to accept any array-like container of exactly one dimension, you can do ::AbstractArray{<:Real, 1}.

See Passing arrays with unspecified type for related discussion.