Large memory usage in JuMP

Hi, I am using JuMP to run the optimization script below. It uses almost 3 GiB of memory for this small test dataset.

How can I figure out which part uses the most memory, and how can I reduce it? I will use much larger data later, and the memory usage explodes when I run with a larger dataset.

13.817425 seconds (10.42 M allocations: 2.814 GiB, 6.32% gc time, 16.17% compilation time: 6% of which was recompilation)

using JLD
data = load("data.jld")["data"]

using JuMP, Ipopt
function ipopt_jump(data, iter=1000, tol=1e-3)

    X, Z, choice, obs_s = data
    N, L = size(Z)
    H, K = size(X)
    P = K*L

    m = Model(Ipopt.Optimizer)
    set_attributes(m, "tol" => tol, "max_iter" => iter)

    @variable(m, theta[1:P])
    @variable(m, delta[1:H])
    
    mu = @expression(m, X * reshape(theta, (K, L)) * Z')
    v = @expression(m, delta .+ mu)
    ev = @NLexpression(m, [h = 1:H, i = 1:N], exp(v[h, i]))
    evsum = @NLexpression(m, [i = 1:N], sum(ev[h, i] for h = 1:H) + 1)
    sih = @NLexpression(m, [h = 1:H, i = 1:N], ev[h, i] / evsum[i])
    ln_sih = @NLexpression(m, [h = 1:H, i = 1:N], log(sih[h, i]))

    @NLobjective(
        m,
        Max,
        sum(choice[h, i] * ln_sih[h, i] for h = 1:H, i = 1:N)
    )
    optimize!(m)

    return (objective_value(m), value.(theta), value.(delta))
end

So this is an example of too many @NLexpressions being a bad thing: each indexed @NLexpression registers a full array of expression objects with the model, so ev, sih, and ln_sih each add H×N nodes that JuMP must store and differentiate through. You can check which phase allocates the most with the timing sketch just below; after that, here are a few options:
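One minimal way to split the measurement (my sketch, not part of the original reply): wrap the build and solve phases in @timed, which on Julia 1.5+ returns a NamedTuple with a bytes field. Base.format_bytes is an unexported Base helper used here only for readable printing.

using JuMP, Ipopt

function profile_phases(data)
    X, Z, choice, obs_s = data
    N, L = size(Z)
    H, K = size(X)
    P = K * L
    # Phase 1: build the model (all the @NLexpression arrays are created here).
    build = @timed begin
        m = Model(Ipopt.Optimizer)
        set_silent(m)
        @variable(m, theta[1:P])
        @variable(m, delta[1:H])
        mu = @expression(m, X * reshape(theta, (K, L)) * Z')
        v = @expression(m, delta .+ mu)
        ev = @NLexpression(m, [h = 1:H, i = 1:N], exp(v[h, i]))
        evsum = @NLexpression(m, [i = 1:N], sum(ev[h, i] for h = 1:H) + 1)
        sih = @NLexpression(m, [h = 1:H, i = 1:N], ev[h, i] / evsum[i])
        ln_sih = @NLexpression(m, [h = 1:H, i = 1:N], log(sih[h, i]))
        @NLobjective(m, Max, sum(choice[h, i] * ln_sih[h, i] for h = 1:H, i = 1:N))
        m
    end
    # Phase 2: solve.
    solve = @timed optimize!(build.value)
    println("build allocated ", Base.format_bytes(build.bytes))
    println("solve allocated ", Base.format_bytes(solve.bytes))
    return nothing
end

If most of the bytes land in the build phase, the @NLexpression arrays are the culprit.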

julia> using JLD, JuMP, Ipopt

julia> function ipopt_jump(data, iter=1000, tol=1e-3)
           X, Z, choice, obs_s = data
           N, L = size(Z)
           H, K = size(X)
           P = K*L
           m = Model(Ipopt.Optimizer)
           set_silent(m)
           set_attributes(m, "tol" => tol, "max_iter" => iter)
           @variable(m, theta[1:P])
           @variable(m, delta[1:H])
           mu = @expression(m, X * reshape(theta, (K, L)) * Z')
           v = @expression(m, delta .+ mu)
           ev = @NLexpression(m, [h = 1:H, i = 1:N], exp(v[h, i]))
           evsum = @NLexpression(m, [i = 1:N], sum(ev[h, i] for h = 1:H) + 1)
           sih = @NLexpression(m, [h = 1:H, i = 1:N], ev[h, i] / evsum[i])
           ln_sih = @NLexpression(m, [h = 1:H, i = 1:N], log(sih[h, i]))
           @NLobjective(
               m,
               Max,
               sum(choice[h, i] * ln_sih[h, i] for h = 1:H, i = 1:N)
           )
           optimize!(m)
           return objective_value(m), value.(theta), value.(delta)
       end
ipopt_jump (generic function with 3 methods)

julia> function ipopt_jump_2(data, iter=1000, tol=1e-3)
            X, Z, choice, obs_s = data
            N, L = size(Z)
            H, K = size(X)
            model = Model(Ipopt.Optimizer)
            set_silent(model)
            set_attributes(model, "tol" => tol, "max_iter" => iter)
            @variable(model, theta[1:K, 1:L])
            @variable(model, delta[1:H])
            @expression(model, v, delta .+ (X * theta * Z'))
            @NLexpression(model, evsum[i = 1:N], sum(exp(v[h, i]) for h = 1:H) + 1)
            @NLobjective(
                model,
                Max,
                sum(choice[h, i] * log(exp(v[h, i]) / evsum[i]) for h = 1:H, i = 1:N)
            )
            optimize!(model)
            return objective_value(model), value.(theta), value.(delta)
       end
ipopt_jump_2 (generic function with 3 methods)
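ipopt_jump_2 declares theta directly as a K×L matrix, so no reshape is needed, and inlines exp and log in the objective, so the three H×N arrays of intermediate @NLexpressions (ev, sih, and ln_sih) are never created; only the N evsum expressions remain.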

julia> function ipopt_jump_3(data, iter=1000, tol=1e-3)
            X, Z, choice, obs_s = data
            N, L = size(Z)
            H, K = size(X)
            model = Model(Ipopt.Optimizer)
            set_silent(model)
            set_attributes(model, "tol" => tol, "max_iter" => iter)
            @variable(model, theta[1:K, 1:L])
            @variable(model, delta[1:H])
            @expression(model, v, delta .+ (X * theta * Z'))
            @NLexpression(model, evsum[i = 1:N], sum(exp(v[h, i]) for h = 1:H) + 1)
            choice_h = sum(choice; dims = 1)
            @NLobjective(
                model,
                Max,
                sum(choice[h, i] * v[h, i] for h = 1:H, i = 1:N) - 
                sum(choice_h[i] * log(evsum[i]) for i = 1:N)
            )
            optimize!(model)
            return objective_value(model), value.(theta), value.(delta)
       end
ipopt_jump_3 (generic function with 3 methods)
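ipopt_jump_3 goes one step further with the identity log(e^v / S) = v - log(S), so each log(evsum[i]) appears in the objective once per observation i instead of once per (h, i) pair:

$$\sum_{h=1}^{H}\sum_{i=1}^{N} c_{hi}\,\log\frac{e^{v_{hi}}}{S_i}
= \sum_{h=1}^{H}\sum_{i=1}^{N} c_{hi}\,v_{hi}
- \sum_{i=1}^{N}\Big(\sum_{h=1}^{H} c_{hi}\Big)\log S_i,
\qquad S_i = 1 + \sum_{h=1}^{H} e^{v_{hi}},$$

where $c_{hi}$ is choice[h, i] and the inner sum over h is the precomputed choice_h.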

julia> data = load("/Users/Oscar/Downloads/data.jld")["data"]
([-0.849597862907133 -1.7383363775188911; -2.089774486959022 -1.7383363775188911; … ; -2.898726721588342 -1.7383363775188911; -0.9544864981929626 -1.7383363775188911], [-0.29486676502689874 -0.8116805331698356; -0.16866716595935305 -0.8116805331698356; … ; -0.042459422881323435 1.2320112883602086; -0.8992161280397112 -0.8116805331698356], [1.0 1.0 … 0.0 0.0; 0.0 0.0 … 0.0 0.0; … ; 0.0 0.0 … 0.0 0.0; 0.0 0.0 … 1.0 1.0], [0.010817307692307692, 0.01201923076923077, 0.0030048076923076925, 0.013221153846153846, 0.034254807692307696, 0.037259615384615384, 0.0985576923076923, 0.018629807692307692, 0.0030048076923076925, 0.1015625  …  0.0018028846153846155, 0.002403846153846154, 0.0018028846153846155, 0.016826923076923076, 0.0234375, 0.003605769230769231, 0.0018028846153846155, 0.012620192307692308, 0.009014423076923076, 0.013221153846153846])

julia> @time ipopt_jump(data)
  7.890690 seconds (9.31 M allocations: 2.742 GiB, 8.11% gc time, 3.98% compilation time)
(-3357.797666382641, [0.2559841072993847, 0.038228499459538955, -0.30623235102878604, 0.2442963856522662], [7.534984835933054, 7.53694802447915, 6.137492345290403, 7.737130209098217, 8.642269463783716, 8.644952456588703, 9.697474361284833, 8.063716077088952, 6.206819436841746, 9.767320964737333  …  5.654692937385994, 5.5702954669101885, 5.729337800983723, 7.921141245751071, 8.283112369637273, 6.438383853896593, 5.745189995044618, 7.5951786012008995, 7.033904140374328, 7.737473644111642])

julia> @time ipopt_jump_2(data)
  2.477510 seconds (2.31 M allocations: 644.749 MiB, 8.32% compilation time)
(-3357.7976663826416, [0.2559841072993807 -0.3062323510287861; 0.0382284994595422 0.24429638565226608], [7.534984835909796, 7.536948024455893, 6.137492345267145, 7.737130209074958, 8.642269463760458, 8.644952456565447, 9.697474361261577, 8.063716077065692, 6.206819436818488, 9.767320964714076  …  5.6546929373627375, 5.570295466886936, 5.729337800960466, 7.921141245727812, 8.283112369614013, 6.438383853873335, 5.745189995021359, 7.59517860117764, 7.033904140351074, 7.737473644088384])

julia> @time ipopt_jump_3(data)
  1.944815 seconds (1.57 M allocations: 574.916 MiB, 10.67% compilation time)
(-3357.797666382681, [0.25598410729938 -0.30623235102878654; 0.03822849945954291 0.24429638565226683], [7.53498483591045, 7.536948024456545, 6.137492345267797, 7.737130209075611, 8.64226946376111, 8.644952456566099, 9.697474361262229, 8.063716077066344, 6.206819436819142, 9.767320964714727  …  5.654692937363391, 5.570295466887589, 5.729337800961119, 7.921141245728466, 8.283112369614667, 6.438383853873987, 5.745189995022012, 7.595178601178292, 7.033904140351726, 7.737473644089036])

You might also get better performance if you reformulate this as a conic program: see "Tips and Tricks" in the JuMP documentation.
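To give a flavor of that, here is a minimal sketch of the log-sum-exp epigraph trick that page describes, on fixed data (my illustration, not from the original reply; SCS is just one solver that supports MOI.ExponentialCone, and all names are illustrative):

using JuMP, SCS

# Model t >= log(1 + sum_h exp(x[h])) via its epigraph:
#   exp(-t) + sum_h exp(x[h] - t) <= 1,
# where each exp(a) <= u is the conic constraint [a, 1, u] in
# MOI.ExponentialCone().
function logsumexp_epigraph(x::Vector{Float64})
    H = length(x)
    model = Model(SCS.Optimizer)
    set_silent(model)
    @variable(model, t)        # epigraph variable
    @variable(model, u0)       # u0 >= exp(0 - t), the "+ 1" term
    @variable(model, u[1:H])   # u[h] >= exp(x[h] - t)
    @constraint(model, [-t, 1, u0] in MOI.ExponentialCone())
    @constraint(model, [h = 1:H], [x[h] - t, 1, u[h]] in MOI.ExponentialCone())
    @constraint(model, u0 + sum(u) <= 1)
    @objective(model, Min, t)
    optimize!(model)
    return value(t)  # ≈ log(1 + sum(exp.(x)))
end

In the full model, each log(evsum[i]) would be replaced by an epigraph variable t[i] constrained this way (with x[h] replaced by the affine expression v[h, i]), so the objective sum(choice[h, i] * v[h, i]) - sum(choice_h[i] * t[i]) becomes linear.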
