Sequence of JuMP problems -- reduce memory usage

Hello everyone,

Thanks for existing! I have a problem with memory usage when solving a sequence of JuMP problems. The structure of my code is as follows.

using JuMP
using Gurobi

function Opt(ID,Ind_availL)
    Routing=demand(ID)
    ArrivalsL=arrivals(ID)
    InflowsL=Inflow(ID)
    OutflowsL=Outflow(ID)
    modelL=Model(() -> Gurobi.Optimizer(env))
    set_string_names_on_creation(modelL, false)
    @variable(modelL, x[p=1:nA,o=1:nA,d=1:nA,t=1:PCov]>=0, Int)
    @variable(modelL, y[p=1:nA,t=1:PCov]>=0, Int)
    @variable(modelL, s[o=1:nA,d=1:nA,t=1:PCov]>=0)
    @constraint(modelL,demands[o=1:nA,d=1:nA,t=1:PCov], sum(x[p,o,d,tau]*Ind_availL[o,t][p,tau] for p in 1:nA, tau in 1:t)+s[o,d,t]==Routing[o,d,t])
    @constraint(modelL, flow[p=1:nA,t=2:PCov], ArrivalsL[p,t]+y[p,t-1]+InflowsL[p,t]-OutflowsL[p,t]==sum(x[p,o,d,t] for o in 1:nA, d in 1:nA)+y[p,t])
    @constraint(modelL, flow_base[p=1:nA], sum(x[p,o,d,1] for o in 1:nA, d in 1:nA)+y[p,1]==InflowsL[p,1])
    @expression(modelL, cost, sum(Cost[p,o,idxVC]*x[p,o,d,t] for p in 1:nA, o in 1:nA, d in 1:nA, t in 1:PCov))
    @expression(modelL, lost, sum(s[o,d,t] for o in 1:nA, d in 1:nA, t in 1:PCov))
    @objective(modelL,Min, cost+10000000*lost)
    optimize!(modelL)
    if has_values(modelL)==true
        Opt=value(cost)
        lost=value(lost)
    else
        Opt="missing"
        lost="missing"
    end
    return Opt,lost
end

function SolveForAll(List_of_IDs,Ind_availL)
    OptSum=0
    Lost=0
    for ID in List_of_IDs
        Em,Los=Opt(ID,Ind_availL)
        if Em=="missing"
            Lost+=100
            OptSum+=200
        else
            OptSum+=Em
            Lost+=Los
        end
    end
    return OptSum,Lost
end

SolveForAll(MyList,Ind_availL)

Basically, the function Opt() creates and solves an integer linear program with Gurobi, and the function SolveForAll() solves a sequence of these problems and sums their optimal values when the problems are feasible, otherwise it adds fixed penalties (specific to my application; here I use 100 and 200 for simplicity). The other functions called in Opt() are defined elsewhere in the code and, as far as I know, are fast and not too heavy on RAM.

MyList is a Vector{String} with 170 entries, and for the optimization model I have nA=28, PCov=147. My issue is that, after I call SolveForAll(MyList,Ind_availL), the loop progresses to roughly half the list, and then the whole Julia process is killed. The server administrator tells me this is because I hit the RAM limit at my disposal, set at 192 GB. Can anyone suggest what is creating this insane accumulation of stuff (for want of a better term)? Is it a problem with JuMP? Any suggestions on how I can decrease memory usage?

Thanks a lot to everyone!

Your model has a lot of undefined symbols:

  • How are you creating env?
  • What is Cost?
  • What is idxVC?
  • What are nA and PCov?

Your approach is generally the right one, so this should work. But I think Julia’s garbage collector isn’t kicking in because you have some global variables. It’s hard to tell without a reproducible example.

You want to write your Opt function so that it does not use global variables. That is, everything it needs gets passed in as one of the function's arguments.
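
For example, a minimal sketch of what I mean (I am guessing at the full set of globals from your snippet, namely Cost, idxVC, nA, PCov, and env):

function Opt(ID, Ind_availL, Cost, idxVC, nA, PCov, env)
    # build and solve the model exactly as before; every name used inside
    # now comes from an argument instead of from global scope
    return 0.0, 0.0  # placeholder return so this sketch runs
end

# and in SolveForAll, pass the same objects through:
Em, Los = Opt(ID, Ind_availL, Cost, idxVC, nA, PCov, env)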

Hi @odow, thanks for your suggestions.

  • env is created with env = Gurobi.Env();
  • Cost is a global variable, a big three-dimensional array of Float64s, and idxVC is an index created locally from ID. I accidentally left that part out of the snippet;
  • nA and PCov are again global variables, both Int64.

I will try passing everything as an argument, including the variables that I defined globally much earlier in the code.

Also, Opt=value(cost) is bad because it re-uses the name of the function itself.

Do this instead:

if has_values(modelL)
    return value(cost), value(lost)
else
    return "missing", "missing"
end

OK, let me elaborate with an MWE. Here is the code I am trying:

using Gurobi
using JuMP
using Random

env=Gurobi.Env()
Random.seed!(1234)
nA=28
PCov=147

Cost=rand(nA,nA).>=0.3
function demand(ID)
    return rand(nA,nA,PCov)
end

function arr(ID)
    return rand(nA,PCov)
end

function inflow(ID)
    return hcat(ones(nA),rand(nA,PCov-1))
end

function outflow(ID)
    return rand(nA,PCov).>=0.5
end

function Opt(ID,Cost,Pcov,na)
    RoutingInitialL=demand(ID)
    ArrivalsL=arr(ID)
    InflowsL=inflow(ID)
    OutflowsL=outflow(ID)
    modelL=Model(() -> Gurobi.Optimizer(env))
    set_string_names_on_creation(modelL, false)
    @variable(modelL, x[p=1:na,o=1:na,d=1:na,t=1:Pcov], Int)
    @variable(modelL, y[p=1:na,t=1:Pcov], Int)
    @variable(modelL, s[o=1:na,d=1:na,t=1:Pcov])
    @constraint(modelL,demands[o=1:na,d=1:na,t=1:Pcov], sum(x[p,o,d,tau] for p in 1:na, tau in 1:t)+s[o,d,t]==RoutingInitialL[o,d,t])
    @constraint(modelL, flow[p=1:na,t=2:Pcov], ArrivalsL[p,t]+y[p,t-1]+InflowsL[p,t]-OutflowsL[p,t]==sum(x[p,o,d,t] for o in 1:na, d in 1:na)+y[p,t])
    @constraint(modelL, flow_base[p=1:na], sum(x[p,o,d,1] for o in 1:na, d in 1:na)+y[p,1]==InflowsL[p,1])
    @expression(modelL, emission_cost, sum(Cost[p,o]*x[p,o,d,t] for p in 1:na, o in 1:na, d in 1:na, t in 1:Pcov))
    @expression(modelL, lost_voyages, sum(s[o,d,t] for o in 1:na, d in 1:na, t in 1:Pcov))
    @objective(modelL,Min, emission_cost+10000000*lost_voyages)
    optimize!(modelL)
    if has_values(modelL)==true
        return value(emission_cost),value(lost_voyages)
    else
        return "missing","missing"
    end
end

function SolveForAll(List_IDs,Cost,Pcov,na)
    Em=0
    Los=0
    count=0
    for ID in List_IDs
        println(count+=1)
        E,L=Opt(ID,Cost,Pcov,na)
        if E=="missing"
            Em+=10
            Los+=10
        else
            Em+=E
            Los+=L
        end
    end
    return Em,Los
end

MyList=[randstring(4) for _ in 1:170]
SolveForAll(MyList,Cost,PCov,nA)

The majority (if not all) of the optimization problems will be infeasible, but I don’t think this matters. At the 5th iteration of the loop I am already consuming 11 GB of RAM, and it keeps growing. I think these functions incorporate all your previous suggestions, right? Do you have any idea what is causing this buildup?

A few comments:

  • You still have global variables. For example, env is not passed into Opt, and functions like outflow use the global variables nA and PCov.
  • These are really big problems! You have 3_226_944 (= 28 × 28 × 28 × 147) integer variables for x. How long does it take Gurobi to build and solve each problem? (See the timing sketch after this list.)
  • You can reduce memory usage with direct_model(Gurobi.Optimizer(env)).
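
On the timing question, here is a rough way to get the numbers (a sketch only, using the names from your MWE; solve_time reports the time Gurobi itself spent in the solve):

# Wall-clock time for data generation + model build + solve, for one ID:
t = @elapsed Opt(MyList[1], Cost, PCov, nA)
println("build + solve took ", t, " seconds")

# Inside Opt, right after optimize!(modelL), you can also print Gurobi's solve time alone:
# println("solve only: ", solve_time(modelL), " seconds")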

Here’s how I’d write your example:

using JuMP
import Gurobi
import Random

function demand(ID, nA, PCov)
    return rand(nA, nA, PCov)
end

function arr(ID, nA, PCov)
    return rand(nA, PCov)
end

function inflow(ID, nA, PCov)
    return hcat(ones(nA), rand(nA, PCov - 1))
end

function outflow(ID, nA, PCov)
    return rand(nA, PCov) .>= 0.5
end

function Opt(ID, Cost, Pcov, na, env)
    RoutingInitialL = demand(ID, na, Pcov)
    ArrivalsL = arr(ID, na, Pcov)
    InflowsL = inflow(ID, na, Pcov)
    OutflowsL = outflow(ID, na, Pcov)
    model = direct_model(Gurobi.Optimizer(env))
    set_string_names_on_creation(model, false)
    @variable(model, x[1:na, 1:na, 1:na, 1:Pcov], Int)
    @variable(model, y[1:na, 1:Pcov], Int)
    @variable(model, s[1:na, 1:na, 1:Pcov])
    @constraint(model, [o = 1:na, d = 1:na, t = 1:Pcov],
        sum(x[:, o, d, 1:t]) + s[o,d,t] == RoutingInitialL[o,d,t],
    )
    @constraint(model, [p = 1:na, t = 2:Pcov],
        ArrivalsL[p,t] + y[p,t-1] + InflowsL[p,t] - OutflowsL[p,t] == sum(x[p,:,:,t]) + y[p,t],
    )
    @constraint(model, [p = 1:na], sum(x[p,:,:,1]) + y[p,1] == InflowsL[p,1])
    @expression(model, emission_cost, 
        sum(Cost[p,o] * sum(x[p,o,:,:]) for p in 1:na, o in 1:na)
    )
    @expression(model, lost_voyages, sum(s))
    @objective(model, Min, emission_cost + 10_000_000 * lost_voyages)
    optimize!(model)
    if has_values(model)
        return value(emission_cost), value(lost_voyages)
    else
        return nothing, nothing
    end
end

function SolveForAll(List_IDs, Cost, Pcov, na, env)
    Em = 0
    Los = 0
    count = 0
    for ID in List_IDs
        println(count += 1)
        E, L = Opt(ID, Cost, Pcov, na, env)
        if E === nothing
            Em += 10
            Los += 10
        else
            Em += E
            Los += L
        end
    end
    return Em, Los
end

env = Gurobi.Env()
Random.seed!(1234)
nA = 28
PCov = 147
Cost = rand(nA, nA) .>= 0.3
MyList = [Random.randstring(4) for _ in 1:170]
SolveForAll(MyList, Cost, PCov, nA, env)

Each problem takes about 2.5 minutes to solve, but I do not see a substantial difference in memory usage even when I run your version of the code.

I would expect each solve to take a lot of memory, but it should be fairly constant. I wouldn’t expect it to grow over time towards 192 GB.
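
If you want to see where the growth happens, one option is to log the process memory after each solve, for example (Sys.maxrss reports the peak resident set size of the Julia process, in bytes):

E, L = Opt(ID, Cost, Pcov, na, env)
println("peak RSS so far: ", round(Sys.maxrss() / 2^30; digits = 2), " GiB")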

You could also force the GC after each solve with

E, L = Opt(ID, Cost, Pcov, na, env)
GC.gc()
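
That is, inside the loop in SolveForAll, something like this (same names as above; GC.gc() runs a full collection by default):

for ID in List_IDs
    E, L = Opt(ID, Cost, Pcov, na, env)
    GC.gc()  # release the model and solver memory from the solve that just finished
    # ... accumulate E and L as before
end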

This last tip definitely worked. Now the usage remains around 1 GB throughout the computation, which is consistent with the size of each problem. Thanks!

Is the equivalent of this in KNITRO MOI.empty!(backend(model))?