A Primal-Dual Global Optimization approach on the Cutting Stock Problem

The previous posts are based on the primary case where we minimize the number of raw rolls.
This is actually a degenerate case which leads to homogeneous blocks—meaning that only one subblock is needed when writting code.

Therefore, I extend the Cutting Stock Problem to a heterogeneous version—multiple different blocks—each being a (MIP) subproblem.

I’ve devised a satisfying (I think it’s pretty cool) algorithm which I’m going to call it a sandwiched MIP method (strengthening the original MIP formulation by sandwiching it).

Roughly speaking, Gurobi doesn’t perform well on the original compact formulation----say, converges to 1% after 2 hours—and become slower and slower due to huge size of branches.
While my algorithm spend nearly 1 hour to train a tight Lagrangian bound. Then after sandwiching, Gurobi returns me the OPTIMAL termination status with a 0.0098% gap.

Currently the main concern in my algorithm is a heuristic employed to identify some promising subproblem block—I’m unaware of if there is a good strategy. (x-ref Find the max element of a numeric vector iteratively with mask and early break)

My implementation is

code
import JuMP, Gurobi; GRB_ENV = Gurobi.Env();
import LinearAlgebra.⋅ as ⋅
import Random
Random.seed!(321); # ⚠️ when modify this, U should also modify hyperparam

I, J, w, d, W, c = let # 📄 load data
    δ = 1.7e-12 # used in test case generation
    I = 20; # [demand side] number of types of demand
    J = 400; # [resource side] number of raw rolls
    wl, wh = rand(13:δ:35), rand(77:δ:97)
    w = rand(wl:δ:wh, I) # width of each demand type
    dl, dh = rand(7:14), rand(57:77)
    d = rand(dl:dh, I) # quantity::Int of each demand type
    Wl, Wh = rand(70:δ:100), rand(400:δ:500)
    W = rand(Wl:δ:Wh, J) # width of each raw roll
    c = 0.01rand(0.99:δ:1.99, J) .* W # used cost of raw rolls (c)
    I, J, w, d, W, c
end;

if false # 📘 The original compact Mip formulation---as a benchmark
    Mip = JuMP.Model(() -> Gurobi.Optimizer(GRB_ENV)); 
    JuMP.@variable(Mip, 0 ≤ b[1:J] ≤ 1, Int);
    JuMP.@objective(Mip, Min, c ⋅ b);
    JuMP.@variable(Mip, 0 ≤ n[1:I, 1:J], Int);
    JuMP.@constraint(Mip, [j = 1:J], w ⋅ view(n, :, j) ≤ b[j]W[j]);
    # Note that the following complicating constraint 🟡 can also be a `==` one
    # We prefer write it as `≥` since the dual space will be halved
    JuMP.@constraint(Mip, [i = 1:I], sum(n[i, j] for j in 1:J) ≥ d[i]); # 🟡 [π ≥ 0]
    JuMP.unset_integer.(b)
    JuMP.unset_integer.(n)
    JuMP.optimize!(Mip) # 📘 [Natural LP Relaxation] Optimal objective 647.4510818
    JuMP.set_integer.(b)
    JuMP.set_integer.(n)
    JuMP.optimize!(Mip)
    # Gurobi's MIP logging
    # 6576  4630  647.68526   88  216  667.02107  647.47885  2.93%   6.3   10s
    # 1498325 1290411  650.72418 1504  203  654.50850  647.48232  1.07%  11.4 6482s
    # 📘 Conclusion: Gurobi spends 6482s to reach ub = 654.50850 | 647.48232 = lb 
end

# Dual Decomposition Assets
function Min_j_fixed_part(j)
    Min_j = JuMP.Model(() -> Gurobi.Optimizer(GRB_ENV))
    JuMP.@variable(Min_j, b_j, Bin);
    JuMP.@expression(Min_j, prim_obj_tbMin, c[j]b_j)
    JuMP.@variable(Min_j, 0 ≤ n_j[1:I], Int);
    JuMP.@constraint(Min_j, w ⋅ n_j ≤ W[j]b_j)
    return Min_j
end; function penalty(j, π, n_j::Array{JuMP.VariableRef})
    model = JuMP.owner_model(n_j[1])
    return JuMP.@expression(model, -π ⋅ n_j)
end; function penalty(j, π::Array{JuMP.VariableRef}, n_j)
    model = JuMP.owner_model(π[1])
    return JuMP.@expression(model, -π ⋅ n_j)
end; function draw_a_block!(bit_vec, Θ, Θ_old) # 🟠💡 This is an important heuristic.
    # I'm not aware of any better heuristic method to pick a promising block (that can
    # contribute a violating cut to the master)
    max_i, max_v = 0, -Inf
    for (i, undrawn) in enumerate(bit_vec)
        if undrawn
            v = Θ[i] - Θ_old[i] # 💡
            if max_v < v
                max_i, max_v = i, v
            end
        end
    end
    if max_i == 0 # cannot draw one due to all-fathomed
        return false, max_i
    else
        bit_vec[max_i] = false # indicating that max_i is drawn this time
        return true, max_i # can be drawn, a valid index
    end
end;

# build subproblems (MIPs)
Min_vec = [Min_j_fixed_part(j) for j = 1:J]; # The inner subproblems
# allocate these to collect columns identified for each block
n_cols = [[zeros(Int, I)] for j = 1:J]; pop!.(n_cols);
b_cols = [[false] for j = 1:J];         pop!.(b_cols);

# build master problem (LP)
an_UB = 685 # ⚠️ [hyperparam] a valid ub obtained from Gurobi's trial solve
out = JuMP.Model(() -> Gurobi.Optimizer(GRB_ENV)); # The outer dual problem
JuMP.@variable(out, 0 ≤ π[1:I]); JuMP.@variable(out, θ[1:J]);
JuMP.@expression(out, common, d ⋅ π);
JuMP.@expression(out, outer_obj_tbMax, common + sum(θ));
JuMP.@constraint(out, outer_obj_tbMax ≤ an_UB);
JuMP.@objective(out, Max, outer_obj_tbMax);

# The following main loop generates will a best possible dual variable via CTPLNs
JuMP.set_silent.(Min_vec); JuMP.optimize!.(Min_vec);
JuMP.set_silent(out); JuMP.optimize!(out); JuMP.assert_is_solved_and_feasible(out; allow_local = false)
Θ_old = JuMP.value.(θ); # 💡 need this for my heuristic
cnt_outˈs_cut = 0; # count the total number of cuts added to the outer model
cnt_futile_draw = 0; # a bad block-drawing strategy 🟠💡 might make this index large
t0 = time();
for COT = 5 * (0.5 .^ (0:16)) # Cut-Off Tolerance ⚠️ CANNOT by too small
    while true # work with the present COT
        exists_j_at_COT = false
        JuMP.optimize!(out); JuMP.assert_is_solved_and_feasible(out; allow_local = false)
        ub = JuMP.objective_bound(out)
        Π, Θ = JuMP.value.(π), JuMP.value.(θ)
        Θˈs_bitvec = trues(J) # 💡 need this for my heuristic
        while true # see if there is any block who can bring us a COT-violating cut
            still_has_undrawn, j = draw_a_block!(Θˈs_bitvec, Θ, Θ_old)
            still_has_undrawn || break
            Min_j = Min_vec[j]; prim_obj_tbMin, n_j = Min_j[:prim_obj_tbMin], Min_j[:n_j]
            JuMP.@objective(Min_j, Min, prim_obj_tbMin + penalty(j, Π, n_j)) # 🟢
            JuMP.optimize!(Min_j); JuMP.assert_is_solved_and_feasible(Min_j; allow_local = false)
            ObjVal_j = JuMP.objective_value(Min_j)
            if ObjVal_j + COT < Θ[j] # the hallmark of CTPLN's progress
                # Since we implement a single-cut algorithm, we can compress Logging into one line
                @info "#cut = $cnt_outˈs_cut, ub ▶ $ub | cut found at $j with vio = $(Θ[j] - ObjVal_j)"
                Θ_old = Θ # save the RHS, as it is to be updated
                Prim_obj_tbMin, N_j = JuMP.value(prim_obj_tbMin), JuMP.value.(n_j)
                JuMP.@constraint(out, θ[j] ≤ Prim_obj_tbMin + penalty(j, π, N_j)) # 🟢
                push!(n_cols[j], round.(Int, N_j))
                push!(b_cols[j], round(Bool, JuMP.value(Min_j[:b_j])))
                cnt_outˈs_cut += 1; exists_j_at_COT = true
                break # [Early Break] go back to outer model to update Π
            else
                cnt_futile_draw += 1
            end
        end
        if ! exists_j_at_COT
            @info "COT = $COT, elapsed $(time() - t0), futile $cnt_futile_draw, ub $ub ◀◀◀"
            break
        end
    end
end
@info "▶▶▶ total time elapsed is $(time() - t0)"
# retrieve that best dual solution
JuMP.optimize!(out); JuMP.assert_is_solved_and_feasible(out; allow_local = false); Π = JuMP.value.(π)

# Use the columns you had collected to build a restricted version of the original Mip
# The aim is to derive a good primal-side feasible solution
rMip = JuMP.Model(() -> Gurobi.Optimizer(GRB_ENV)); # restricted primal side Mip
JuMP.@variable(rMip, rb[1:J], Int); # `r` reads `restricted`
JuMP.@variable(rMip, rn[1:I, 1:J], Int);
let
    JuMP.@variable(rMip, lb[j = 1:J, v = 1:length(b_cols[j])] ≥ 0); # each v indexes a vertex
    JuMP.@variable(rMip, ln[j = 1:J, v = 1:length(n_cols[j])] ≥ 0); # `l` reads `lambda`
    JuMP.@constraint(rMip, [j = 1:J], sum(lb[j, :]) == 1);
    JuMP.@constraint(rMip, [j = 1:J], sum(ln[j, :]) == 1);
    JuMP.@constraint(rMip, [j = 1:J], rb[j] == sum(lb[j, v]b_cols[j][v]                 for v = 1:length(b_cols[j])));
    JuMP.@constraint(rMip, [i = 1:I, j = 1:J], rn[i, j] == sum(ln[j, v]n_cols[j][v][i]  for v = 1:length(n_cols[j])));
end;
JuMP.@constraint(rMip, [j = 1:J], w ⋅ view(rn, :, j) ≤ rb[j]W[j]);
JuMP.@constraint(rMip, [i = 1:I], sum(rn[i, j] for j in 1:J) ≥ d[i]); # 🟡
JuMP.@objective(rMip, Min, c ⋅ rb);
# This model may still take some time to solve to exact optimality, but it's a lot easier than the unrestricted original
# you may also interrupt early and be content with a good feasible (suboptimal) solution
JuMP.optimize!(rMip); # JuMP.assert_is_solved_and_feasible(rMip; allow_local = false)
b_start = JuMP.value.(rb);
n_start = JuMP.value.(rn);

# Finally build an sandwiched version of the original Mip such that
# it can carry on searching the global optimal solution to the original Mip
lbv = let
    lbv = zeros(J)
    for j = 1:J
        Min_j = Min_vec[j]
        b_j = Min_j[:b_j]
        n_j = Min_j[:n_j]
        JuMP.@objective(Min_j, Min, c[j]b_j - Π ⋅ n_j)
        JuMP.optimize!(Min_j); JuMP.assert_is_solved_and_feasible(Min_j; allow_local = false)
        lbv[j] = JuMP.objective_value(Min_j)
    end
    lbv
end;
swMip = JuMP.Model(() -> Gurobi.Optimizer(GRB_ENV)); # sandwiched Mip
JuMP.@variable(swMip, b[1:J], Bin);
JuMP.@objective(swMip, Min, c ⋅ b);
JuMP.@variable(swMip, 0 ≤ n[1:I, 1:J], Int);
JuMP.@constraint(swMip, [j = 1:J], w ⋅ view(n, :, j) ≤ b[j]W[j]);
JuMP.@constraint(swMip, [i = 1:I], sum(n[i, j] for j in 1:J) ≥ d[i]); # 🟡
JuMP.@constraint(swMip, [j = 1:J], c[j]b[j] - Π ⋅ view(n, :, j) ≥ lbv[j]); # 🥪 The lower bread of the sandwich---DWB cuts
JuMP.set_start_value.(b, b_start); JuMP.set_start_value.(n, n_start); # 🥪 The upper bread of the sandwich
JuMP.optimize!(swMip) # 🖥️ U can monitor Gurobi's logging