Hi @odow , thanks for your answer. I’ve implemented your clever suggestion in the MWE below. Now, the code runs successfully. However, in implementing the nested optimization I still have some sort of issue. I do not know whether the problem is a bug in bilevel_problem
or bench_bilevel_nlp
, but it takes almost as twice as long.
For the second issue that you are pointing out, do you have any particular suggestion? The function that I am minimizing is continuous and convex on the vector X, and I have the gradients which are the vector of \nu, so the minimization should be very conventional, aside from the fact that it is nested. I am currently using Ipopt
, because it is standard, however, if I end up solving this issue with the nested optimization, my plan is to use knitro
. But if any other optimizer comes to mind, I would appreciate your suggestions.
The MWE is here:
using JuMP, Ipopt, LinearAlgebra, Random, Distributions, BenchmarkTools, MosekTools, NLopt, Plots, FLoops
Threads.nthreads()
Random.seed!(12345)
const K = 100
const J = 50
const S = 25
const z=rand(K,S) .+ 1.0
const α₁=rand(K) .+ 1.0
const α₂=rand(K).*.1
const σ = 3.0
function distmat(J,K)
Distances = zeros(J,K)
Random.seed!(7777)
coordinates_x = hcat([x for x = 1:J],fill(0.0,J))
coordinates_y = hcat([x for x = 1:K].*J/K,fill(0.0,K))
for j = 1:J, l=1:K
Distances[j,l] = sqrt((coordinates_x[j,1]-coordinates_y[l,1])^2+(coordinates_x[j,2]-coordinates_y[l,2])^2)
end
return 1 .+ .1*Distances
end
const τ = distmat(J,K)
function innermost(x,σ,τ,z,s)
(J,K) = size(τ)
opt = Model(Mosek.Optimizer)
set_optimizer_attribute(opt, "QUIET", false)
set_optimizer_attribute(opt, "INTPNT_CO_TOL_DFEAS", 1e-7)
set_optimizer_attribute(opt, "MSK_IPAR_INTPNT_MULTI_THREAD", false)
set_silent(opt)
c = ones(J)
@variable(opt, ν[1:K] >= 0)
@variable(opt, μ[1:J]>= 0)
@variable(opt, t[1:J]>= 0)
@objective(opt, Min, ν'*x .+ sum(t))
@constraint(
opt,
c[i = i=1:K, j=1:J],
τ[j,i]/z[i,s] + τ[j,i]*ν[i] - μ[j] >= 0,
)
@inbounds for j = 1 : J
@constraint(opt, [t[j],μ[j],1] in MOI.PowerCone(1/σ))
end
JuMP.optimize!(opt)
return objective_value(opt), value.(ν)[1:K], value.(μ)[1:J], dual.(c)
end
function inner(x,σ,τ,z,α₁,α₂)
Eπ, Eν = 0.0, zeros(K)
for s in 1:S
temp = innermost(x,σ,τ,z,s)
Eπ += temp[1]
Eν .+= temp[2]
end
return Eπ / S .- sum(α₁.*x) .- sum(α₂.*x.^2) , Eν ./ S .- α₁ .- 2 .*α₂.*x
end
function innermost1(x,σ,τ,z,S)
(J,K) = size(τ)
opt = Model(Mosek.Optimizer)
set_optimizer_attribute(opt, "QUIET", false)
set_optimizer_attribute(opt, "INTPNT_CO_TOL_DFEAS", 1e-7)
set_optimizer_attribute(opt, "MSK_IPAR_INTPNT_MULTI_THREAD", false)
set_silent(opt)
@variable(opt, ν[1:K, 1:S] >= 0)
@variable(opt, μ[1:J, 1:S] >= 0)
@variable(opt, t[1:J, 1:S] >= 0)
@expression(opt, obj[s=1:S], ν[:,s]' * x + sum(t[:,s]))
@objective(opt, Min, sum(obj))
@constraint(
opt,
c[i=1:K, j=1:J, s=1:S],
τ[j,i] / z[i,s] + τ[j,i] * ν[i,s] - μ[j,s] >= 0,
)
@constraint(opt, [j=1:J, s=1:S], [t[j,s], μ[j,s], 1] in MOI.PowerCone(1 / σ))
JuMP.optimize!(opt)
return objective_value(opt), sum(value.(ν[:,s]) for s in 1:S)
end
function inner1(x,σ,τ,z,α₁,α₂)
Eπ, Eν = innermost1(x,σ,τ,z,S)
return Eπ / S .- sum(α₁.*x) .- sum(α₂.*x.^2) , Eν ./ S .- α₁ .- 2 .*α₂.*x
end
function outer(guess,σ,τ,z,α₁,α₂)
function f1(x::Vector, grad::Vector)
temp = inner(x,σ,τ,z,α₁,α₂)
if length(grad) > 0
grad[:]= -temp[2]
end
return -temp[1]
end
function optimization_step(guess)
opt = Opt(:LD_SLSQP, K)
opt.lower_bounds = zeros(K)
opt.xtol_rel = sqrt(eps())
opt.ftol_rel = eps()
opt.min_objective = f1
(minf,minx,ret) = NLopt.optimize(opt, guess)
numevals = opt.numevals # the number of function evaluations
#println("got $minf at $minx after $numevals iterations in (returned $ret)")
return minx, minf
end
minx, minf = optimization_step(guess)
return minx, -minf
end
function outer1(guess,σ,τ,z,α₁,α₂)
function f1(x::Vector, grad::Vector)
temp = inner1(x,σ,τ,z,α₁,α₂)
if length(grad) > 0
grad[:]= -temp[2]
end
return -temp[1]
end
function optimization_step(guess)
opt = Opt(:LD_SLSQP, K)
opt.lower_bounds = zeros(K)
opt.xtol_rel = sqrt(eps())
opt.ftol_rel = eps()
opt.min_objective = f1
(minf,minx,ret) = NLopt.optimize(opt, guess)
numevals = opt.numevals # the number of function evaluations
#println("got $minf at $minx after $numevals iterations in (returned $ret)")
return minx, minf
end
minx, minf = optimization_step(guess)
return minx, -minf
end
println("Looping over S in Inner")
@btime innermost(ones(K),σ,τ,z,1) ;
@btime inner(ones(K),σ,τ,z,α₁,α₂) ;
(xstar,fstar) = @btime outer(ones(K),σ,τ,z,α₁,α₂) ;
println("fstar:", fstar)
println("Looping over S in Innermost")
@btime innermost1(ones(K),σ,τ,z,S) ;
@btime inner1(ones(K),σ,τ,z,α₁,α₂) ;
(xstar1,fstar1) = @btime outer1(ones(K),σ,τ,z,α₁,α₂) ;
println("fstar1:", fstar1)
println("Using Ipopt and Bilevel Problem in JUMP")
function bilevel_problem(σ,τ,z,α₁,α₂)
last_x, last_f, last_y = nothing, 0.0, [NaN, NaN]
function _update(x...)
if last_x == x
return last_y
end
(last_f,last_y) = inner1(collect(x),σ,τ,z,α₁,α₂)
last_x = x
return last_y
end
function f(x...)
_update(x...)
return -last_f
end
function ∇f(g, x...)
_update(x...)
g .= -last_y
return
end
return _update, f, ∇f
end
function bench_bilevel_nlp(σ,τ,z,α₁,α₂)
model = Model(Ipopt.Optimizer)
set_optimizer_attribute(model, "tol", 1e-3)
@variable(model, X[1:K] >= 0)
subproblem, f, ∇f = bilevel_problem(σ,τ,z,α₁,α₂)
register(model, :bilevel_f, K, f, ∇f)
@NLobjective(model, Min, bilevel_f(X...))
JuMP.optimize!(model)
@assert termination_status(model) == LOCALLY_SOLVED
println(objective_value(model))
#println("x = ", value.(X))
#println("y = ", subproblem(value.(X)...))
return value.(X), -subproblem(value.(X)...)
end
(xstar2,fstar2) = @btime bench_bilevel_nlp(σ,τ,z,α₁,α₂)
println("fstar2:", fstar2)
and the output is here,
Looping over S in Inner
81.041 ms (332353 allocations: 20.98 MiB)
1.655 s (8308851 allocations: 524.37 MiB)
43.999 s (377874883 allocations: 23.28 GiB)
fstar
13.65302
Looping over S in Innermost
2.426 s (7380607 allocations: 491.34 MiB)
2.408 s (7380610 allocations: 491.35 MiB)
39.876 s (250960869 allocations: 16.31 GiB)
fstar1
13.65304
This is Ipopt version 3.14.4, running with linear solver MUMPS 5.4.1.
Number of nonzeros in equality constraint Jacobian...: 0
Number of nonzeros in inequality constraint Jacobian.: 0
Number of nonzeros in Lagrangian Hessian.............: 0
Total number of variables............................: 100
variables with only lower bounds: 100
variables with lower and upper bounds: 0
variables with only upper bounds: 0
Total number of equality constraints.................: 0
Total number of inequality constraints...............: 0
inequality constraints with only lower bounds: 0
inequality constraints with lower and upper bounds: 0
inequality constraints with only upper bounds: 0
iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls
0 -4.6564070e+00 0.00e+00 1.00e+00 0.0 0.00e+00 - 0.00e+00 0.00e+00 0
1 -7.4529206e+00 0.00e+00 1.59e-02 -1.7 1.79e-02 - 1.00e+00 1.00e+00f 1
2 -8.9898544e+00 0.00e+00 1.80e-03 -3.5 2.25e-02 - 1.00e+00 1.00e+00f 1
3 -1.0099893e+01 0.00e+00 1.30e-03 -4.8 4.62e-02 - 1.00e+00 1.00e+00f 1
4 -1.1409563e+01 0.00e+00 6.85e-04 -4.8 1.07e-01 - 9.80e-01 1.00e+00f 1
5 -1.2170621e+01 0.00e+00 4.57e-04 -4.8 1.67e-01 - 9.97e-01 7.67e-01f 1
6 -1.2623251e+01 0.00e+00 3.87e-04 -4.6 3.02e-01 - 1.00e+00 8.88e-01f 1
7 -1.3059052e+01 0.00e+00 2.60e-04 -4.9 2.20e-01 - 9.91e-01 1.00e+00f 1
8 -1.3405223e+01 0.00e+00 1.94e-04 -5.4 1.48e-01 - 9.99e-01 9.00e-01f 1
9 -1.3517947e+01 0.00e+00 2.28e-04 -5.8 1.80e-01 - 1.00e+00 6.30e-01f 1
iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls
10 -1.3593800e+01 0.00e+00 1.82e-04 -6.0 7.27e-02 - 9.99e-01 9.79e-01f 1
11 -1.3622958e+01 0.00e+00 3.63e-04 -6.7 5.59e-02 - 1.00e+00 6.07e-01f 1
12 -1.3269251e+01 0.00e+00 2.93e-04 -5.1 1.03e-01 - 9.14e-01 1.00e+00f 1
13 -1.3355551e+01 0.00e+00 1.48e-04 -5.2 5.45e-02 - 1.00e+00 1.00e+00f 1
14 -1.3369108e+01 0.00e+00 1.21e-04 -5.2 4.40e-02 - 1.00e+00 1.00e+00f 1
15 -1.3372775e+01 0.00e+00 6.94e-05 -5.2 1.87e-02 - 1.00e+00 1.00e+00f 1
16 -1.3372001e+01 0.00e+00 2.99e-05 -5.2 1.25e-02 - 1.00e+00 1.00e+00f 1
17 -1.3624447e+01 0.00e+00 1.98e-04 -7.8 7.08e-02 - 8.95e-01 9.91e-01f 1
18 -1.3643050e+01 0.00e+00 4.01e-04 -6.8 4.97e-02 - 1.00e+00 8.90e-01f 1
19 -1.3360714e+01 0.00e+00 3.84e-04 -5.2 9.15e-02 - 9.45e-01 1.00e+00f 1
iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls
20 -1.3428833e+01 0.00e+00 1.19e-04 -5.3 3.37e-02 - 1.00e+00 1.00e+00f 1
21 -1.3436063e+01 0.00e+00 7.55e-05 -5.3 1.66e-02 - 1.00e+00 1.00e+00f 1
22 -1.3435569e+01 0.00e+00 4.03e-05 -5.3 6.90e-03 - 1.00e+00 1.00e+00f 1
23 -1.3635171e+01 0.00e+00 2.78e-04 -7.8 7.46e-02 - 8.74e-01 9.89e-01f 1
24 -1.3644960e+01 0.00e+00 5.63e-04 -7.8 4.55e-02 - 9.43e-01 5.52e-01f 1
25 -1.3575224e+01 0.00e+00 2.74e-04 -5.8 3.06e-02 - 1.00e+00 1.00e+00f 1
26 -1.3589880e+01 0.00e+00 7.45e-05 -5.8 1.97e-02 - 1.00e+00 1.00e+00f 1
27 -1.3591803e+01 0.00e+00 3.37e-05 -5.8 1.72e-02 - 1.00e+00 1.00e+00f 1
28 -1.3592033e+01 0.00e+00 4.62e-05 -5.8 7.57e-03 - 1.00e+00 1.00e+00f 1
29 -1.3592075e+01 0.00e+00 4.83e-06 -5.8 3.73e-03 - 1.00e+00 1.00e+00f 1
iter objective inf_pr inf_du lg(mu) ||d|| lg(rg) alpha_du alpha_pr ls
30 -1.3649657e+01 0.00e+00 7.04e-05 -7.8 1.85e-02 - 9.79e-01 1.00e+00f 1
31 -1.3652124e+01 0.00e+00 5.60e-04 -8.9 1.36e-02 - 1.00e+00 7.51e-01f 1
Number of Iterations....: 31
(scaled) (unscaled)
Objective...............: -2.1371882502778616e-02 -1.3652123983924689e+01
Dual infeasibility......: 5.5983821986408506e-04 3.5761851056080318e-01
Constraint violation....: 0.0000000000000000e+00 0.0000000000000000e+00
Variable bound violation: 0.0000000000000000e+00 0.0000000000000000e+00
Complementarity.........: 7.2637672397801354e-08 4.6400147920968588e-05
Overall NLP error.......: 5.5983821986408506e-04 3.5761851056080318e-01
Number of objective function evaluations = 32
Number of objective gradient evaluations = 32
Number of equality constraint evaluations = 0
Number of inequality constraint evaluations = 0
Number of equality constraint Jacobian evaluations = 0
Number of inequality constraint Jacobian evaluations = 0
Number of Lagrangian Hessian evaluations = 0
Total seconds in IPOPT = 75.508
75.528 s
fstar2
13.6521
Finally, do you have any intuition for why the function inner
is faster that inner1
?