The performance problem is likely two-fold.
First, every evaluation of the inner problem requires building and solving 25 models with Mosek.
Second, you have 100 X parameters, so computing the gradient requires on the order of 100 * 25 Mosek solves. And then Ipopt itself can be quite expensive: it typically needs many function evaluations (for line searches and so on), each of which repeats that entire batch of solves.
I haven’t tested the code because I don’t have Mosek, but I would try solving the single, larger problem:
# Solve all S scenario subproblems as one Mosek model instead of S separate ones.
#
# Arguments:
#   x : length-K vector of outer decisions (enters the objective via ν[:, s]' * x)
#   σ : power-cone exponent parameter (cone uses exponent 1/σ)
#   τ : J×K matrix of coefficients
#   z : scenario data, indexed z[i, s] for i in 1:K, s in 1:S
#   S : number of scenarios
#
# Returns a tuple:
#   1. the total objective value summed over all S scenarios
#   2. the K-vector sum over scenarios of the optimal ν values
function innermost(x, σ, τ, z, S)
    (J, K) = size(τ)
    opt = Model(Mosek.Optimizer)
    # NOTE: the original also set "QUIET" => false, but set_silent below
    # overrides it, so the contradictory setting has been removed.
    set_optimizer_attribute(opt, "INTPNT_CO_TOL_DFEAS", 1e-7)
    set_optimizer_attribute(opt, "MSK_IPAR_INTPNT_MULTI_THREAD", false)
    set_silent(opt)
    @variable(opt, ν[1:K, 1:S] >= 0)
    @variable(opt, μ[1:J, 1:S] >= 0)
    @variable(opt, t[1:J, 1:S] >= 0)
    @expression(opt, obj[s = 1:S], ν[:, s]' * x + sum(t[:, s]))
    @objective(opt, Min, sum(obj))
    # BUG FIX: ν and μ are 2-D containers, so they must be indexed by the
    # scenario s as well — the original wrote ν[i] and μ[j], which errors
    # (and would incorrectly couple scenarios even if it resolved).
    # Also renamed the container from `c`, which shadowed an unused local.
    @constraint(
        opt,
        dual_feas[i = 1:K, j = 1:J, s = 1:S],
        τ[j, i] / z[i, s] + τ[j, i] * ν[i, s] - μ[j, s] >= 0,
    )
    @constraint(opt, [j = 1:J, s = 1:S], [t[j, s], μ[j, s], 1] in MOI.PowerCone(1 / σ))
    JuMP.optimize!(opt)
    return objective_value(opt), sum(value.(ν[:, s]) for s in 1:S)
end
# Evaluate the inner objective and its gradient with respect to x.
#
# BUG FIX: the original body referenced `S`, which was neither a parameter
# nor otherwise in scope — it only worked if a global `S` happened to exist.
# `S` is now a trailing parameter defaulting to `size(z, 2)` (the number of
# scenario columns of z, matching how innermost indexes z[i, s]), which is
# backward-compatible for existing callers.
#
# Returns a tuple:
#   1. E[π]/S - Σᵢ α₁ᵢ xᵢ - Σᵢ α₂ᵢ xᵢ²   (scalar objective)
#   2. E[ν]/S .- α₁ .- 2 .* α₂ .* x      (gradient vector)
function inner(x, σ, τ, z, α₁, α₂, S = size(z, 2))
    Eπ, Eν = innermost(x, σ, τ, z, S)
    objective = Eπ / S - sum(α₁ .* x) - sum(α₂ .* x .^ 2)
    gradient = Eν ./ S .- α₁ .- 2 .* α₂ .* x
    return objective, gradient
end
You could improve things even further by building the opt problem outside the loop and updating only the objective between solves, but see if the code above helps first.