Hi,
I’m looking for help with semidefinite programming libraries, in particular Mosek.jl and SCS.jl, which I am accessing via JuMP.
I have a particular semidefinite program, unfortunately not so small, on which SCS and Mosek behave wildly differently. The JuMP code to create the SDP is below. The objective is to maximize a sum of 20 variables, each constrained to be at most 1, so the optimal value is certainly at most 20. SCS seems to be unstable on this SDP: different runs give noticeably different objective values, yet it reliably reports that it has found an optimal, feasible solution with an objective value in the 100s, which the bound of 20 rules out.
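(To quantify this: I believe JuMP's primal_feasibility_report can measure how far the point SCS returns is from feasibility, e.g. along the lines below; the 1e-6 tolerance is an arbitrary choice.)

# run after optimize!(model) with SCS
violations = primal_feasibility_report(model; atol = 1e-6)
println("Constraints violated by more than 1e-6: ", length(violations))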
Mosek, by contrast, reliably halts with the SLOW_PROGRESS termination status, unknown feasibility status, and an objective value around 5, which I suspect is close to the true optimum.
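(In case part of this is just first-order-method tolerance, something like the following should make SCS work harder; the option names are for SCS >= 3.x, older versions used a single "eps" option, and I don't know the corresponding Mosek parameter names offhand.)

# after model = Model(SCS.Optimizer):
set_optimizer_attribute(model, "eps_abs", 1e-7)      # SCS >= 3 defaults to 1e-4
set_optimizer_attribute(model, "eps_rel", 1e-7)
set_optimizer_attribute(model, "max_iters", 200_000) # give the ADMM iterations more room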
Can anyone help me diagnose what is going on here? Perhaps there is a better way to write this SDP so that SCS/Mosek can solve it reliably? Also, let me know if I should be posting on an SCS forum instead of the Julia forums.
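(One rewrite I can see already, in case it matters for conditioning: declare W2 symmetric at creation instead of adding n^2 equality constraints. I believe JuMP supports this via the Symmetric tag, though I have not checked whether it changes anything for either solver.)

@variable(model, W2[1:n, 1:n], Symmetric)
@constraint(model, W2 .>= 0)
@constraint(model, W2 .<= 1)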
Thank you!
Sam Hopkins (MIT EECS/CSAIL)
using JuMP, SCS

n = 20
X1 = [-1.8720260374246223, -1.8781458774724729, -0.17791029084388438, -0.8462955398955371, -2.0469710096036433, -0.35975721626597695, 0.8842577434655674, -1.3223561839837161, 0.6896142719006527, -0.36871492095552355, 2.5457019309298645, 0.43255645627695055, 1.1829341730339282, 0.27928496225015176, -0.7190127944803778, -0.3181873711543086, -0.777522351529572, -0.7510533778023913, 1.092447119866926, -0.7166669366612253]
X2 = [1.1070459415658573, 0.5795843324866427, -0.5863786589759239, -0.6303514730508921, -2.134220474514286, -2.1073153136370113, -1.738225867184788, -0.6228120766649945, 0.3489472785183505, -0.007112356145337337, -0.3095732980138016, 0.6191937516367954, -1.1757497744128709, 0.489785023278765, -0.6099680522048614, 1.5311815120096015, 1.3767360330748495, 0.36292134299401857, -0.38201359174032207, 1.1159100799295802]
y = [1.4482646171826903, -0.13640237028718938, -1.935472303485774, -2.7384350612655957, -8.456692621719586, -6.67386946936305, -4.320712475168053, -3.208524514003925, 1.7419313772371354, -0.3947635292123156, 1.6165609185591465, 2.296956225988994, -2.3443423784949617, 1.7517382625370639, -2.5502531514767033, 4.282043983846345, 3.3633475815066287, 0.32836024839909755, -0.055302855197776786, 2.624222775581259]
# y above was generated as [X1[i] + 3*X2[i] + 0.01*randn() for i in 1:n];
# it is hard-coded (rather than regenerated each run) so that runs are reproducible
model = Model(SCS.Optimizer)
@variable(model, 1 >= W[1:n] >= 0)
@variable(model, 1 >= W2[1:n,1:n] >= 0)
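# scalar lifting variables: beta stands in for a single scalar, and
# beta2, ..., beta8 stand in for its powers beta^2, ..., beta^8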
@variable(model, beta)
@variable(model, beta2)
@variable(model, beta3)
@variable(model, beta4)
@variable(model, beta5)
@variable(model, beta6)
@variable(model, beta7)
@variable(model, beta8)
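# lifted products: Wbeta[i], Wbeta2[i], Wbeta3[i], Wbeta4[i] stand in for
# W[i]*beta, W[i]*beta^2, W[i]*beta^3, W[i]*beta^4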
@variable(model, Wbeta[1:n])
@variable(model, Wbeta2[1:n] >= 0)
@variable(model, Wbeta3[1:n])
@variable(model, Wbeta4[1:n] >= 0)
# W2 is symmetric
for i in 1:n, j in (i+1):n
    @constraint(model, W2[i, j] == W2[j, i])
end
# diagonals of W2 match W
for i in 1:n
    @constraint(model, W2[i, i] == W[i])
end
# lifted positivity constraints corresponding to W[i] being a square
for i in 1:n
    @constraint(model, [1 W[i] Wbeta[i] Wbeta2[i]; W[i] W[i] Wbeta[i] Wbeta2[i]; Wbeta[i] Wbeta[i] Wbeta2[i] Wbeta3[i]; Wbeta2[i] Wbeta2[i] Wbeta3[i] Wbeta4[i]] in PSDCone())
end
# W[i] * beta^2 <= beta^2
for i in 1:n
    @constraint(model, Wbeta2[i] <= beta2)
end
# the full (n+5) x (n+5) moment matrix is psd
@constraint(model, [1 beta beta2 beta3 beta4 W'; beta beta2 beta3 beta4 beta5 Wbeta'; beta2 beta3 beta4 beta5 beta6 Wbeta2' ; beta3 beta4 beta5 beta6 beta7 Wbeta3' ; beta4 beta5 beta6 beta7 beta8 Wbeta4' ; W Wbeta Wbeta2 Wbeta3 Wbeta4 W2] in PSDCone())
# define weighted residuals for the beta regression (regressing y on X1 only;
# the coefficient on X2 is assumed to be zero)
weighted_residuals = [X1[i] * Wbeta[i] - y[i] * W2[i,i] for i in 1:n]
# define weighted residuals lifted by beta
weighted_residuals_beta = [X1[i] * Wbeta2[i] - y[i] * Wbeta[i] for i in 1:n]
# constrain the gradient of the weighted least-squares objective to be zero
@constraint(model, sum(X1[i] * weighted_residuals[i] for i in 1:n) == 0)
@constraint(model, sum(X2[i] * weighted_residuals[i] for i in 1:n) == 0)
@constraint(model, sum(X1[i] * weighted_residuals_beta[i] for i in 1:n) == 0)
@constraint(model, sum(X2[i] * weighted_residuals_beta[i] for i in 1:n) == 0)
# maximize the number of selected samples
@objective(model, Max, sum(W2[i, i] for i in 1:n))
optimize!(model)
println("Optimal objective value: ", objective_value(model))