I have a nonlinear problem that I solve with Ipopt and then differentiate with respect to a parameter using DiffOpt.jl. Unfortunately, the gradients DiffOpt returns disagree with the ones I get from finite differencing. I suspect I might be able to resolve this by reformulating my problem, but I've had no luck with that so far.
The first formulation I tried is as follows.
import DiffOpt, FiniteDifferences, Ipopt, JuMP
# parameter
ϕ0 = [0.6];
# other constants
Phigh = 0.4412123910369964;
Plow = 0.357171935601378;
c(Nsep) = (1 / (1 + 2*Nsep))^2
cols = 2;
reqwakes = 1;
Nwakes = 1;
aprev = 0.1 * ones(2, 1);
rhist = Phigh * ones(1);
normalized_rhist = [1];
predhorz = 2;
KP = 0.5 * π/4;
# model definition
model = DiffOpt.nonlinear_diff_model(Ipopt.Optimizer)
JuMP.@variable(model, 0.05 <= a[1:cols, -reqwakes:Nwakes] <= 1/3)
for j=1:cols, n=1:reqwakes
    JuMP.fix(a[j, n - cols], aprev[j, n]; force=true)
end
JuMP.fix.(a[end, 0:end], 0.333333; force=true)
JuMP.@variable(model, ϕ in JuMP.Parameter(ϕ0[1]))
scale = (Phigh - Plow) / 2;
shift = (Phigh + Plow) / 2;
JuMP.@variables(model, begin
    Pref_unscaled[t=1:predhorz]
    Pref_var[t=1:predhorz]
end)
JuMP.fix.(Pref_unscaled[1], reverse(normalized_rhist))
JuMP.@constraints(model, begin
    [t=2:predhorz], Pref_unscaled[t] == ϕ * Pref_unscaled[t-1]
    [t=1:predhorz], Pref_var[t] == (scale * Pref_unscaled[t] + shift)
end)
JuMP.@expressions(model, begin
    CP[j=1:cols, n=0:Nwakes], 4 * a[j,n] * (1 - a[j,n])^2
    δV[j=1:cols, n=0:Nwakes], 2 * sqrt( sum( (a[prev,n-(j-prev)] * c(j-prev) )^2 for prev=1:j-1; init=0) )
    u[j=1:cols, n=0:Nwakes], (1 - δV[j,n])
    Pcap[j=1:cols, n=0:Nwakes], CP[j,n] * u[j,n]^3
    Pgen[n=0:Nwakes], sum(Pcap[j,n] for j=1:cols)
end)
JuMP.@objective(model, Min, sum( (KP * Pgen[n] - Pref_var[1+n])^2 for n=0:Nwakes))
JuMP.optimize!(model)
JuMP.assert_is_solved_and_feasible(model)
We can compare the gradients from DiffOpt and finite differencing as follows:
### DIFFOPT ###
DiffOpt.empty_input_sensitivities!(model)
DiffOpt.set_reverse_variable.(model, model[:a][1, 0], 1)
DiffOpt.reverse_differentiate!(model)
dadϕ = DiffOpt.get_reverse_parameter.(model, model[:ϕ]);
### FINITE DIFFERENCE ###
function solve_a_ϕ0(ϕ0)
    JuMP.set_parameter_value.(model[:ϕ], ϕ0)
    JuMP.optimize!(model)
    JuMP.assert_is_solved_and_feasible(model)
    asolved_new = JuMP.value.(model[:a]);
    return asolved_new[1, 0]
end
dadϕ_fdm = FiniteDifferences.grad(FiniteDifferences.forward_fdm(2, 1), solve_a_ϕ0, ϕ0);
@show dadϕ;
@show dadϕ_fdm;
The two gradients end up being quite different.
One reformulation I tried was to get rid of the square root since its derivative blows up near zero.
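To spell out the concern: writing s = Σ_{prev=1}^{j-1} (a[prev, n-(j-prev)] * c(j-prev))^2, the expression above is δV[j,n] = 2√s, so dδV/ds = 1/√s, which is unbounded as s → 0⁺.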
# parameter
ϕ0=[0.6];
# other constants
Phigh = 0.4412123910369964;
Plow = 0.357171935601378;
c(Nsep) = (1 / (1 + 2*Nsep))^2
cols = 2;
reqwakes = 1;
Nwakes = 1;
aprev = 0.1 * ones(2, 1);
rhist = Phigh * ones(1);
normalized_rhist = [1];
predhorz = 2;
KP = 0.5 * π/4;
# model definition
model = DiffOpt.nonlinear_diff_model(Ipopt.Optimizer)
JuMP.@variable(model, 0.05 <= a[1:cols, -reqwakes:Nwakes] <= 1/3)
for j=1:cols, n=1:reqwakes
    JuMP.fix(a[j, n - cols], aprev[j, n]; force=true)
end
JuMP.fix.(a[end, 0:end], 0.333333; force=true)
JuMP.@variable(model, ϕ in JuMP.Parameter(ϕ0[1]))
scale = (Phigh - Plow) / 2;
shift = (Phigh + Plow) / 2;
JuMP.@variables(model, begin
    Pref_unscaled[t=1:predhorz]
    Pref_var[t=1:predhorz]
end)
JuMP.fix.(Pref_unscaled[1], reverse(normalized_rhist))
JuMP.@constraints(model, begin
    [t=2:predhorz], Pref_unscaled[t] == ϕ * Pref_unscaled[t-1]
    [t=1:predhorz], Pref_var[t] == (scale * Pref_unscaled[t] + shift)
end)
### REFORMULATION START ###
JuMP.@variables(model, begin
    0 <= δV[j=1:cols, n=0:Nwakes] <= 1
end)
JuMP.@constraints(model, begin
    [j=1:cols, n=0:Nwakes], (δV[j, n])^2 == 4 * sum( (a[prev,n-(j-prev)] * c(j-prev) )^2 for prev=1:j-1; init=0)
end)
JuMP.@expressions(model, begin
    u[j=1:cols, n=0:Nwakes], 1 - δV[j,n]
    CP[j=1:cols, n=0:Nwakes], 4 * a[j,n] * (1 - a[j,n])^2
    Pcap[j=1:cols, n=0:Nwakes], CP[j,n] * u[j,n]^3
    Pgen[n=0:Nwakes], sum(Pcap[j,n] for j=1:cols)
end)
### REFORMULATION END ###
JuMP.@objective(model, Min, sum( (KP * Pgen[n] - Pref_var[1+n])^2 for n=0:Nwakes))
JuMP.optimize!(model)
JuMP.assert_is_solved_and_feasible(model)
However, this did not seem to change the gradient discrepancy, and it also made the problem harder to solve for other sets of problem data. I also tried various combinations of changing the expressions into variables and constraints (sketched below), but these also seemed to make the problem harder to solve. I would appreciate any suggestions here. Thanks!
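For completeness, by "changing the expressions into variables and constraints" I mean lifting an expression such as u into its own decision variable with a defining equality constraint, roughly like this sketch (`u_var` is just an illustrative name; the indices follow the model above):
# lift the u expression into a decision variable with a defining constraint
JuMP.@variable(model, u_var[j=1:cols, n=0:Nwakes])
JuMP.@constraint(model, [j=1:cols, n=0:Nwakes], u_var[j, n] == 1 - δV[j, n])
# Pcap would then use u_var[j, n]^3 in place of the u[j, n] expression.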