Help reformulating nonlinear problem

I have a nonlinear problem that I wish to solve with Ipopt and derive gradients with DiffOpt.jl. Unfortunately, I’m running into problems with the gradients from DiffOpt. I suspect that I might be able to resolve those issues by reformulating my problem, but I’ve had no luck with that so far.

The first formulation I tried is as follows.

import DiffOpt
import FiniteDifferences
import Ipopt
import JuMP

# parameter
ϕ0=[0.6];

# other constants
Phigh = 0.4412123910369964;
Plow = 0.357171935601378;
c(Nsep) = (1 / (1 + 2*Nsep))^2
cols = 2; 
reqwakes = 1; 
Nwakes = 1;
aprev = 0.1 * ones(2, 1);
rhist = Phigh * ones(1);
normalized_rhist = [1];
predhorz = 2;
KP = 0.5 * π/4;

# model definition
model = DiffOpt.nonlinear_diff_model(Ipopt.Optimizer)

JuMP.@variable(model, 0.05 <= a[1:cols, -reqwakes:Nwakes] <= 1/3)

for j=1:cols, n=1:reqwakes
    JuMP.fix(a[j, n - cols], aprev[j, n]; force=true)
end

JuMP.fix.(a[end, 0:end], 0.333333; force=true)

JuMP.@variable(model, ϕ in JuMP.Parameter(ϕ0[1]))

scale = (Phigh - Plow) / 2;
shift = (Phigh + Plow) / 2;

JuMP.@variables(model, begin
               Pref_unscaled[t=1:predhorz]
               Pref_var[t=1:predhorz]
           end)

JuMP.fix.(Pref_unscaled[1], reverse(normalized_rhist))

JuMP.@constraints(model, begin
                 [t=2:predhorz], Pref_unscaled[t] == ϕ * Pref_unscaled[t-1]
                 [t=1:predhorz], Pref_var[t] == (scale * Pref_unscaled[t] + shift)
             end)

JuMP.@expressions(model, begin
                     CP[j=1:cols, n=0:Nwakes], 4 * a[j,n] * (1 - a[j,n])^2
                     δV[j=1:cols, n=0:Nwakes], 2 * sqrt( sum( (a[prev,n-(j-prev)] * c(j-prev) )^2 for prev=1:j-1; init=0) )
                     u[j=1:cols, n=0:Nwakes], (1 - δV[j,n])
                     Pcap[j=1:cols, n=0:Nwakes], CP[j,n] * u[j,n]^3
                     Pgen[n=0:Nwakes], sum(Pcap[j,n] for j=1:cols)
                 end)

JuMP.@objective(model, Min, sum( (KP * Pgen[n] - Pref_var[1+n])^2 for n=0:Nwakes))

JuMP.optimize!(model)
JuMP.assert_is_solved_and_feasible(model)

We can compare the gradients from DiffOpt and finite differencing as follows:

### DIFFOPT ###
DiffOpt.empty_input_sensitivities!(model)

DiffOpt.set_reverse_variable.(model, model[:a][1, 0], 1)
DiffOpt.reverse_differentiate!(model)

dadϕ = DiffOpt.get_reverse_parameter.(model, model[:ϕ]);

### FINITE DIFFERENCE ###
function solve_a_ϕ0(ϕ0)
    JuMP.set_parameter_value.(model[:ϕ], ϕ0)
    
    JuMP.optimize!(model)

    JuMP.assert_is_solved_and_feasible(model)

    asolved_new = JuMP.value.(model[:a]);

    return asolved_new[1, 0]
end

dadϕ_fdm = FiniteDifferences.grad(FiniteDifferences.forward_fdm(2, 1), solve_a_ϕ0, ϕ0);

@show dadϕ;
@show dadϕ_fdm;

The two gradients end up being quite different.

One reformulation I tried was to get rid of the square root since its derivative blows up near zero.
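(For reference, d/dx sqrt(x) = 1/(2*sqrt(x)), which is unbounded as x → 0. In the formulation above, the sum inside the sqrt in δV is empty for j = 1, so the sqrt is evaluated exactly at zero there and its derivative is not defined.)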

# parameter
ϕ0=[0.6];

# other constants
Phigh = 0.4412123910369964;
Plow = 0.357171935601378;
c(Nsep) = (1 / (1 + 2*Nsep))^2
cols = 2; 
reqwakes = 1; 
Nwakes = 1;
aprev = 0.1 * ones(2, 1);
rhist = Phigh * ones(1);
normalized_rhist = [1];
predhorz = 2;
KP = 0.5 * π/4;

# model definition
model = DiffOpt.nonlinear_diff_model(Ipopt.Optimizer)

JuMP.@variable(model, 0.05 <= a[1:cols, -reqwakes:Nwakes] <= 1/3)

for j=1:cols, n=1:reqwakes
    JuMP.fix(a[j, n - cols], aprev[j, n]; force=true)
end

JuMP.fix.(a[end, 0:end], 0.333333; force=true)

JuMP.@variable(model, ϕ in JuMP.Parameter(ϕ0[1]))

scale = (Phigh - Plow) / 2;
shift = (Phigh + Plow) / 2;

JuMP.@variables(model, begin
               Pref_unscaled[t=1:predhorz]
               Pref_var[t=1:predhorz]
           end)

JuMP.fix.(Pref_unscaled[1], reverse(normalized_rhist))

JuMP.@constraints(model, begin
                 [t=2:predhorz], Pref_unscaled[t] == ϕ * Pref_unscaled[t-1]
                 [t=1:predhorz], Pref_var[t] == (scale * Pref_unscaled[t] + shift)
             end)

### REFORMULATION START ###
JuMP.@variables(model, begin
        0 <= δV[j=1:cols, n=0:Nwakes] <= 1
    end)

JuMP.@constraints(model, begin
        [j=1:cols, n=0:Nwakes], (δV[j, n])^2 == 4 * sum( (a[prev,n-(j-prev)] * c(j-prev) )^2 for prev=1:j-1; init=0)
    end)

JuMP.@expressions(model, begin
                     u[j=1:cols, n=0:Nwakes], 1 - δV[j,n]
                     CP[j=1:cols, n=0:Nwakes], 4 * a[j,n] * (1 - a[j,n])^2
                     Pcap[j=1:cols, n=0:Nwakes], CP[j,n] * u[j,n]^3
                     Pgen[n=0:Nwakes], sum(Pcap[j,n] for j=1:cols)
                 end)
### REFORMULATION END ###

JuMP.@objective(model, Min, sum( (KP * Pgen[n] - Pref_var[1+n])^2 for n=0:Nwakes))

JuMP.optimize!(model)
JuMP.assert_is_solved_and_feasible(model)

However, this did not seem to change the results, and it made the problem harder to solve for other sets of problem data. I also tried various combinations of changing the expressions into variables and constraints (a sketch of one such variant is below), but these likewise seemed to make the problem harder to solve. I would appreciate any suggestions here. Thanks!
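
For concreteness, one of the variants I mean by changing the expressions into variables and constraints looks roughly like this, continuing from the second formulation above (the names CP_v, Pcap_v, and Pgen_v are only illustrative):

# Sketch: lift the CP and Pcap expressions into auxiliary variables tied to the
# same formulas through equality constraints.
JuMP.@variables(model, begin
    CP_v[j=1:cols, n=0:Nwakes]
    Pcap_v[j=1:cols, n=0:Nwakes]
end)

JuMP.@constraints(model, begin
    [j=1:cols, n=0:Nwakes], CP_v[j, n] == 4 * a[j, n] * (1 - a[j, n])^2
    [j=1:cols, n=0:Nwakes], Pcap_v[j, n] == CP_v[j, n] * (1 - δV[j, n])^3
end)

# The objective is then written in terms of the lifted variables.
JuMP.@expression(model, Pgen_v[n=0:Nwakes], sum(Pcap_v[j, n] for j = 1:cols))
JuMP.@objective(model, Min, sum((KP * Pgen_v[n] - Pref_var[1+n])^2 for n = 0:Nwakes))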

Can you provide a fully reproducible example? What is model2?

What is not very close?

If I rename model2 to model, I get:

julia> @show dadϕ;
dadϕ = -1.5139167459828575e-7

julia> @show dadϕ_fdm;
dadϕ_fdm = ([-5.845569989199273e-6],)

That seems fairly close to me? (effectively, 0)