OptimizationMOI Ipopt violating inequality constraint

Apparently, I still cannot get inequality constraints working, but now using Optimization.jl and IPOPT via the OptimizationMOI package.

Problem

The problem is that IPOPT violates my inequality constraint during a step, which then causes an exception in the objective function. The objective function calculates log(variance), and the constraint is there to prevent variance from being negative.

  1. IPOPT violates this constraint:
    1. a - 1/dt must be negative;
    2. During optimization I get a - 1/dt == 32.36, which is not negative;
  2. variance becomes negative: -2.4941978436429695 < 0,
  3. the log function raises a DomainError.

I’m specifically using IPOPT because it’s an interior point optimizer, so it should never violate constraints, AFAIK. Yet it does.

Code

import Pkg
Pkg.activate(temp=true)
Pkg.add([
    Pkg.PackageSpec(name="Optimization", version="3.10.0"),
    Pkg.PackageSpec(name="OptimizationMOI", version="0.1.5"),
    Pkg.PackageSpec(name="Ipopt", version="1.1.0"),
])

import Optimization
import OptimizationMOI, Ipopt

const AV{T} = AbstractVector{T}

function model_constraints!(out::AV{<:Real}, u::AV{<:Real}, data)
    # Model parameters
    dt, a, b = u
    out[1] = a - 1/dt # Must be < 0
    @info "Must be NEGATIVE: $(out[1])"
end

function model_variance(u::AV{T}, data::AV{<:Real}) where T<:Real
    # Model parameters
    dt, a, b = u
    # Compute variance
    variance = zeros(T, length(data))
    variance[1] = one(T)
    for t in 1:(length(data) - 1)
        variance[t+1] = (1 - dt * a) * variance[t] + dt * data[t]^2 + dt * b
    end
    variance
end

function model_loss(u::AV{T}, data::AV{<:Real})::T where T<:Real
    variance = model_variance(u, data)
    loglik::T = zero(T)
    for (r, var) in zip(data, variance)
        loglik += -(log(2Ď€) + log(var) + r^2 / var) / 2
    end
    -loglik / length(data)
end

function model_fit(u0::AV{T}, data::AV{<:Real}) where T<:Real
    func = Optimization.OptimizationFunction(
        model_loss, Optimization.AutoForwardDiff(),
        cons=model_constraints!
    )
    prob = Optimization.OptimizationProblem(
        func, u0, data,
        # 0 < dt < 1 && 1 < a < Inf && 0 < b < Inf
        lb=T[0.0, 1.0, 0.0], ub=T[1.0, Inf, Inf],
        #    ^dt  ^a   ^b         ^dt  ^a   ^b  <= model parameters 
        lcons=T[-Inf], ucons=T[0.0] # a - 1/dt < 0
    )
    sol = Optimization.solve(prob, Ipopt.Optimizer())
    sol.u
end

let 
    data = [
        2.1217711584057386, -0.28350145551002465, 2.3593492969513004, 0.192856733601849, 0.4566485836385113, 1.332717934013979, -1.286716619379847, 0.9868669960185211, 2.2358674776395224, -2.7933975791568098,
        1.2555871497124622, 1.276879759908467, -0.8392016987911409, -1.1580875182201849, 0.33201646080578456, -0.17212553408696898, 1.1275285626369556, 0.23041139849229036, 1.648423577528424, 2.384823597473343,
        -0.4005518932539747, -1.117737311211693, -0.9490152960583265, -1.1454539355078672, 1.4158585811404159, -0.18926972177257692, -0.2867541528181491, -1.2077459688543788, -0.6397173049620141, 0.66147783407023,
        0.049805188778543466, 0.902540117368457, -0.7018417933284938, 0.47342354473843684, 1.2620345361591596, -1.1483844812087018, -0.06487285080802752, 0.39020117013487715, -0.38454491504165356, 1.5125786171885645,
        -0.6751768274451174, 0.490916740658628, 0.012872300530924086, 0.46532447715746716, 0.34734421531357157, 0.3830452463549559, -0.8730874028738718, 0.4333151627834603, -0.40396180775692375, 2.0794821773418497,
        -0.5392735774960918, 0.6519326323752113, -1.4844713145398716, 0.3688828625691108, 1.010912990717231, 0.5018274939956874, 0.36656889279915833, -0.11403975693239479, -0.6460314660359935, -0.41997005020823147,
        0.9652752515820495, -0.37375868692702047, -0.5780729659197872, 2.642742798278919, 0.5076984117208074, -0.4906395089461916, -1.804352047187329, -0.8596663844837792, -0.7510485548262176, -0.07922589350581195,
        1.7201304839487317, 0.9024493222130577, -1.8216089665357902, 1.3929269238775426, -0.08410752079538407, 0.6423068180438288, 0.6615201016351212, 0.18546977816594887, -0.717521690742993, -1.0224309324751113,
        1.7748350222721971, 0.1929546575877559, -0.1581871639724676, 0.20198379311238596, -0.6919373947349301, -0.9253274269423383, 0.549366272989534, -1.9302106783541606, 0.7197247279281573, -1.220334158468621,
        -0.9187468058921053, -2.1452607604834184, -2.1558650694862687, -0.9387913392336701, -0.676637835687265, -0.16621998352492198, 0.5637177022958897, -0.5258315560278541, 0.8413359958184765, -0.9096866525337141
    ]
    # u0 = [0 < dt < 1, 1 < a < 1/dt, 0 < b < Inf]
    u0 = [0.3, 2.3333333333333335, 0.33333333333333337]
    @assert 0 < u0[1] < 1
    @assert 1 < u0[2] < 1 / u0[1]
    @assert 0 < u0[3] < Inf
    @info "Optimizing..." u0
    model_fit(u0, data)
end

I’m setting up the optimization problem like this:

func = Optimization.OptimizationFunction(
   model_loss, Optimization.AutoForwardDiff(),
   cons=model_constraints!
)
prob = Optimization.OptimizationProblem(
   func, u0, data,
   # 0 < dt < 1 && 1 < a < Inf && 0 < b < Inf
   lb=T[0.0, 1.0, 0.0], ub=T[1.0, Inf, Inf],
   #    ^dt  ^a   ^b         ^dt  ^a   ^b  <= model parameters 
   lcons=T[-Inf], ucons=T[0.0] # a - 1/dt < 0
)

I put model_constraints! into OptimizationFunction, as shown in the docs. lcons and ucons in OptimizationProblem are supposed to say that the output of model_constraints! must be between lcons==-Inf and ucons==0.0. In other words, it must be negative, less than ucons==0.0.

However, at some point it becomes positive.

Output

$ julia-1.8 my_model.jl
  Activating new project at `/var/folders/ys/3h0gnqns4b98zb66_vl_m35m0000gn/T/jl_fD7r9m`
    Updating registry at `~/.julia/registries/General.toml`
   Resolving package versions...
    Updating `/private/var/folders/ys/3h0gnqns4b98zb66_vl_m35m0000gn/T/jl_fD7r9m/Project.toml`
  [b6b21f68] + Ipopt v1.1.0
  [7f7a1694] + Optimization v3.10.0
  [fd9f6733] + OptimizationMOI v0.1.5
...
┌ Info: Optimizing...
│   u0 =
│    3-element Vector{Float64}:
│     0.3
│     2.3333333333333335
â””     0.33333333333333337
...
This is Ipopt version 3.14.4, running with linear solver MUMPS 5.4.1.

Number of nonzeros in equality constraint Jacobian...:        0
Number of nonzeros in inequality constraint Jacobian.:        3
Number of nonzeros in Lagrangian Hessian.............:        6

[ Info: Must be NEGATIVE: Dual{ForwardDiff.Tag{Optimization.var"#65#82"{Int64}, Float64}}(-1.0,11.111111111111112,1.0,0.0)
[ Info: Must be NEGATIVE: -1.0
[ Info: Must be NEGATIVE: Dual{ForwardDiff.Tag{Optimization.var"#65#82"{Int64}, Float64}}(-1.0,11.111111111111112,1.0,0.0)
Total number of variables............................:        3
                     variables with only lower bounds:        2
                variables with lower and upper bounds:        1
                     variables with only upper bounds:        0
Total number of equality constraints.................:        0
Total number of inequality constraints...............:        1
        inequality constraints with only lower bounds:        0
   inequality constraints with lower and upper bounds:        0
        inequality constraints with only upper bounds:        1
...
iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
   0  1.8621407e+00 0.00e+00 2.10e+00  -1.0 0.00e+00    -  0.00e+00 0.00e+00   0
...
   9  1.5072838e+00 3.55e-01 3.44e-02  -1.7 5.79e+01    -  6.48e-01 1.00e+00H  1
[ Info: Must be NEGATIVE: -283.8381149547038
[ Info: Must be NEGATIVE: 32.36098192303031
ERROR: LoadError: DomainError with -2.4941978436429695:
log will only return a complex result if called with a complex argument. Try log(Complex(x)).
Stacktrace:
  [1] throw_complex_domainerror(f::Symbol, x::Float64)
    @ Base.Math ./math.jl:33
  [2] _log(x::Float64, base::Val{:â„Ż}, func::Symbol)
    @ Base.Math ./special/log.jl:301
  [3] log
    @ ./special/log.jl:267 [inlined]
  [4] model_loss(u::Vector{Float64}, data::Vector{Float64})
    @ Main my_model.jl:37
  [5] OptimizationFunction
    @ ~/.julia/packages/SciMLBase/QqtZA/src/scimlfunctions.jl:3580 [inlined]
  [6] eval_objective(moiproblem::OptimizationMOI.MOIOptimizationProblem{Float64, SciMLBase.OptimizationFunction{true, Optimization.AutoForwardDiff{nothing}, typeof(model_loss), Optimization.var"#57#74"{ForwardDiff.GradientConfig{ForwardDiff.Tag{Optimization.var"#56#73"{SciMLBase.OptimizationFunction{true, Optimization.AutoForwardDiff{nothing}, typeof(model_loss), Nothing, Nothing, Nothing, typeof(model_constraints!), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED_NO_TIME), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Vector{Float64}}, Float64}, Float64, 3, Vector{ForwardDiff.Dual{ForwardDiff.Tag{Optimization.var"#56#73"{SciMLBase.OptimizationFunction{true, Optimization.AutoForwardDiff{nothing}, typeof(model_loss), Nothing, Nothing, Nothing, typeof(model_constraints!), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED_NO_TIME), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Vector{Float64}}, Float64}, Float64, 3}}}, Optimization.var"#56#73"{SciMLBase.OptimizationFunction{true, Optimization.AutoForwardDiff{nothing}, typeof(model_loss), Nothing, Nothing, Nothing, typeof(model_constraints!), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED_NO_TIME), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Vector{Float64}}}, Optimization.var"#60#77"{ForwardDiff.HessianConfig{ForwardDiff.Tag{Optimization.var"#56#73"{SciMLBase.OptimizationFunction{true, Optimization.AutoForwardDiff{nothing}, typeof(model_loss), Nothing, Nothing, Nothing, typeof(model_constraints!), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED_NO_TIME), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Vector{Float64}}, Float64}, Float64, 3, Vector{ForwardDiff.Dual{ForwardDiff.Tag{Optimization.var"#56#73"{SciMLBase.OptimizationFunction{true, Optimization.AutoForwardDiff{nothing}, typeof(model_loss), Nothing, Nothing, Nothing, typeof(model_constraints!), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED_NO_TIME), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Vector{Float64}}, Float64}, ForwardDiff.Dual{ForwardDiff.Tag{Optimization.var"#56#73"{SciMLBase.OptimizationFunction{true, Optimization.AutoForwardDiff{nothing}, typeof(model_loss), Nothing, Nothing, Nothing, typeof(model_constraints!), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED_NO_TIME), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Vector{Float64}}, Float64}, Float64, 3}, 3}}, Vector{ForwardDiff.Dual{ForwardDiff.Tag{Optimization.var"#56#73"{SciMLBase.OptimizationFunction{true, Optimization.AutoForwardDiff{nothing}, typeof(model_loss), Nothing, Nothing, Nothing, typeof(model_constraints!), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED_NO_TIME), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Vector{Float64}}, Float64}, Float64, 3}}}, Optimization.var"#56#73"{SciMLBase.OptimizationFunction{true, Optimization.AutoForwardDiff{nothing}, typeof(model_loss), Nothing, Nothing, Nothing, typeof(model_constraints!), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED_NO_TIME), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Vector{Float64}}}, Optimization.var"#63#80", Optimization.var"#64#81"{SciMLBase.OptimizationFunction{true, Optimization.AutoForwardDiff{nothing}, typeof(model_loss), Nothing, Nothing, Nothing, typeof(model_constraints!), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED_NO_TIME), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Vector{Float64}}, Optimization.var"#66#83"{ForwardDiff.JacobianConfig{ForwardDiff.Tag{Optimization.var"#65#82"{Int64}, Float64}, Float64, 3, Vector{ForwardDiff.Dual{ForwardDiff.Tag{Optimization.var"#65#82"{Int64}, Float64}, Float64, 3}}}}, Optimization.var"#71#88"{Int64, Vector{ForwardDiff.HessianConfig{ForwardDiff.Tag{Optimization.var"#69#86"{Int64}, Float64}, Float64, 3, Vector{ForwardDiff.Dual{ForwardDiff.Tag{Optimization.var"#69#86"{Int64}, Float64}, ForwardDiff.Dual{ForwardDiff.Tag{Optimization.var"#69#86"{Int64}, Float64}, Float64, 3}, 3}}, Vector{ForwardDiff.Dual{ForwardDiff.Tag{Optimization.var"#69#86"{Int64}, Float64}, Float64, 3}}}}, Vector{Optimization.var"#69#86"{Int64}}}, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED_NO_TIME), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Vector{Float64}, Vector{Float64}, Matrix{Float64}, Matrix{Float64}, Matrix{Float64}}, x::Vector{Float64})
    @ OptimizationMOI ~/.julia/packages/OptimizationMOI/ZoxrV/src/OptimizationMOI.jl:78
  [7] eval_objective(model::Ipopt.Optimizer, x::Vector{Float64})
    @ Ipopt ~/.julia/packages/Ipopt/rQctM/src/MOI_wrapper.jl:514
  [8] (::Ipopt.var"#eval_f_cb#1"{Ipopt.Optimizer})(x::Vector{Float64})
    @ Ipopt ~/.julia/packages/Ipopt/rQctM/src/MOI_wrapper.jl:597
  [9] _Eval_F_CB(n::Int32, x_ptr::Ptr{Float64}, x_new::Int32, obj_value::Ptr{Float64}, user_data::Ptr{Nothing})
    @ Ipopt ~/.julia/packages/Ipopt/rQctM/src/C_wrapper.jl:38
 [10] IpoptSolve(prob::Ipopt.IpoptProblem)
    @ Ipopt ~/.julia/packages/Ipopt/rQctM/src/C_wrapper.jl:442
 [11] optimize!(model::Ipopt.Optimizer)
    @ Ipopt ~/.julia/packages/Ipopt/rQctM/src/MOI_wrapper.jl:727
 [12] __solve(prob::SciMLBase.OptimizationProblem{true, SciMLBase.OptimizationFunction{true, Optimization.AutoForwardDiff{nothing}, typeof(model_loss), Nothing, Nothing, Nothing, typeof(model_constraints!), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, typeof(SciMLBase.DEFAULT_OBSERVED_NO_TIME), Nothing, Nothing, Nothing, Nothing, Nothing, Nothing, Nothing}, Vector{Float64}, Vector{Float64}, Vector{Float64}, Vector{Float64}, Nothing, Vector{Float64}, Vector{Float64}, Nothing, Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}}, opt::Ipopt.Optimizer; maxiters::Nothing, maxtime::Nothing, abstol::Nothing, reltol::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ OptimizationMOI ~/.julia/packages/OptimizationMOI/ZoxrV/src/OptimizationMOI.jl:329
 [13] __solve
    @ ~/.julia/packages/OptimizationMOI/ZoxrV/src/OptimizationMOI.jl:276 [inlined]
 [14] #solve#540
    @ ~/.julia/packages/SciMLBase/QqtZA/src/solve.jl:84 [inlined]
 [15] solve
    @ ~/.julia/packages/SciMLBase/QqtZA/src/solve.jl:78 [inlined]
 [16] model_fit(u0::Vector{Float64}, data::Vector{Float64})
    @ Main my_model.jl:54
 [17] top-level scope
    @ my_model.jl:77
in expression starting at my_model.jl:58

This bit shows the moment the constraint was violated:

[ Info: Must be NEGATIVE: -283.8381149547038
[ Info: Must be NEGATIVE: 32.36098192303031
ERROR: LoadError: DomainError with -2.4941978436429695
  • The first two lines are values of the constraint a - 1/dt, which must be negative.
  • However, the second line shows that it became positive!
  • The moment the constraint becomes positive, one of the variances becomes negative: -2.4941978436429695, but it’s not possible to take the log of a non-complex negative value, so I get the error.

Question

What am I doing wrong here? I must be setting up constraints incorrectly, but it seems exactly the same as in Optimization.jl’s documentation.

How to make IPOPT always stay inside the constrained region? I can’t step outside this region because otherwise the variance will become negative.

Package versions
  • Julia v1.8.3
  • Optimization.jl v3.10.0
  • OptimizationMOI 0.1.5
  • Ipopt v1.1.0
2 Likes

That is a frequent misconception. If a problem has a constraint g(x) \leq 0, for an interior-point method to ensure satisfaction of the constraint throughout, it would have to penalize g(x) in the logarithmic barrier. However, that’s not what IPOPT does. Firstly, IPOPT adds slack variables s to transform the constraint to g(x) + s = 0 and s \geq 0. Secondly, the inequality on s is penalized via the log barrier, i.e., s > 0 (strictly) is maintained throughout. BUT g(x) + s = 0 is NOT maintained; that constraint is linearized at each iteration to compute a step and is penalized overall to ensure (approximate) satisfaction in the end.

I don’t know of a reliable large-scale interior-point solver that penalizes g directly (it would be doable).

KNITRO has an option such that as soon as a point satisfying g(x) < 0 has been found, then g(x) < 0 is maintained at all subsequent iterations. In particular, if you can provide a starting point that satisfies g(x) < 0, you should be all set. IPOPT has no such option.

4 Likes

@dpo gave you a great answer.
Now I would expect IPOPT to backtrack (during the line search) when the functions cannot be evaluated at the trial iterate. Maybe you can find out about this?

Yep, that’s what I’ve learned from some optimization courses. They all explain penalty methods, logarithmic barriers etc.

Right, I studied slack variables as well.

Yes, I guess it linearizes the entire Lagrangian to compute the step direction. However, I thought the constraints were supposed to be satisfied at all times, not just in the end. Apparently, my assumption was incorrect.

So what do I do? Knitro is paid, and my optimization problem is really simple: 3 variables, 3 bounds and 1 inequality constraint. I definitely don’t want to pay for solving it. Are there any other solvers that always satisfy the constraint, even during all intermediate steps?


As a side note, what’s the point of IPOPT being an Interior Point OPTimizer when it produces points which are not in the interior of my constraints???

That’s a fair question. If you look at the assumptions underlying IPOPT, they say that all problem functions should be defined and differentiable over the entire space (\mathbb{R}^n). IPOPT belongs to the category of infeasible interior-point methods, first developed for linear optimization.

Feasible interior-point methods also exist in the literature, but are not typically implemented because they first require finding a strictly feasible initial guess (g(x) < 0), which can be as difficult as solving the problem in the first place. But of course, as is the case in your problem, if the objective is only defined when g(x) < 0, feasible methods are the only option.

All the details for implementing an efficient feasible interior-point method are in the literature. Alas, that’s a fairly nontrivial and time-consuming task.

You could get a trial version of KNITRO for free, at least to see if you manage to solve your problem to your satisfaction with it.

EDIT: since your problem is so small, you may be satisfied with a “prototype” implementation of a feasible interior-point method, one that doesn’t care about sparse linear algebra. It wouldn’t be too hard to get a preliminary version going.

I also notice in your original post that you have a strict inequality constraint. Optimization mostly considers closed feasible sets, so you’re unlikely to find a method that guarantees that the inequality will be satisfied strictly. However, feasible interior-point methods are probably your best bet.

Python’s IPOPT wrapper has (almost) no problem optimizing this:

from jax.config import config
config.update("jax_enable_x64", True)

import jax
import jax.numpy as jnp
import cyipopt

def FoldList(f, x, lst):
    """
    `FoldList(f, x, [a, b, ...])`
    Creates array `[x, f(x, a), f(f(x, a), b), ...]`
    """
    _, result = jax.lax.scan(
        lambda carry, new: (carr:=f(carry, new), carr),
        x, lst
    )
    return jnp.concatenate((
        jnp.array([x]), result
    ))

@jax.jit
def model_constraints(u: jnp.ndarray, data):
    dt, a, b = u
    return -(a - 1/dt) # must be >0

@jax.jit
def model_variance(u: jnp.ndarray, data: jnp.ndarray):
    dt, a, b = u
    return FoldList(
        lambda var, ret: (1 - dt * a) * var + dt * ret**2 + dt * b,
        1.0, data
    )[:-1]

@jax.jit
def model_loss(u: jnp.ndarray, data: jnp.ndarray):
    variance = model_variance(u, data)
    loglik = (
        -(jnp.log(2 * jnp.pi) + jnp.log(variance) + data**2 / variance) / 2
    ).mean()
    return -loglik

if __name__ == '__main__':
    import matplotlib.pyplot as plt
    data = [
        2.1217711584057386, -0.28350145551002465, 2.3593492969513004, 0.192856733601849, 0.4566485836385113, 1.332717934013979, -1.286716619379847, 0.9868669960185211, 2.2358674776395224, -2.7933975791568098,
        1.2555871497124622, 1.276879759908467, -0.8392016987911409, -1.1580875182201849, 0.33201646080578456, -0.17212553408696898, 1.1275285626369556, 0.23041139849229036, 1.648423577528424, 2.384823597473343,
        -0.4005518932539747, -1.117737311211693, -0.9490152960583265, -1.1454539355078672, 1.4158585811404159, -0.18926972177257692, -0.2867541528181491, -1.2077459688543788, -0.6397173049620141, 0.66147783407023,
        0.049805188778543466, 0.902540117368457, -0.7018417933284938, 0.47342354473843684, 1.2620345361591596, -1.1483844812087018, -0.06487285080802752, 0.39020117013487715, -0.38454491504165356, 1.5125786171885645,
        -0.6751768274451174, 0.490916740658628, 0.012872300530924086, 0.46532447715746716, 0.34734421531357157, 0.3830452463549559, -0.8730874028738718, 0.4333151627834603, -0.40396180775692375, 2.0794821773418497,
        -0.5392735774960918, 0.6519326323752113, -1.4844713145398716, 0.3688828625691108, 1.010912990717231, 0.5018274939956874, 0.36656889279915833, -0.11403975693239479, -0.6460314660359935, -0.41997005020823147,
        0.9652752515820495, -0.37375868692702047, -0.5780729659197872, 2.642742798278919, 0.5076984117208074, -0.4906395089461916, -1.804352047187329, -0.8596663844837792, -0.7510485548262176, -0.07922589350581195,
        1.7201304839487317, 0.9024493222130577, -1.8216089665357902, 1.3929269238775426, -0.08410752079538407, 0.6423068180438288, 0.6615201016351212, 0.18546977816594887, -0.717521690742993, -1.0224309324751113,
        1.7748350222721971, 0.1929546575877559, -0.1581871639724676, 0.20198379311238596, -0.6919373947349301, -0.9253274269423383, 0.549366272989534, -1.9302106783541606, 0.7197247279281573, -1.220334158468621,
        -0.9187468058921053, -2.1452607604834184, -2.1558650694862687, -0.9387913392336701, -0.676637835687265, -0.16621998352492198, 0.5637177022958897, -0.5258315560278541, 0.8413359958184765, -0.9096866525337141
    ]
    u0 = [0.3, 2.3333333333333335, 0.33333333333333337]
    data, up = map(jnp.array, (data, u0))

    loss_onearg = jax.jit(lambda u: model_loss(u, data))
    constraints_onearg = jax.jit(lambda u: model_constraints(u, data))
    constraints_onearg_hess = jax.jit(jax.hessian(constraints_onearg))
    constraints_onearg_hessvp = jax.jit(
        lambda u, v: constraints_onearg_hess(u) * v[0]
    )
    res = cyipopt.minimize_ipopt(
        loss_onearg, u0,
        jac=jax.jit(jax.jacfwd(loss_onearg)),
        hess=jax.jit(jax.hessian(loss_onearg)),
        bounds=[(0, 1), (1, None), (0, None)],
        constraints=[{
            'type': 'ineq',
            'fun': constraints_onearg,
            'jac': jax.jit(jax.jacfwd(constraints_onearg)),
            'hess': constraints_onearg_hessvp
        }], tol=1e-6
    )
    print(res.x)

Output:

>>> print(res.x)
[0.0996877  6.94179351 6.81744486]

These aren’t the parameters I was hoping to recover from these data, but whatever, at least it doesn’t throw errors!

We’re talking about nonlinear constraints here, that’s not so trivial :wink: IPOPT is interior wrt the primal bound constraints (x and the slacks s), as well as the Lagrange multipliers of the bound constraints.

You could try with a different starting point, maybe you’ll have more luck.
Otherwise, try a deterministic global optimization solver (BARON, Couenne, SCIP, etc).

FWIW, JuMP has no problem either:

julia> using JuMP

julia> import Ipopt

julia> function main(data)
           model = Model(Ipopt.Optimizer)
           @variable(model, 0 <= dt <= 1)
           @variable(model, 1 <= a)
           @variable(model, 0 <= b)
           @NLconstraint(model, a <= 1 / dt)
           variance = Any[]
           push!(variance, 1.0)
           for t in 2:length(data)
               push!(
                   variance, 
                   @NLexpression(model, (1 - dt * a) * variance[t-1] + dt * data[t-1]^2 + dt * b),
               )
           end
           N = length(data)
           @NLobjective(
               model, 
               Min, 
               sum(log(2Ď€ * var) + d^2 / var for (d, var) in zip(data, variance)) / 2N,
           )
           optimize!(model)
           return [value(dt), value(a), value(b)]
       end
main (generic function with 1 method)

julia> data = [
           2.1217711584057386, -0.28350145551002465, 2.3593492969513004, 0.192856733601849, 0.4566485836385113, 1.332717934013979, -1.286716619379847, 0.9868669960185211, 2.2358674776395224, -2.7933975791568098,
           1.2555871497124622, 1.276879759908467, -0.8392016987911409, -1.1580875182201849, 0.33201646080578456, -0.17212553408696898, 1.1275285626369556, 0.23041139849229036, 1.648423577528424, 2.384823597473343,
           -0.4005518932539747, -1.117737311211693, -0.9490152960583265, -1.1454539355078672, 1.4158585811404159, -0.18926972177257692, -0.2867541528181491, -1.2077459688543788, -0.6397173049620141, 0.66147783407023,
           0.049805188778543466, 0.902540117368457, -0.7018417933284938, 0.47342354473843684, 1.2620345361591596, -1.1483844812087018, -0.06487285080802752, 0.39020117013487715, -0.38454491504165356, 1.5125786171885645,
           -0.6751768274451174, 0.490916740658628, 0.012872300530924086, 0.46532447715746716, 0.34734421531357157, 0.3830452463549559, -0.8730874028738718, 0.4333151627834603, -0.40396180775692375, 2.0794821773418497,
           -0.5392735774960918, 0.6519326323752113, -1.4844713145398716, 0.3688828625691108, 1.010912990717231, 0.5018274939956874, 0.36656889279915833, -0.11403975693239479, -0.6460314660359935, -0.41997005020823147,
           0.9652752515820495, -0.37375868692702047, -0.5780729659197872, 2.642742798278919, 0.5076984117208074, -0.4906395089461916, -1.804352047187329, -0.8596663844837792, -0.7510485548262176, -0.07922589350581195,
           1.7201304839487317, 0.9024493222130577, -1.8216089665357902, 1.3929269238775426, -0.08410752079538407, 0.6423068180438288, 0.6615201016351212, 0.18546977816594887, -0.717521690742993, -1.0224309324751113,
           1.7748350222721971, 0.1929546575877559, -0.1581871639724676, 0.20198379311238596, -0.6919373947349301, -0.9253274269423383, 0.549366272989534, -1.9302106783541606, 0.7197247279281573, -1.220334158468621,
           -0.9187468058921053, -2.1452607604834184, -2.1558650694862687, -0.9387913392336701, -0.676637835687265, -0.16621998352492198, 0.5637177022958897, -0.5258315560278541, 0.8413359958184765, -0.9096866525337141
       ];

julia> main(data)
This is Ipopt version 3.14.4, running with linear solver MUMPS 5.4.1.

Number of nonzeros in equality constraint Jacobian...:        0
Number of nonzeros in inequality constraint Jacobian.:        2
Number of nonzeros in Lagrangian Hessian.............:        7

Total number of variables............................:        3
                     variables with only lower bounds:        2
                variables with lower and upper bounds:        1
                     variables with only upper bounds:        0
Total number of equality constraints.................:        0
Total number of inequality constraints...............:        1
        inequality constraints with only lower bounds:        0
   inequality constraints with lower and upper bounds:        0
        inequality constraints with only upper bounds:        1

iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
   0  1.5131731e+00 0.00e+00 1.00e+00  -1.0 0.00e+00    -  0.00e+00 0.00e+00   0
   1  1.5321040e+00 0.00e+00 5.05e-01  -1.0 1.01e-01    -  9.39e-01 9.89e-01f  1
   2  1.5317504e+00 0.00e+00 2.48e+01  -1.7 6.55e-01    -  3.73e-01 4.95e-01f  2
   3  1.5215004e+00 0.00e+00 2.73e+01  -1.7 9.03e-01    -  5.96e-01 4.95e-01f  2
   4  1.5205462e+00 0.00e+00 6.79e+00  -1.7 6.96e-02   0.0 1.00e+00 9.92e-01h  1
   5  1.5181044e+00 0.00e+00 4.94e-02  -1.7 1.46e-01  -0.5 1.00e+00 1.00e+00h  1
   6  1.5132940e+00 0.00e+00 4.74e+04  -5.7 1.93e-01    -  6.26e-01 1.00e+00f  1
   7  1.5112242e+00 0.00e+00 4.61e+00  -5.7 1.09e-01  -1.0 8.87e-01 1.00e+00f  1
   8  1.5089847e+00 0.00e+00 3.52e+01  -5.7 4.53e+00    -  1.83e-01 8.56e-02f  1
   9  1.5063204e+00 0.00e+00 1.74e+04  -5.7 1.09e+00    -  6.30e-03 1.00e+00f  1
iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
  10  1.5061881e+00 0.00e+00 6.16e-02  -5.7 3.85e-02  -1.4 9.99e-01 1.00e+00h  1
  11  1.5060385e+00 0.00e+00 1.19e-02  -5.7 1.54e-02  -0.1 1.00e+00 1.00e+00f  1
  12  1.5054644e+00 0.00e+00 2.89e+01  -5.7 2.56e+01  -0.6 4.07e-03 2.56e-03f  1
  13  1.5043945e+00 0.00e+00 8.41e+04  -5.7 1.02e+00    -  1.51e-03 1.00e+00f  1
  14  1.5029687e+00 0.00e+00 3.43e-03  -5.7 1.20e+00    -  1.00e+00 1.00e+00f  1
  15  1.5027443e+00 0.00e+00 3.28e-03  -5.7 1.08e+00    -  1.00e+00 1.00e+00f  1
  16  1.5027038e+00 0.00e+00 4.11e-04  -5.7 6.30e-01    -  1.00e+00 1.00e+00f  1
  17  1.5027011e+00 0.00e+00 6.90e-05  -5.7 2.29e-01    -  1.00e+00 1.00e+00h  1
  18  1.5027011e+00 0.00e+00 5.66e-07  -5.7 2.21e-02    -  1.00e+00 1.00e+00h  1
  19  1.5027011e+00 0.00e+00 2.87e-08  -8.6 4.55e-03    -  1.00e+00 1.00e+00h  1
iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
  20  1.5027011e+00 0.00e+00 3.72e-14  -8.6 4.75e-06    -  1.00e+00 1.00e+00h  1

Number of Iterations....: 20

                                   (scaled)                 (unscaled)
Objective...............:   1.5027011040525426e+00    1.5027011040525426e+00
Dual infeasibility......:   3.7163990047232110e-14    3.7163990047232110e-14
Constraint violation....:   0.0000000000000000e+00    0.0000000000000000e+00
Variable bound violation:   0.0000000000000000e+00    0.0000000000000000e+00
Complementarity.........:   2.5059035572090069e-09    2.5059035572090069e-09
Overall NLP error.......:   2.5059035572090069e-09    2.5059035572090069e-09


Number of objective function evaluations             = 23
Number of objective gradient evaluations             = 21
Number of equality constraint evaluations            = 0
Number of inequality constraint evaluations          = 23
Number of equality constraint Jacobian evaluations   = 0
Number of inequality constraint Jacobian evaluations = 21
Number of Lagrangian Hessian evaluations             = 20
Total seconds in IPOPT                               = 0.014

EXIT: Optimal Solution Found.
3-element Vector{Float64}:
 0.09968760243264961
 6.941809365898011
 6.817463190447395
3 Likes
  • BARON is paid, but I’m trying to do some basic nonlinear constrained optimization for basic research here, so I really don’t want to pay. Especially considering that Python’s cyipopt works, but Julia’s Ipopt.jl doesn’t.

  • SCIP is apparently free and even open source and proudly sports the nonlinear-optimization tag on GitHub. However, it doesn’t support nonlinear objectives:

    Nonlinear objective functions are not supported by SCIP and must be modeled as constraint function. Note, that the support for non-quadratic nonlinear constraints is not yet as robust as the rest of SCIP. Missing bounds on nonlinear variables and tiny or huge coefficients can easily lead to numerical problems, which can be avoided by careful modeling.

    Weird flex, but OK, I can put my objective function in constraints. Support for non-quadratic nonlinear constraints is not robust? Numerical problems?! Doesn’t sound particularly promising, but let’s see…

I assume that what’s meant by “modelling nonlinear objective functions as constraints” is this:

\begin{aligned} \min_{\Delta t, a, b, F} &F\\ \text{s.t.} \, &F=f(\Delta t, a, b)\\ & 0 < \Delta t < 1\\ & 1 < a < \frac1{\Delta t}\\ & b > 0 \end{aligned}

So, I’m minimizing a linear function (literally one single variable F by itself) while requiring that F be equal to f(\Delta t, a, b), which is my complicated nonlinear function.


I put the design values into u0 in this order:

u0 = [dt0, a0, b0, loss([dt0, a0, b0], data)]

Here are the constraints and optimization functions:

function model_constraints!(out::AV{<:Real}, u::AV{<:Real}, data)
    # Model parameters
    dt, a, b, objective = u
    out[1] = a - 1/dt # Must be > 0
    out[end] = model_loss(u, data) - objective # Must be == 0
end

function model_fit(u0::AV{T}, data::AV{<:Real}) where T<:Real
    u0 = [u0[begin:end-1]; model_loss(u0, data)]

    func = Optimization.OptimizationFunction(
        (u, _) -> u[end], Optimization.AutoForwardDiff(),
        cons=model_constraints!
    )
    prob = Optimization.OptimizationProblem(
        func, u0, data,
        # 0 < dt < 1 && 1 < a < Inf && 0 < b < Inf && -Inf < loss < Inf
        lb=T[0.0, 1.0, 0.0, -Inf], ub=T[1.0, Inf, Inf, Inf],
        #    ^dt  ^a   ^b   ^loss       ^dt  ^a   ^b   ^loss  <= model parameters 
        lcons=T[0.0, 0.0], ucons=T[Inf, 0.0] # a - 1/dt > 0
    )
    sol = Optimization.solve(prob, SCIP.Optimizer())
    sol.u
end

The objective function is literally (u, _) -> u[end], which is as linear as it gets. However, Optimization.OptimizationFunction probably can’t tell that it’s linear. Indeed:

julia> model_fit(u0, data)
ERROR: Nonlinear objective not supported by SCIP.jl!
Stacktrace:
 [1] error(s::String)
   @ Base ./error.jl:35
 [2] set(o::SCIP.Optimizer, #unused#::MathOptInterface.NLPBlock, data::MathOptInterface.NLPBlockData)
   @ SCIP ~/.julia/packages/SCIP/BUSgq/src/MOI_wrapper/nonlinear_constraints.jl:10

I couldn’t find a way of telling Optimization.jl that the objective function is linear.


Why is this suddenly so complicated?? People are fitting huge neural networks with billions of parameters using simple variants of basic SGD, and everyone’s happy. Sure, that’s unconstrained optimization. So once I add a simple nonlinear constraint, I have to jump through hoops in SCIP and pray to IPOPT that my constraints will be satisfied… In the end, none of the two work:^)

I think there’s some funny stuff going on in Ipopt.jl. If the objective function throws a DomainError (which is an extremely descriptive name!), the optimizer should reconsider its actions and make smaller steps, IMO.

Especially considering that Python’s cyipopt works , but Julia’s Ipopt.jl doesn’t.

The interface to Ipopt.jl works fine (see it working with JuMP). Your issue is the Optimization.jl interface to MOI and then to Ipopt.

Yes. This is a problem with Optimization.jl. There are a number of related open issues:

Currently, if you want to use one of the MOI-interfaced solvers, going through JuMP is probably simpler.

2 Likes

That’s a big overstatement without an asterisk. Can you show how to put a neural ODE into JuMP?

It’s more like, if you have simple arithmetic operations (like this example) then a symbolic modeling interface (like JuMP) will be able to do a better job because it can recognize additional structure (like the linearity of the constraints) to make the solving simpler. But if you have more nonlinear Julia code thrown around, especially with large arrays, then the symbolic interfaces don’t do so well.

That said,

None of these issues get to the heart of the problem here. If Python’s is able to solve it without telling it the constraints are linear, then Optimization.jl should be able to too. So while yes I’d agree that a symbolic modeling interface makes a lot of cases simpler to optimize and if you apply that here then it will be easier to solve, why is that needed here? That’s a rather interesting question that should be answered. Is it somewhat random given that with slack variables it can violate the constraints, and it just happens to not do so in the Python implementation? Is there some difference in the default options or settings?Is it due to the solvers giving different information, i.e. for Optimization it has the Hessian and HVP, but in Python’s it only has the HVP?

Ah, right, Ipopt.jl is a wrapper, so I guess if JuMP succeeds, the wrapper’s fine… However, would this mean that Optimization.jl doesn’t use Ipopt.jl properly?

My issue with JuMP is that I’d like my functions to be written in Julia, not a modelling language. Not exactly sure why that is, but I like when my functions don’t depend on anything - they’re just ordinary functions. I could plot them, I could autodiff them, I could use them as parts of ODEs or whatever. They don’t need to “know” that they’re parts of an optimization problem. That’s why I have model_constraints!, model_variance and model_loss as different functions: I could write new ones and compose them however I need.

With JuMP, I guess I could do something like this:

function model_variance(u::AV, data::AV{<:Real}, model=Model())
    dt, a, b = u
    variance = Vector{JuMP.NonlinearExpression}(undef, length(data))
    variance[1] = @NLexpression(model, 1.0)
    for t in 1:(length(data) - 1)
        variance[t+1] = @NLexpression(
            model,
            (1 - dt * a) * variance[t] + dt * data[t]^2 + dt * b
        )
    end
    variance
end

This is almost exactly like my original code, but having to pass model around and the @NLexpressions are kind of bothering me. Now model_variance is tied to JuMP and won’t work without a model object. I could, of course, obtain the values of variance from main by also returning value.(variance):

function main(data)
    model = Model(Ipopt.Optimizer)
    # ...
    variance = model_variance([dt, a, b], data, model)
    # ...
    optimize!(model)
    (params=value.([dt, a, b]), variance=value.(variance))
end

I can also compute this variance for arbitrary parameters:

model_variance([0.1, 0.3, 0.5], data) .|> value

I began writing this comment like half an hour ago thinking that JuMP was kinda complicated, but it seems fine now, except for having to put @NLexpression everywhere.


The optimization API I like most is one that lets me put “optimization variables” into any regular function, similar to Wolfram Mathematica:

NMinimize[{
    nastyFunction[0.2, a, b],
    a >= 1 && b >= 0
    }, {a, b}]

This way, I don’t need to construct Models or modify my nastyFunction to make it suitable for optimization (by using @NLexpression, for example). This is essentially what dual numbers do, for example, so they seem very intuitive for me.

Why not give NLPModelsIpopt a try? The whole point is to write problem functions in Julia. There’s a tutorial. I also just opened the Discussions.

If this is your primary desire and you want to use a backend like Ipopt as a solver, NonConvex.jl came to my mind as the Julia framework that has similar design goals. I reviewed a bunch of them here, Survey of Non-Linear Optimization Modeling Layers in Julia

That said, if you want maximum performance and scalability you might want to reconsider the pros and cons of using JuMP. See this discussion, AC Optimal Power Flow in Various Nonlinear Optimization Frameworks - #35 by ccoffrin

I got the exact same error:

import Pkg
Pkg.activate(temp=true)
Pkg.add([
    Pkg.PackageSpec(name="ADNLPModels"),
    Pkg.PackageSpec(name="NLPModelsIpopt")
])

using Statistics
import ADNLPModels, NLPModelsIpopt

const AV{T} = AbstractVector{T}

function FoldList(f::Function, x::T, lst::AbstractVector{<:Real}) where T <: Real
    res = Vector{T}(undef, 1 + length(lst))
    res[1] = x
    for k in eachindex(lst)
        res[k+1] = f(res[k], lst[k])
    end
    res
end

function model_constraints(u::AV{<:Real}, data)
    dt, a, b = u # Model parameters
    out = [a - 1/dt]
    @info "Must be NEGATIVE: $(out)"
    out
end

function model_variance(u::AV{T}, data::AV{<:Real}) where T<:Real
    # Model parameters
    dt, a, b = u
    # Compute variance
    variance = FoldList(
        (var, r) -> (1 - dt * a) * var + dt * r^2 + dt * b,
        one(T), data
    )
    variance[begin:end-1]
end

function model_loss(u::AV{T}, data::AV{<:Real})::T where T<:Real
    variance = model_variance(u, data)
    loglik = mean(
        @. -(log(2Ď€) + log(variance) + data^2 / variance) / 2
    )
    -loglik
end

function model_fit(u0::AV{T}, data::AV{<:Real}) where T<:Real
    problem = ADNLPModels.ADNLPModel(
        u -> model_loss(u, data), # Objective
        u0, T[0.0, 1.0, 0.0], T[1.0, Inf, Inf], # Init value & bounds
        u -> model_constraints(u, data), # Constraints
        T[-Inf], T[0.0] # Constraints bounds (must be NEGATIVE)
    )
    solver = NLPModelsIpopt.IpoptSolver(problem)
    stats = NLPModelsIpopt.solve!(solver, problem, print_level=0)
    stats.solution
end

data = [
    2.1217711584057386, -0.28350145551002465, 2.3593492969513004, 0.192856733601849, 0.4566485836385113, 1.332717934013979, -1.286716619379847, 0.9868669960185211, 2.2358674776395224, -2.7933975791568098,
    1.2555871497124622, 1.276879759908467, -0.8392016987911409, -1.1580875182201849, 0.33201646080578456, -0.17212553408696898, 1.1275285626369556, 0.23041139849229036, 1.648423577528424, 2.384823597473343,
    -0.4005518932539747, -1.117737311211693, -0.9490152960583265, -1.1454539355078672, 1.4158585811404159, -0.18926972177257692, -0.2867541528181491, -1.2077459688543788, -0.6397173049620141, 0.66147783407023,
    0.049805188778543466, 0.902540117368457, -0.7018417933284938, 0.47342354473843684, 1.2620345361591596, -1.1483844812087018, -0.06487285080802752, 0.39020117013487715, -0.38454491504165356, 1.5125786171885645,
    -0.6751768274451174, 0.490916740658628, 0.012872300530924086, 0.46532447715746716, 0.34734421531357157, 0.3830452463549559, -0.8730874028738718, 0.4333151627834603, -0.40396180775692375, 2.0794821773418497,
    -0.5392735774960918, 0.6519326323752113, -1.4844713145398716, 0.3688828625691108, 1.010912990717231, 0.5018274939956874, 0.36656889279915833, -0.11403975693239479, -0.6460314660359935, -0.41997005020823147,
    0.9652752515820495, -0.37375868692702047, -0.5780729659197872, 2.642742798278919, 0.5076984117208074, -0.4906395089461916, -1.804352047187329, -0.8596663844837792, -0.7510485548262176, -0.07922589350581195,
    1.7201304839487317, 0.9024493222130577, -1.8216089665357902, 1.3929269238775426, -0.08410752079538407, 0.6423068180438288, 0.6615201016351212, 0.18546977816594887, -0.717521690742993, -1.0224309324751113,
    1.7748350222721971, 0.1929546575877559, -0.1581871639724676, 0.20198379311238596, -0.6919373947349301, -0.9253274269423383, 0.549366272989534, -1.9302106783541606, 0.7197247279281573, -1.220334158468621,
    -0.9187468058921053, -2.1452607604834184, -2.1558650694862687, -0.9387913392336701, -0.676637835687265, -0.16621998352492198, 0.5637177022958897, -0.5258315560278541, 0.8413359958184765, -0.9096866525337141
];
# u0 = [0 < dt < 1, 1 < a < 1/dt, 0 < b < Inf]
u0 = [0.3, 2.3333333333333335, 0.33333333333333337]

model_variance(u0, data)'
# model_loss(u0, data) = 1.8621407408660917
@show model_loss(u0, data)

model_fit(u0, data)

This is basically the same code as the original, but using this to define the optimization problem:

problem = ADNLPModels.ADNLPModel(
    u -> model_loss(u, data), # Objective
    u0, T[0.0, 1.0, 0.0], T[1.0, Inf, Inf], # Init value & bounds
    u -> model_constraints(u, data), # Constraints
    T[-Inf], T[0.0] # Constraints bounds (must be NEGATIVE)
)

Error message

...
[ Info: Must be NEGATIVE: [-4336.184968523098]
[ Info: Must be NEGATIVE: [-14.365285659283622]
[ Info: Must be NEGATIVE: [0.3545732544028226]
[ Info: Must be NEGATIVE: ForwardDiff.Dual{ForwardDiff.Tag{var"#12#14"{Vector{Float64}}, Float64}, Float64, 3}[Dual{ForwardDiff.Tag{var"#12#14"{Vector{Float64}}, Float64}}(0.3545732544028226,2703.8619284012325,1.0,0.0)]
[ Info: Must be NEGATIVE: ForwardDiff.Dual{ForwardDiff.Tag{ADNLPModels.var"#â„“#72"{Float64, ADNLPModels.ADNLPModel{Float64, Vector{Float64}, Vector{Int64}}, Vector{Float64}}, Float64}, ForwardDiff.Dual{ForwardDiff.Tag{ADNLPModels.var"#â„“#72"{Float64, ADNLPModels.ADNLPModel{Float64, Vector{Float64}, Vector{Int64}}, Vector{Float64}}, Float64}, Float64, 3}, 3}[Dual{ForwardDiff.Tag{ADNLPModels.var"#â„“#72"{Float64, ADNLPModels.ADNLPModel{Float64, Vector{Float64}, Vector{Int64}}, Vector{Float64}}, Float64}}(Dual{ForwardDiff.Tag{ADNLPModels.var"#â„“#72"{Float64, ADNLPModels.ADNLPModel{Float64, Vector{Float64}, Vector{Int64}}, Vector{Float64}}, Float64}}(0.3545732544028226,2703.8619284012325,1.0,0.0),Dual{ForwardDiff.Tag{ADNLPModels.var"#â„“#72"{Float64, ADNLPModels.ADNLPModel{Float64, Vector{Float64}, Vector{Int64}}, Vector{Float64}}, Float64}}(2703.8619284012325,-281194.46110555273,0.0,0.0),Dual{ForwardDiff.Tag{ADNLPModels.var"#â„“#72"{Float64, ADNLPModels.ADNLPModel{Float64, Vector{Float64}, Vector{Int64}}, Vector{Float64}}, Float64}}(1.0,0.0,0.0,0.0),Dual{ForwardDiff.Tag{ADNLPModels.var"#â„“#72"{Float64, ADNLPModels.ADNLPModel{Float64, Vector{Float64}, Vector{Int64}}, Vector{Float64}}, Float64}}(0.0,0.0,0.0,0.0))]
[ Info: Must be NEGATIVE: [-283.8381149547038]
[ Info: Must be NEGATIVE: [32.36098192303031]
ERROR: DomainError with -2.4941978436429695:
log will only return a complex result if called with a complex argument. Try log(Complex(x)).
...

The error message mentions the exact same number -2.4941978436429695 as in my original post.

  • ADNLPModels v0.4.0
  • NLPModelsIpopt v0.10.0

Nonconvex.jl fails to forwarddiffy the model_loss function:

function model_fit_Nonconvex(u0::AV{T}, data::AV{<:Real}) where T<:Real
    model = Nonconvex.Model(u -> model_loss(u, data))
    Nonconvex.addvar!(
        model,
        T[0.0, 1.0, 0.0], T[1.0, Inf, Inf], # Bounds
        init=u0 # Initial guess
    )
    # Constraints must be NEGATIVE
    Nonconvex.add_ineq_constraint!(
        model, u -> model_constraints(u, data)
    )

    Nonconvex.optimize(
        Nonconvex.forwarddiffy(model), NonconvexIpopt.IpoptAlg(),
        u0 # Why do I need to provide the initial guess again?
    )
end

Error message

julia> model_fit_Nonconvex(u0, data)
[ Info: Must be NEGATIVE: [-1.0]
ERROR: MethodError: no method matching model_loss(::Vector{Any}, ::Vector{Float64})
Closest candidates are:
  model_loss(::AbstractVector{T}, ::AbstractVector{<:Real}) where T<:Real at my_model_nonconvex.jl:38
Stacktrace:
  [1] (::var"#27#29"{Vector{Float64}})(u::Vector{Any})
    @ Main my_model_nonconvex.jl:64
  [2] (::NonconvexCore.Objective{var"#27#29"{Vector{Float64}}, Base.RefValue{Float64}, Set{Symbol}})(args::Vector{Any}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ NonconvexCore ~/.julia/packages/NonconvexCore/HPl1t/src/functions/functions.jl:170
  [3] (::NonconvexCore.Objective{var"#27#29"{Vector{Float64}}, Base.RefValue{Float64}, Set{Symbol}})(args::Vector{Any})
    @ NonconvexCore ~/.julia/packages/NonconvexCore/HPl1t/src/functions/functions.jl:169
  [4] tovecfunc(f::Function, x::Vector{Any}; flatteny::Bool)
    @ NonconvexCore ~/.julia/packages/NonconvexCore/HPl1t/src/models/vec_model.jl:78
  [5] tovecfunc(f::Function, x::Vector{Any})
    @ NonconvexCore ~/.julia/packages/NonconvexCore/HPl1t/src/models/vec_model.jl:74
  [6] abstractdiffy(f::Function, backend::AbstractDifferentiation.ForwardDiffBackend{Nothing}, x::Vector{Any})
    @ NonconvexUtils ~/.julia/packages/NonconvexUtils/9C3qt/src/abstractdiff.jl:24
  [7] abstractdiffy(model::NonconvexCore.Model{Vector{Any}}, backend::AbstractDifferentiation.ForwardDiffBackend{Nothing}; objective::Bool, ineq_constraints::Bool, eq_constraints::Bool, sd_constraints::Bool)
    @ NonconvexUtils ~/.julia/packages/NonconvexUtils/9C3qt/src/abstractdiff.jl:31
  [8] abstractdiffy
    @ ~/.julia/packages/NonconvexUtils/9C3qt/src/abstractdiff.jl:28 [inlined]
  [9] forwarddiffy
    @ ~/.julia/packages/NonconvexUtils/9C3qt/src/abstractdiff.jl:22 [inlined]
 [10] model_fit_Nonconvex(u0::Vector{Float64}, data::Vector{Float64})
    @ Main my_model_nonconvex.jl:75
 [11] top-level scope
    @ my_model_nonconvex.jl:101

See full code in my previous comment.

It tried to call model_loss(::Vector{Any}, ::Vector{Float64}), but I defined model_loss to work on Reals (and also ForwardDiff.Duals) only, so there’s no method for design variables u being Any.

  • Nonconvex v2.1.2
  • NonconvexIpopt v0.4.2

EDIT: I made all my functions accept simply u::AbstractVector, and it actually worked:

...
Number of objective function evaluations             = 58
Number of objective gradient evaluations             = 40
Number of equality constraint evaluations            = 0
Number of inequality constraint evaluations          = 58
Number of equality constraint Jacobian evaluations   = 0
Number of inequality constraint Jacobian evaluations = 40
Number of Lagrangian Hessian evaluations             = 0
Total seconds in IPOPT                               = 0.349

EXIT: Optimal Solution Found.
3-element Vector{Float64}:
 0.09968766311484828
 6.941805262268322
 6.817458685169129

BTW, the first call spent more than 5 seconds in IPOPT for some reason. Not sure why that was.

The JuMP run doesn’t seem to be using the OP’s initial guess. The objective value at the first iteration is different. We don’t see the output of the Python run. Something clearly differs in the different scenarios here, but I would say that any successful run on your problem with IPOPT is just pure luck and nothing else. IPOPT isn’t meant to treat objectives that aren’t defined everywhere.

Are there any existing free solvers which are suitable for objectives that aren’t defined everywhere? My entire task revolves around such objectives that involve logarithms of variance, which must be positive at all times, so constraints must be satisfied. At least, the solver can’t just give up when the objective function throws DomainError.

Should I modify the objective function to catch the exception and return +Inf or something? That’ll introduce a discontinuity, though.

I know there are a lot of different solvers (like shown here, for example; I’ve also had some success with SLSQP from scipy.optimize.minimize for complicated constrained minimization), but I don’t know whether any of them always maintain feasibility. Are there any such solvers?


What I’m doing is trying to fit a basic GARCH model. I want to fit this first and then I have plans for extending it for my research. The usual packages for GARCH don’t support such extensions, so I have to roll my own. This is to say that the optimization problem should be pretty basic for “military grade, battle-tested solvers” like IPOPT.

It seems like one can fit GARCH-like models using unconstrained optimization (ARCHModels.jl returns -Inf when constraints aren’t satisfied and uses BFGS by default), but I’m not quite sure that’d work for my modifications, so I decided to reach for proper constrained optimization with trusty IPOPT, but it kinda failed me here. Or it’s NLPModelsIpopt.jl and Optimization.jl’s fault. Or I’m getting random results - I’m not sure what’s going on at this point. Everything seems to be broken haha

The JuMP run doesn’t seem to be using the OP’s initial guess.

No difference.

julia> using JuMP

julia> import Ipopt

julia> function main(data)
           model = Model(Ipopt.Optimizer)
           @variable(model, 0 <= dt <= 1, start = 0.3)
           @variable(model, 1 <= a, start = 2.3333333333333335)
           @variable(model, 0 <= b, start = 0.33333333333333337)
           @NLconstraint(model, a <= 1 / dt)
           variance = Any[]
           push!(variance, 1.0)
           for t in 2:length(data)
               push!(
                   variance, 
                   @NLexpression(model, (1 - dt * a) * variance[t-1] + dt * data[t-1]^2 + dt * b),
               )
           end
           N = length(data)
           @NLobjective(
               model, 
               Min, 
               sum(log(2Ď€ * var) + d^2 / var for (d, var) in zip(data, variance)) / 2N,
           )
           optimize!(model)
           return [value(dt), value(a), value(b)]
       end
main (generic function with 2 methods)

julia> data = [
           2.1217711584057386, -0.28350145551002465, 2.3593492969513004, 0.192856733601849, 0.4566485836385113, 1.332717934013979, -1.286716619379847, 0.9868669960185211, 2.2358674776395224, -2.7933975791568098,
           1.2555871497124622, 1.276879759908467, -0.8392016987911409, -1.1580875182201849, 0.33201646080578456, -0.17212553408696898, 1.1275285626369556, 0.23041139849229036, 1.648423577528424, 2.384823597473343,
           -0.4005518932539747, -1.117737311211693, -0.9490152960583265, -1.1454539355078672, 1.4158585811404159, -0.18926972177257692, -0.2867541528181491, -1.2077459688543788, -0.6397173049620141, 0.66147783407023,
           0.049805188778543466, 0.902540117368457, -0.7018417933284938, 0.47342354473843684, 1.2620345361591596, -1.1483844812087018, -0.06487285080802752, 0.39020117013487715, -0.38454491504165356, 1.5125786171885645,
           -0.6751768274451174, 0.490916740658628, 0.012872300530924086, 0.46532447715746716, 0.34734421531357157, 0.3830452463549559, -0.8730874028738718, 0.4333151627834603, -0.40396180775692375, 2.0794821773418497,
           -0.5392735774960918, 0.6519326323752113, -1.4844713145398716, 0.3688828625691108, 1.010912990717231, 0.5018274939956874, 0.36656889279915833, -0.11403975693239479, -0.6460314660359935, -0.41997005020823147,
           0.9652752515820495, -0.37375868692702047, -0.5780729659197872, 2.642742798278919, 0.5076984117208074, -0.4906395089461916, -1.804352047187329, -0.8596663844837792, -0.7510485548262176, -0.07922589350581195,
           1.7201304839487317, 0.9024493222130577, -1.8216089665357902, 1.3929269238775426, -0.08410752079538407, 0.6423068180438288, 0.6615201016351212, 0.18546977816594887, -0.717521690742993, -1.0224309324751113,
           1.7748350222721971, 0.1929546575877559, -0.1581871639724676, 0.20198379311238596, -0.6919373947349301, -0.9253274269423383, 0.549366272989534, -1.9302106783541606, 0.7197247279281573, -1.220334158468621,
           -0.9187468058921053, -2.1452607604834184, -2.1558650694862687, -0.9387913392336701, -0.676637835687265, -0.16621998352492198, 0.5637177022958897, -0.5258315560278541, 0.8413359958184765, -0.9096866525337141
       ];

julia> main(data)

******************************************************************************
This program contains Ipopt, a library for large-scale nonlinear optimization.
 Ipopt is released as open source code under the Eclipse Public License (EPL).
         For more information visit https://github.com/coin-or/Ipopt
******************************************************************************

This is Ipopt version 3.14.4, running with linear solver MUMPS 5.4.1.

Number of nonzeros in equality constraint Jacobian...:        0
Number of nonzeros in inequality constraint Jacobian.:        2
Number of nonzeros in Lagrangian Hessian.............:        7

Total number of variables............................:        3
                     variables with only lower bounds:        2
                variables with lower and upper bounds:        1
                     variables with only upper bounds:        0
Total number of equality constraints.................:        0
Total number of inequality constraints...............:        1
        inequality constraints with only lower bounds:        0
   inequality constraints with lower and upper bounds:        0
        inequality constraints with only upper bounds:        1

iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
   0  1.8621407e+00 0.00e+00 2.10e+00  -1.0 0.00e+00    -  0.00e+00 0.00e+00   0
   1  1.6724881e+00 0.00e+00 5.18e-01  -1.0 1.83e-01    -  8.26e-01 1.00e+00f  1
   2  1.5562440e+00 0.00e+00 2.36e-01  -1.7 2.65e-01    -  1.00e+00 1.00e+00f  1
   3  1.5191827e+00 0.00e+00 1.61e+00  -1.7 2.12e+00    -  8.10e-01 1.00e+00f  1
   4  1.5107919e+00 0.00e+00 5.73e-01  -1.7 1.24e+00    -  1.00e+00 1.00e+00h  1
   5  1.5046485e+00 0.00e+00 2.36e-01  -1.7 1.89e+00    -  9.60e-01 1.00e+00h  1
   6  1.5073755e+00 0.00e+00 9.81e-02  -1.7 6.68e+01  -4.0 5.60e-02 3.75e-02h  2
   7  1.5089724e+00 0.00e+00 1.25e-01  -1.7 1.08e+01  -3.6 1.00e+00 1.00e+00h  1
   8  1.5062711e+00 0.00e+00 4.23e-02  -1.7 8.94e+00    -  9.14e-01 1.00e+00h  1
   9  1.5072838e+00 3.55e-01 3.44e-02  -1.7 5.79e+01    -  6.48e-01 1.00e+00H  1
Warning: SOC step rejected due to evaluation error
iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
  10  1.5066406e+00 0.00e+00 1.79e-01  -1.7 5.37e+00  -4.1 1.00e+00 5.00e-01h  2
  11  1.5064900e+00 2.69e+01 6.50e-01  -1.7 5.49e+01    -  5.81e-01 1.00e+00H  1
Warning: SOC step rejected due to evaluation error
  12  1.5073601e+00 0.00e+00 6.06e-01  -1.7 8.51e+01  -4.5 5.01e-01 1.00e-01h  2
  13  1.5063547e+00 0.00e+00 3.07e-01  -1.7 9.19e+01  -5.0 1.00e+00 1.00e+00H  1
  14  1.5078132e+00 3.88e+01 6.77e-01  -1.7 1.95e+02    -  1.00e+00 1.00e+00H  1
Warning: SOC step rejected due to evaluation error
  15  1.5087427e+00 0.00e+00 1.23e+00  -1.7 1.19e+02  -4.6 4.07e-01 1.00e-01h  2
  16  1.5083280e+00 0.00e+00 1.61e+01  -1.7 3.73e+01    -  1.00e+00 1.00e+00H  1
  17  1.5070150e+00 0.00e+00 1.09e+01  -1.7 2.01e+01  -5.1 1.00e+00 1.00e+00h  1
  18  1.5070482e+00 0.00e+00 1.36e+01  -1.7 1.02e+02    -  1.00e+00 1.00e+00h  1
  19  1.5069021e+00 0.00e+00 1.39e-02  -1.7 1.96e+01  -5.5 1.00e+00 1.00e+00h  1
iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
  20  1.5068666e+00 0.00e+00 5.25e-02  -3.8 3.40e+00    -  9.94e-01 1.00e+00h  1
  21  1.5067795e+00 0.00e+00 5.62e-02  -3.8 1.18e+02    -  1.00e+00 1.00e+00h  1
  22  1.5066492e+00 0.00e+00 2.69e-02  -3.8 1.96e+02    -  1.00e+00 1.00e+00H  1
  23  1.5067055e+00 0.00e+00 3.06e-02  -3.8 5.48e-01  -6.0 1.00e+00 1.00e+00h  1
  24  1.5066967e+00 0.00e+00 1.23e-03  -3.8 9.78e-01  -6.5 1.00e+00 1.00e+00h  1
  25  1.5066938e+00 0.00e+00 7.59e-05  -3.8 2.32e+00  -7.0 1.00e+00 1.00e+00h  1
  26  1.5066899e+00 0.00e+00 1.59e-04  -3.8 4.04e+00  -7.4 1.00e+00 1.00e+00h  1
  27  1.5066402e+00 0.00e+00 1.18e-02  -5.7 4.19e+01  -7.9 8.72e-01 1.00e+00h  1
  28  1.5065732e+00 0.00e+00 1.38e-01  -5.7 2.81e+02  -8.4 1.00e+00 6.01e-01h  1
  29  1.5063308e+00 0.00e+00 1.06e-01  -5.7 6.45e+01    -  1.00e+00 1.00e+00h  1
iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
  30  1.5061272e+00 0.00e+00 9.78e-02  -5.7 7.45e+01    -  1.00e+00 4.36e-01h  1
  31  1.5062072e+00 0.00e+00 9.82e-02  -5.7 4.71e+01    -  8.79e-01 1.00e+00h  1
  32  1.5050722e+00 0.00e+00 6.56e-02  -5.7 7.84e+00    -  1.00e+00 5.92e-01h  1
  33  1.5040004e+00 0.00e+00 3.52e-02  -5.7 6.79e+00    -  1.00e+00 1.00e+00h  1
  34  1.5031769e+00 0.00e+00 1.56e-02  -5.7 3.65e+00    -  1.00e+00 7.27e-01h  1
  35  1.5027488e+00 0.00e+00 1.75e-03  -5.7 2.62e+00    -  1.00e+00 1.00e+00h  1
  36  1.5034167e+00 0.00e+00 1.57e-02  -5.7 3.59e-01    -  1.09e-01 1.00e+00h  1
  37  1.5027717e+00 0.00e+00 5.48e-03  -5.7 2.36e+00    -  1.00e+00 1.00e+00h  1
  38  1.5027049e+00 0.00e+00 2.07e-03  -5.7 8.38e-01    -  8.72e-01 1.00e+00h  1
  39  1.5027011e+00 0.00e+00 3.40e-05  -5.7 2.20e-01    -  1.00e+00 1.00e+00h  1
iter    objective    inf_pr   inf_du lg(mu)  ||d||  lg(rg) alpha_du alpha_pr  ls
  40  1.5027011e+00 0.00e+00 9.25e-07  -5.7 2.67e-02    -  1.00e+00 1.00e+00h  1
  41  1.5027011e+00 0.00e+00 7.94e-11  -5.7 2.65e-04    -  1.00e+00 1.00e+00h  1
  42  1.5027011e+00 0.00e+00 2.72e-08  -8.6 4.75e-03    -  1.00e+00 1.00e+00h  1
  43  1.5027011e+00 0.00e+00 4.51e-14  -8.6 5.21e-06    -  1.00e+00 1.00e+00h  1

Number of Iterations....: 43

                                   (scaled)                 (unscaled)
Objective...............:   1.5027011040525411e+00    1.5027011040525411e+00
Dual infeasibility......:   4.5096614450569779e-14    4.5096614450569779e-14
Constraint violation....:   0.0000000000000000e+00    0.0000000000000000e+00
Variable bound violation:   0.0000000000000000e+00    0.0000000000000000e+00
Complementarity.........:   2.5059035566699296e-09    2.5059035566699296e-09
Overall NLP error.......:   2.5059035566699296e-09    2.5059035566699296e-09


Number of objective function evaluations             = 63
Number of objective gradient evaluations             = 44
Number of equality constraint evaluations            = 0
Number of inequality constraint evaluations          = 63
Number of equality constraint Jacobian evaluations   = 0
Number of inequality constraint Jacobian evaluations = 44
Number of Lagrangian Hessian evaluations             = 43
Total seconds in IPOPT                               = 2.429

EXIT: Optimal Solution Found.
3-element Vector{Float64}:
 0.099687602434293
 6.94180936589729
 6.817463190450273

There is a difference between JuMP and Optimization.jl in the number of non-zeros in the Jacobian and Hessian though, so that might be the cause.

Even “military grade” solvers like IPOPT come with their set of working assumptions. NLPModelsIpopt.jl and Optimization.jl are just thin interfaces that forward a problem to IPOPT.jl. It’s none of those packages’s fault. It’s just that IPOPT is the wrong solver for your problem.

SQP methods won’t ensure satisfaction of your constraints either, nor will penalty or augmented Lagrangian methods. In the list, I see a solver named CONMAX that claims to be feasible, but I know nothing about it.

All the details of a feasible interior-point solver are given in a paper from my thesis: A primal-dual trust-region algorithm for non-convex nonlinear programming | Mathematical Programming. However, back then, we only implemented it for quadratic problems (because of the huge implementation effort, in Fortran at the time, to write a robust solver for large sparse problems).

Your problem is very small, so it’s probably possible to get a prototype (dense) solver going relatively quickly.