I made a few changes: when the first layer uses relu, the Hessian at the origin is SPD.
using Lux, Random  # relu, softplus, and Lux.setup come from Lux; default_rng from Random

function create_icnn(n_vars::Int, hidden_dims::Vector{Int}=[32, 32];
                     T::Type=Float64, rng::AbstractRNG=Random.default_rng())
    @assert !isempty(hidden_dims) "hidden_dims cannot be empty"
    # ICNNFirstLayer, ICNNLayer, FinalICNNLayer, and ICNNChain are the custom
    # layer types from earlier in the thread.
    layers = []
    # First layer
    push!(layers, ICNNFirstLayer(n_vars, hidden_dims[1], relu; T=T))
    # Hidden layers
    for i in 1:(length(hidden_dims)-1)
        push!(layers, ICNNLayer(n_vars, hidden_dims[i],
                                hidden_dims[i+1], softplus; T=T))
    end
    # Output layer (no activation, with quadratic terms)
    push!(layers, FinalICNNLayer(n_vars, hidden_dims[end], 1, identity;
                                 use_bias=false, use_quadratic=true, T=T))
    model = ICNNChain(layers...)
    ps, st = Lux.setup(rng, model)
    return model, ps, st
end
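A minimal way to check the SPD claim numerically looks like the sketch below (not necessarily what examples/mwe.jl does). It assumes the custom layers are ForwardDiff-compatible, that the model output is a length-1 array, and that 2 input variables is a representative choice:

using ForwardDiff, LinearAlgebra

model, ps, st = create_icnn(2)          # 2 inputs, default hidden dims [32, 32]
f(x) = first(first(model(x, ps, st)))   # Lux models return (output, state); take the scalar output
H = ForwardDiff.hessian(f, zeros(2))    # Hessian of the scalar output at the origin
@show isposdef(Symmetric(H))            # SPD check
@show minimum(eigvals(Symmetric(H)))    # smallest eigenvalue; should be > 0 if SPD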
julia> include("examples/mwe.jl")
6.539322516230143
true
true
Or is this somehow a bug?