Hi,
I’m trying to solve a convex optimisation problem. More specifically, I build a neural network NN(x, y) which is convex in the argument y; such a network can be constructed as suggested in Input Convex Neural Networks. I want to find the optimal variable y^* which minimises NN(x, y) for a given x.
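In other words, for a fixed x, I want to solve y^* = argmin_y NN(x, y).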
However, as I was worried, a plain construction of the NN using Flux cannot be handed to Convex.jl. That is presumably because Convex.jl only works with its own predefined operations (atoms), so that it can verify the problem is convex. Is this simply impossible to do?
+) edit: I will mostly use the ReLU activation.
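For what it’s worth, ReLU by itself seems fine for Convex.jl as long as it is written with Convex.jl’s own atoms (max / pos, where pos(x) = max(x, 0)). A minimal sketch, assuming SCS as the solver (the exact solve! signature depends on the Convex.jl version):

using Convex, SCS

v = Variable(3)
# ReLU written with a Convex.jl atom: max(expr, 0) (equivalently pos(expr)) is a
# recognised convex atom, so the DCP check passes.
expr = max(2.0 * v + 1.0, 0)
problem = minimize(sum(expr), [v >= -1, v <= 1])
solve!(problem, SCS.Optimizer)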
++) edit: Here is my implementation code (actually I don’t understand the role of @functor):
using Flux
using Flux: @functor
using Flux: glorot_uniform
using Convex
"""
Wz should be nonnegative
"""
struct ICNN_Layer
    # x-path
    W̃
    b̃
    g̃
    # y-path
    Wz
    Wzu
    bz
    Wy
    Wyu
    by
    Wu
    b
    g
end
function ICNN_Layer(uin::Integer, uout::Integer,
                    zin::Integer, zout::Integer,
                    y::Integer,  # y's dim
                    g̃=identity, g=identity;
                    initW = glorot_uniform, initb = zeros)
    layer = ICNN_Layer(
        initW(uout, uin), initb(uout), g̃,                         # W̃, b̃, g̃ (x-path)
        max.(initW(zout, zin), 0.0), initW(zin, uin), initb(zin), # "Wz", Wzu, bz
        initW(zout, y), initW(y, uin), initb(y),                  # Wy, Wyu, by
        initW(zout, uin), initb(zout), g                          # Wu, b, g
    )
    return layer
end
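# As far as I can tell, @functor registers the struct's fields with Functors.jl,
# so that Flux can traverse them (e.g. Flux.params(layer) collects the weight
# arrays, and gpu/f32 can map over them).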
@functor ICNN_Layer
function (a::ICNN_Layer)(input)
    u, z, y = input
    # params
    W̃, b̃, g̃ = a.W̃, a.b̃, a.g̃
    Wz, Wzu, bz = a.Wz, a.Wzu, a.bz
    Wy, Wyu, by = a.Wy, a.Wyu, a.by
    Wu, b, g = a.Wu, a.b, a.g
    # network operation
    u_next = g̃.(W̃*u .+ b̃)
    z_next = g.(
        Wz * (z .* max.(Wzu*u .+ bz, 0.0))
        + Wy * (y .* (Wyu*u .+ by))
        + (Wu * u .+ b)
    )
    return u_next, z_next, y
end
function _selector(input)
    u, z, y = input
    return z
end
# Map ReLU applied to a Convex.jl expression onto the pos atom (pos(x) = max(x, 0)),
# so that Convex.jl recognises it as a convex operation.
function Flux.relu(x::Convex.AdditionAtom)
    return Convex.pos(x)
end
## ICNN
act = relu
n_x = 5
n_h = 128
n_y = 3
n_V = 1
layers = [
    ICNN_Layer(n_x, n_h, n_h, n_h, n_y, act, act),
    ICNN_Layer(n_h, n_h, n_h, n_V, n_y, act, act),
    _selector,
]
# Wz_0 is meaningless; z_0 = zeros(n_h) is just a convention
m(x, y) = foldl((input, m) -> m(input), layers, init = (x, zeros(n_h), y))
x = collect(1:n_x)
y = collect(n_x+1:n_x+n_y)
ẑ = m(x, y)
println(ẑ)
## Convex.jl + ICNN
y_opt = Variable(n_y)
minimize(m(x, y_opt))
And the error:
ERROR: LoadError: MethodError: no method matching zero(::Convex.AdditionAtom)
Closest candidates are:
zero(::Type{Pkg.Resolve.VersionWeight}) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Pkg/src/Resolve/versionweights.jl:15
zero(::Type{Pkg.Resolve.FieldValue}) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/Pkg/src/Resolve/fieldvalues.jl:38
zero(::Type{ModelingToolkit.TermCombination}) at /home/jinrae/.julia/packages/ModelingToolkit/1qEYb/src/linearity.jl:67
...
Stacktrace:
[1] generic_matvecmul!(::Array{Convex.AdditionAtom,1}, ::Char, ::Array{Float32,2}, ::Array{Convex.MultiplyAtom,1}, ::LinearAlgebra.MulAddMul{true,true,Bool,Bool}) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/LinearAlgebra/src/matmul.jl:685
[2] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/LinearAlgebra/src/matmul.jl:81 [inlined]
[3] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/LinearAlgebra/src/matmul.jl:208 [inlined]
[4] *(::Array{Float32,2}, ::Array{Convex.MultiplyAtom,1}) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.5/LinearAlgebra/src/matmul.jl:51
[5] (::ICNN_Layer)(::Tuple{Array{Int64,1},Array{Float64,1},Variable}) at /home/jinrae/.julia/dev/GliderPathPlanning/test/icnn.jl:53
[6] (::var"#35#36")(::Tuple{Array{Int64,1},Array{Float64,1},Variable}, ::ICNN_Layer) at /home/jinrae/.julia/dev/GliderPathPlanning/test/icnn.jl:83
[7] BottomRF at ./reduce.jl:81 [inlined]
[8] _foldl_impl(::Base.BottomRF{var"#35#36"}, ::Tuple{Array{Int64,1},Array{Float64,1},Variable}, ::Array{Any,1}) at ./reduce.jl:58
[9] foldl_impl at ./reduce.jl:48 [inlined]
[10] mapfoldl_impl(::typeof(identity), ::var"#35#36", ::NamedTuple{(:init,),Tuple{Tuple{Array{Int64,1},Array{Float64,1},Variable}}}, ::Array{Any,1}) at ./reduce.jl:44
[11] mapfoldl(::Function, ::Function, ::Array{Any,1}; kw::Base.Iterators.Pairs{Symbol,Tuple{Array{Int64,1},Array{Float64,1},Variable},Tuple{Symbol},NamedTuple{(:init,),Tuple{Tuple{Array{Int64,1},Array{Float64,1},Variable}}}}) at ./reduce.jl:160
[12] #foldl#205 at ./reduce.jl:178 [inlined]
[13] m(::Array{Int64,1}, ::Variable) at /home/jinrae/.julia/dev/GliderPathPlanning/test/icnn.jl:83
[14] top-level scope at /home/jinrae/.julia/dev/GliderPathPlanning/test/icnn.jl:93
[15] include(::String) at ./client.jl:457
[16] top-level scope at REPL[1]:1
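If it helps, I think the error can be reproduced without the network at all. My understanding (an assumption on my side) is that multiplying a plain Float32 matrix by a Julia Vector of Convex.jl atoms falls back to LinearAlgebra's generic matrix-vector product, which needs zero(::Convex.AdditionAtom):

using Convex

v = Variable(3)
atoms = [v[i] + 1.0 for i in 1:3]   # plain Julia Vector of Convex.jl atoms
rand(Float32, 2, 3) * atoms         # should throw the same MethodError about zero(...)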