Flux: `params` cannot track any parameters when building networks without `Chain`

For my personal use, I build a network like the example provided by Flux.

However, the resulting function (model) `m` is not compatible with `Flux.params`.

Here’s an easy, reproducible example corresponding to my case:

julia> using Flux

julia> layers = [Dense(5, 2), Dense(2, 1)]
2-element Array{Dense{typeof(identity),Array{Float32,2},Array{Float32,1}},1}:
 Dense(5, 2)
 Dense(2, 1)

julia> m(x) = foldl((x, m) -> m(x), layers, init=x)
m (generic function with 1 method)

julia> params(m)
Params([])

Is there any way to use `params` with networks constructed this way, via `foldl`?
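
For what it’s worth, `params` does seem to pick up the weights when it is given the layer array itself (a quick check; I assume arrays of layers are traversed, while plain closures are not):

Flux.params(layers)   # returns the W and b of both Dense layers (4 arrays)

but I would like `params` to work on the model `m` directly.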

Note:
Here’s my raw code (illustrative; it won’t be executable for others due to dependencies).

function ICNN(n_x, n_y, n_V, n_h, act, num_layers)
    layers = []
    for i in 1:num_layers
        n_u, n_out = n_h, n_h
        if i == 1
            n_u = n_x
        elseif i == num_layers
            n_out = n_V
        end
        layer = ICNN_Layer(n_u, n_h, n_h, n_out, n_y, act)
        push!(layers, layer)
    end
    push!(layers, selector)
    m(x, y) = foldl((input, m) -> m(input), layers, init=(x, zeros(n_h), y))
    return m
end

I am not familiar with the `foldl` function, so could you give an example of how you would apply the resulting `m(x)`?
Currently it gives me an error if I try to use it with a number, which might also explain why `params` returns nothing.

julia> m(1.0)
ERROR: MethodError: no method matching (::Dense{typeof(identity),Array{Float32,2},Array{Float32,1}})(::Float64)
Closest candidates are:
  Any(::AbstractArray{T,N} where N) where {T<:Union{Float32, Float64}, W<:(AbstractArray{T,N} where N)} at C:\Users\a_ill\.julia\packages\Flux\q3zeA\src\layers\basic.jl:134
  Any(::AbstractArray{var"#s128",N} where N where var"#s128"<:AbstractFloat) where {T<:Union{Float32, Float64}, W<:(AbstractArray{T,N} where N)} at C:\Users\a_ill\.julia\packages\Flux\q3zeA\src\layers\basic.jl:137
  Any(::AbstractArray) at C:\Users\a_ill\.julia\packages\Flux\q3zeA\src\layers\basic.jl:121
Stacktrace:
 [1] (::var"#244#245")(::Float64, ::Dense{typeof(identity),Array{Float32,2},Array{Float32,1}}) at .\none:1
 [2] BottomRF at .\reduce.jl:81 [inlined]
 [3] _foldl_impl at .\reduce.jl:58 [inlined]
 [4] foldl_impl at .\reduce.jl:48 [inlined]
 [5] mapfoldl_impl(::typeof(identity), ::var"#244#245", ::NamedTuple{(:init,),Tuple{Float64}}, ::Array{Dense{typeof(identity),Array{Float32,2},Array{Float32,1}},1}) at .\reduce.jl:44
 [6] mapfoldl(::Function, ::Function, ::Array{Dense{typeof(identity),Array{Float32,2},Array{Float32,1}},1}; kw::Base.Iterators.Pairs{Symbol,Float64,Tuple{Symbol},NamedTuple{(:init,),Tuple{Float64}}}) at .\reduce.jl:160
 [7] #foldl#205 at .\reduce.jl:178 [inlined]
 [8] m(::Float64) at .\none:1
 [9] top-level scope at none:1

Edit: Never mind, my mistake. `rand(5)` works as an input.
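
For the record, the `foldl` simply threads the input through the layers in order, so (a minimal sketch) anything the first `Dense(5, 2)` accepts will work:

x = rand(5)   # Dense(5, 2) expects a length-5 vector
m(x)          # same as layers[2](layers[1](x)); returns a 1-element vector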

If you define your function through a struct, it seems to work.

struct m_upper
    layers::Tuple
end
# forward pass: thread x through the stored layers, same foldl as above
function m_upper(x::Array, layers::Tuple)
    result = foldl((x, m) -> m(x), layers, init=x)
    return result
end
(m::m_upper)(x) = m_upper(x, m.layers)   # make instances callable
Flux.@functor m_upper                    # let params/gpu walk the struct's fields

m = m_upper((Dense(5, 2), Dense(2, 1)))

julia> params(m)
Params([Float32[0.08757681 0.66873735 … -0.68970186 -0.808191; 0.7758514 -0.83058345 … 0.37495735 -0.58736336], Float32[0.0, 0.0], Float32[0.96472514 -0.34120739], Float32[0.0]])
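
Gradients and updates then go through the wrapper like with any other Flux model; a rough sketch with made-up data and a plain squared-error loss:

x, target = rand(Float32, 5), rand(Float32, 1)
loss() = sum(abs2, m(x) .- target)                    # simple squared-error loss
gs = gradient(loss, params(m))                        # non-empty, unlike the closure version
Flux.Optimise.update!(Descent(0.1), params(m), gs)    # one gradient-descent step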

Ah, it’s a nice trick :slight_smile:

Note that this is almost exactly how `Flux.Chain` is defined. @iHany, is there a reason that doesn’t work for you? I ask because `Chain` has a lot of additional functionality that isn’t available on a custom layer wrapper (think `torch.nn.Sequential`).

I realised that using `Flux.Chain` would be better if possible, e.g. for GPU support.
However, `Flux.Chain` does not seem to support multiple arguments.

Is there any convenient way to make it compatible with multiple arguments?

EDIT: a way I found is to have the model `m` receive a single tuple argument, like `m((x, y))`.
It would be much better if it could receive two arguments, like `m(x, y)`.
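
A rough sketch of the kind of wrapper I mean (hypothetical name, same struct trick as above), where the callable method takes two arguments and packs them into the tuple the `Chain` actually sees:

struct TupleChain            # hypothetical wrapper, not a Flux type
    chain::Flux.Chain
end
(m::TupleChain)(x, y) = m.chain((x, y))   # expose m(x, y); the Chain still receives one tuple
Flux.@functor TupleChain                  # so params/gpu reach the wrapped Chain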

I did some tricks as follows.
Note that the code may not run as-is (I just copied and pasted parts of my code for illustration).

## ICNN
struct ICNN
    chain::Flux.Chain
end

function ICNN_Layer(uin::Integer, uout::Integer,
        zin::Integer, zout::Integer,
        y::Integer,  # y's dim
        # g=identity,  # Fixed to be ReLU for now.
        g̃=identity;
        initW = Flux.glorot_uniform, initb = zeros)
    layer = ICNN_Layer(
        uin, y,  # in & out
        initW(uout, uin), initb(uout), g̃,  # W̃, b̃, g̃ (x-path)
        max.(initW(zout, zin), 0.0), initW(zin, uin), initb(zin),  # "Wz", Wzu, bz
        initW(zout, y), initW(y, uin), initb(y),  # Wy, Wyu, by
        initW(zout, uin), initb(zout),  # Wu, b
        # g,  # g (y-path activation)
    )
    return layer
end

Flux.@functor ICNN_Layer  # make "struct" compatible with Flux

function ICNN(x, y, chain)
    result = chain((x, y))  # the Chain takes a single tuple argument
    return result
end

function init_layer(input)
    x, y = input
    return x, 0.0, y  # not `x, zeros(1), y` for CUDA
end

function selector(input)
    u, z, y = input
    return z  # keep only the z path as the network output
end


function ICNN(n_x::Integer, n_y::Integer, n_h::Integer, act, num_layers::Integer)
    n_V = 1  # the resulting network should be scalar-valued function
    layers = []
    push!(layers, init_layer)
    for i in 1:num_layers
        n_uin, n_uout, n_zin, n_zout = n_h, n_h, n_h, n_h
        if i == 1
            n_uin, n_zin = n_x, 1
        elseif i == num_layers
            n_zout = n_V
        end
        layer = ICNN_Layer(n_uin, n_uout, n_zin, n_zout, n_y, act)
        push!(layers, layer)
    end
    push!(layers, selector)
    m = ICNN(Chain(layers...))
    return m
end
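
For completeness, what seems to be missing from the pasted code above (an assumption on my part, following the `m_upper` pattern) is making `ICNN` instances callable and functor-ing the wrapper, so that `params` reaches the weights inside the `Chain`. A hypothetical sketch, reusing the three-argument `ICNN` method defined above:

Flux.@functor ICNN                        # let params/gpu walk into the wrapped Chain
(m::ICNN)(x, y) = ICNN(x, y, m.chain)     # two-argument call; the Chain sees the tuple (x, y)

# hypothetical usage (assumes ICNN_Layer instances are callable on the (u, z, y) tuple)
m = ICNN(3, 2, 16, relu, 4)               # n_x = 3, n_y = 2, n_h = 16, 4 layers
V = m(rand(Float32, 3), rand(Float32, 2)) # scalar-valued output (a 1-element array)
ps = Flux.params(m)                       # now non-empty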