Not if you want to train model1 and model2 as well. Again, their parameters will not be seen inside the function:
julia> model1 = Chain(Dense(4 => 5),vec)
Chain(
Dense(4 => 5), # 25 parameters
vec,
)
julia> model2 = Chain(Dense(4 => 6),vec)
Chain(
Dense(4 => 6), # 30 parameters
vec,
)
julia> model3 = Chain(
x->cat(model1(x),model2(x),dims=1),
Dense(11 => 11),
vec
)
Chain(
var"#7#8"(),
Dense(11 => 11), # 132 parameters
vec,
)
# model3 only has the parameters of the last Dense layer
julia> Dense(11 => 11)
Dense(11 => 11) # 132 parameters
# Use Parallel again to combine the sub-models -- now the parameters are all visible
julia> model3 = Chain(
Parallel((x,y)->cat(x,y,dims=1), model1, model2),
Dense(11 => 11),
vec
)
Chain(
Parallel(
var"#11#12"(),
Chain(
Dense(4 => 5), # 25 parameters
vec,
),
Chain(
Dense(4 => 6), # 30 parameters
vec,
),
),
Dense(11 => 11), # 132 parameters
vec,
) # Total: 6 arrays, 187 parameters, 1.035 KiB.
PS: I'm also not sure about the vec at the end of your model. In Flux, models usually work on batches of input, i.e.,
julia> m = Dense(4 => 5)
Dense(4 => 5) # 25 parameters
julia> size(m(rand(4))) # single input vector
(5,)
julia> size(m(rand(4, 8))) # batch of 8 inputs
(5, 8)
# model with vec eliminates batch dimension
julia> size(model1(rand(4, 8)))
(40,)