Flux Dense Layer Type Instability

Hi. I am stumped as to why the forward pass is type unstable. It’s effectively a varying length chain similar to Flux.Chain on an alternating sequence of dense and dropout layers:

struct MLP
    dense::Vector{Flux.Dense}
    drop::Vector{Flux.Dropout}
end

function MLP(layer_dims::Vector{Int}, dropout::Bool=true, activation=tanh)
    dense = Flux.Dense[]
    drop = Flux.Dropout[]
    for i=1:length(layer_dims)-1
        if dropout && i > 1
            push!(drop, Flux.Dropout(0.5))
        end

        if i < length(layer_dims)
            push!(dense, Flux.Dense(layer_dims[i]=>layer_dims[i+1], activation))
        else
            push!(dense, Flux.Dense(layer[i] => layer[i+1]))
        end
    end
end

function (mlp::MLP)(x::Matrix{Float32}) # Forward pass
    temp = x
    for i=1:length(mlp.drop)  
        temp = mlp.dense[i](temp)
        temp = mlp.drop[i](temp)
    end
    mlp.dense[i](temp)
    mlp.dense[end](temp)
    temp
end

mlp = SR.MLP([2,2,])
x_fake = rand(Float32, 2, 100)
@code_warntype mlp(x_fake)

The lowered rep shows that temp is unstable:

MethodInstance for ()
Arguments
  mlp::MLP
  x::Matrix{Float32}
Locals
  @_3::Union{Nothing, Tuple{Int64, Int64}}
  temp::Any 
  i::Int64
Body::Any
1 ─       (temp = x)
β”‚   %2  = Base.getproperty(mlp, :drop)::Vector{Flux.Dropout}
β”‚   %3  = length(%2)::Int64
β”‚   %4  = (1:%3)::Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])
β”‚         (@_3 = Base.iterate(%4))
β”‚   %6  = (@_3 === nothing)::Bool
β”‚   %7  = Base.not_int(%6)::Bool
└──       goto #4 if not %7
2 β”„ %9  = @_3::Tuple{Int64, Int64}
β”‚         (i = Core.getfield(%9, 1))
β”‚   %11 = Core.getfield(%9, 2)::Int64
β”‚   %12 = Base.getproperty(mlp, :dense)::Vector{Flux.Dense}
β”‚   %13 = Base.getindex(%12, i)::Flux.Dense
β”‚         (temp = (%13)(temp))
β”‚   %15 = Base.getproperty(mlp, :drop)::Vector{Flux.Dropout}
β”‚   %16 = Base.getindex(%15, i)::Flux.Dropout
β”‚         (temp = (%16)(temp))
β”‚         (@_3 = Base.iterate(%4, %11))
β”‚   %19 = (@_3 === nothing)::Bool
β”‚   %20 = Base.not_int(%19)::Bool
└──       goto #4 if not %20
3 ─       goto #2
4 β”„ %23 = Base.getproperty(mlp, :dense)::Vector{Flux.Dense}
β”‚   %24 = Base.getindex(%23, i)::Any
β”‚         (%24)(temp)
β”‚   %26 = Base.getproperty(mlp, :dense)::Vector{Flux.Dense}
β”‚   %27 = Base.lastindex(%26)::Int64
β”‚   %28 = Base.getindex(%26, %27)::Flux.Dense
β”‚         (%28)(temp)
└──       return temp

I even tried evaluating with one Dense layer and the output it still Any. Is there anything I can do about this or is it not a problem?

Why do you need an alternative to Flux.Chain? I think your problem comes from your custom struct not being concretely typed, as you can check by running isconcretetype(typeof(mlp)). When you take a look at structs like Flux.Dense, they have type parameters to specify what’s inside, which are left aside in your vector storage:

1 Like

So this mlp is a submodel of a larger struct which calls the mlp at the end of its forward pass. I originally tried using Chain, but the problem is that for some reason I received similar warnings from @code_warntype.

I should have put this in the OP, but here is an MWE of the bigger models forward pass:

function (model::BiggerModel)(x, Ξ», Ο•, A, βˆ‡_x, βˆ‡_y)
    x_diffused = model.diffusion_block(x, Ξ», Ο•, A)
    x_intermediate = vcat(x_diffused', x_intermediate)
    x_out = model.mlp(x_intermediate) # Any!
end

This happens if mlp is either the implementation above, or Flux.Chains. Here is also the code I used to construct the Chain:

function MLP(layer_dims::Vector{Int}, dropout::Bool=false, activation=tanh)
    layers = Union{Flux.Dropout, Flux.Dense}[]
    for i=1:length(layer_dims)-1
        if dropout && i > 0
            push!(layers, Flux.Dropout(0.5))
        end
        if i < length(layer_dims)
            push!(layers, Flux.Dense(layer_dims[i] => layer_dims[i+1],activation))
        else
            push!(layers, Flux.Dense(layer[i]=>layer[i+1]))
        end
    end
    Flux.Chain(layers...)
end

And here is an even simpler example what of I’m concerned about:

let
	struct Foo
		d::Flux.Chain
	end
	@Flux.functor Foo
	function (model::Foo)(x)
		temp = d(x)
	end
	d = Flux.Chain(Dense(2=>2), Dropout(0.5))
	g = Foo(d)
	@code_warntype g(rand(Float32, (2,10)))
end

Can you give the struct definition for BiggerModel?

As Guillaume said, declaring types which contain Dense or Chain requires you to provide the type parameters for those structs as well somewhere in the wrapping type (e.g. MLP). Otherwise the types aren’t fully specified and everything is type unstable. See Performance Tips Β· The Julia Language in the performance tips for more info.

1 Like

Ok. So if I would want to make everything concrete, I would need to also need to parameterize MLP with the same parameters as dense? This works:

	struct Foo{F,M<:AbstractMatrix,B}
		d::Flux.Dense{F, M, B}
	end
	f = Foo(Dense(2=>2))
	function (f::Foo)(x)
		temp = f.d(x)
	end
	@code_warntype f(rand(Float32, 2,10))

But would mean that if Foo is part of another larger model (in my case there is another one), then I would also need to parameterize the larger model as well? Is there a more concise way to define Foo?

struct Foo{D<:Dense}
  d::D
end

Or even remove the type constraint entirely:

struct Foo{L}
  layer::L
end

Which is what most of Flux’s container layers do.

1 Like