Cannot achieve type stability

See the MWE below, inferred as Vector on both 1.9 and 1.10-pre.

struct Quadrature{T}
    x::Vector{T}
    w::Vector{T}
end

function cubature(f::F, quadrature::Quadrature, B::NTuple{N}) where {F,N}
    (; x, w) = quadrature
    function _f(_ι)
        ι = Tuple(_ι)
        z = map((i, b) -> getindex(x, i) * b, ι, B)
        ω = prod(i -> getindex(w, i), ι)
        f(z) * ω
    end
    K = length(x)
    sum(_f, CartesianIndices(ntuple(_ -> K, Val(N)))) * prod(B)
end

Q = Quadrature(randn(10), randn(10)) # just a mockup

@code_warntype cubature(sum, Q, (1.0, 2.0))

In addition to a solution, I would also like to improve my understanding on how to diagnose these kind of problems. I tried Chutlhu but it tries to descend into the implementation of sum, which I could not figure out.

Julia has some issues with closures. Try bringing the _f out as another function that takes f as an argument?

Adding a type hint solves the inference issue. The Julia docs has a section on how to improve the performance of closures.

struct Quadrature{T}
    x::Vector{T}
    w::Vector{T}
end

function cubature(f::F, quadrature::Quadrature{T}, B::NTuple{N,T}) where {F,T,N}
    (; x, w) = quadrature
    function _f(_ι)::T
        ι = Tuple(_ι)
        z = map((i, b) -> getindex(x, i) * b, ι, B)
        ω = prod(i -> getindex(w, i), ι)
        f(z) * ω
    end
    K = length(x)
    sum(_f, CartesianIndices(ntuple(_ -> K, Val(N)))) * prod(B)
end

Q = Quadrature(randn(10), randn(10)) # just a mockup

@code_warntype cubature(sum, Q, (1.0, 2.0))

The closure here has concrete contents.

1 Like

I am not sure this is the closure bug that can be solved by a let.

Again, I appreciate all help, but I am mostly after digging into specifics on why this happens, not generic advice.

This will help a little bit, but it won’t actually solve the type stability problem, just mask it.

2 Likes

So here’s part of my strategy for stuff like this: modify the function to return the closure, and then start inspecting it:

julia> function cubature(f::F, quadrature::Quadrature, B::NTuple{N}) where {F,N}
           (; x, w) = quadrature
           function _f(_ι)
               ι = Tuple(_ι)
               z = map((i, b) -> getindex(x, i) * b, ι, B)
               ω = prod(i -> getindex(w, i), ι)
               f(z) * ω
           end
           K = length(x)
           sum(_f, CartesianIndices(ntuple(_ -> K, Val(N)))) * prod(B)
           return _f
       end
cubature (generic function with 1 method)

julia> f = cubature(sum, Q, (1.0, 2.0))
(::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}) (generic function with 1 method)

julia> fieldtypes(typeof(f))
(typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64})

Okay, there’s no Boxes in there, so far so good. Let’s see what sum(f, ::CartesianIndices) does:

julia> @code_warntype sum(f, CartesianIndices(ntuple(_ -> 3, 5)))
MethodInstance for sum(::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}, ::CartesianIndices{5, NTuple{5, Base.OneTo{Int64}}})
  from sum(f, a::AbstractArray; dims, kw...) @ Base reducedim.jl:997
Arguments
  #self#::Core.Const(sum)
  f::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
  a::CartesianIndices{5, NTuple{5, Base.OneTo{Int64}}}
Body::Any
1 ─      nothing
│   %2 = Base.:(:)::Core.Const(Colon())
│   %3 = Core.NamedTuple()::Core.Const(NamedTuple())
│   %4 = Base.pairs(%3)::Core.Const(Base.Pairs{Symbol, Union{}, Tuple{}, @NamedTuple{}}())
│   %5 = Base.:(var"#sum#829")(%2, %4, #self#, f, a)::Any
└──      return %5

Hm. That’s wierd. Lets see what f does to the first element of a CartesianIndex:

julia> @code_warntype f(CartesianIndices(ntuple(_ -> 3, 5)) |> first)
MethodInstance for (::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}})(::CartesianIndex{5})
  from (::var"#_f#35")(_ι) @ Main REPL[24]:3
Arguments
  #self#::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
  _ι::CartesianIndex{5}
Locals
  #33::var"#33#37"{Vector{Float64}}
  #32::var"#32#36"{Vector{Float64}}
  ω::Float64
  z::Tuple{Float64, Float64}
  ι::NTuple{5, Int64}
Body::Float64
1 ─       (ι = Main.Tuple(_ι))
│   %2  = Main.:(var"#32#36")::Core.Const(var"#32#36")
│   %3  = Core.getfield(#self#, :x)::Vector{Float64}
│   %4  = Core.typeof(%3)::Core.Const(Vector{Float64})
│   %5  = Core.apply_type(%2, %4)::Core.Const(var"#32#36"{Vector{Float64}})
│   %6  = Core.getfield(#self#, :x)::Vector{Float64}
│         (#32 = %new(%5, %6))
│   %8  = #32::var"#32#36"{Vector{Float64}}
│   %9  = ι::NTuple{5, Int64}
│   %10 = Core.getfield(#self#, :B)::Tuple{Float64, Float64}
│         (z = Main.map(%8, %9, %10))
│   %12 = Main.:(var"#33#37")::Core.Const(var"#33#37")
│   %13 = Core.getfield(#self#, :w)::Vector{Float64}
│   %14 = Core.typeof(%13)::Core.Const(Vector{Float64})
│   %15 = Core.apply_type(%12, %14)::Core.Const(var"#33#37"{Vector{Float64}})
│   %16 = Core.getfield(#self#, :w)::Vector{Float64}
│         (#33 = %new(%15, %16))
│   %18 = #33::var"#33#37"{Vector{Float64}}
│         (ω = Main.prod(%18, ι))
│   %20 = Core.getfield(#self#, :f)::Core.Const(sum)
│   %21 = (%20)(z)::Float64
│   %22 = (%21 * ω)::Float64
└──       return %22

Okay, that’s even weirder, because this all looks good. This makes me suspect the compiler is just giving up somewhere in the implementation of sum due to some heuristic. I wonder if we can bypass it by replacing sum with something simpler:

julia> @code_warntype mapfoldl(f, +, CartesianIndices(ntuple(_ -> 3, 5)))
MethodInstance for mapfoldl(::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}, ::typeof(+), ::CartesianIndices{5, NTuple{5, Base.OneTo{Int64}}})
  from mapfoldl(f, op, itr; init) @ Base reduce.jl:175
Arguments
  #self#::Core.Const(mapfoldl)
  f::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
  op::Core.Const(+)
  itr::CartesianIndices{5, NTuple{5, Base.OneTo{Int64}}}
Body::Float64
1 ─ %1 = Base._InitialValue()::Core.Const(Base._InitialValue())
│   %2 = Base.:(var"#mapfoldl#298")(%1, #self#, f, op, itr)::Float64
└──      return %2

aha, it’s all inferred! So lets see what happens if we replace sum(f, ci) with mapfoldl(f, +, ci) in the original function:

julia> function cubature(f::F, quadrature::Quadrature, B::NTuple{N}) where {F,N}
           (; x, w) = quadrature
           function _f(_ι)
               ι = Tuple(_ι)
               z = map((i, b) -> getindex(x, i) * b, ι, B)
               ω = prod(i -> getindex(w, i), ι)
               f(z) * ω
           end
           K = length(x)
           mapfoldl(+, _f, CartesianIndices(ntuple(_ -> K, Val(N)))) * prod(B)
       end;

julia> @code_warntype cubature(sum, Q, (1.0, 2.0))
MethodInstance for cubature(::typeof(sum), ::Quadrature{Float64}, ::Tuple{Float64, Float64})
  from cubature(f::F, quadrature::Quadrature, B::Tuple{Vararg{T, N}} where T) where {F, N} @ Main REPL[30]:1
Static Parameters
  F = typeof(sum)
  N = 2
Arguments
  #self#::Core.Const(cubature)
  f::Core.Const(sum)
  quadrature::Quadrature{Float64}
  B::Tuple{Float64, Float64}
Locals
  #47::var"#47#51"{Int64}
  K::Int64
  _f::var"#_f#48"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
  w::Vector{Float64}
  x::Vector{Float64}
Body::Union{}
1 ─       (x = Base.getproperty(quadrature, :x))
│         (w = Base.getproperty(quadrature, :w))
│   %3  = Main.:(var"#_f#48")::Core.Const(var"#_f#48")
│   %4  = Core.typeof(f)::Core.Const(typeof(sum))
│   %5  = Core.typeof(B)::Core.Const(Tuple{Float64, Float64})
│   %6  = Core.typeof(w)::Core.Const(Vector{Float64})
│   %7  = Core.typeof(x)::Core.Const(Vector{Float64})
│   %8  = Core.apply_type(%3, %4, %5, %6, %7)::Core.Const(var"#_f#48"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}})
│   %9  = w::Vector{Float64}
│         (_f = %new(%8, f, B, %9, x))
│         (K = Main.length(x))
│   %12 = Main.:+::Core.Const(+)
│   %13 = _f::var"#_f#48"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
│   %14 = Main.:(var"#47#51")::Core.Const(var"#47#51")
│   %15 = Core.typeof(K)::Core.Const(Int64)
│   %16 = Core.apply_type(%14, %15)::Core.Const(var"#47#51"{Int64})
│         (#47 = %new(%16, K))
│   %18 = #47::var"#47#51"{Int64}
│   %19 = Main.Val($(Expr(:static_parameter, 2)))::Core.Const(Val{2}())
│   %20 = Main.ntuple(%18, %19)::Tuple{Int64, Int64}
│   %21 = Main.CartesianIndices(%20)::CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}
│         Main.mapfoldl(%12, %13, %21)
│         Core.Const(:(Main.prod(B)))
│         Core.Const(:(%22 * %23))
└──       Core.Const(:(return %24))

Yep, that was it.

I suspect then what happened is that sum is specialized on CartesianIndices to try and statically unroll, but it ran up against some unrolling heuristic and gave up optimizing it, so the “dumber” mapfoldl turned out better.

If numerical stability is a concern, you might find it better to use mapreduce instead of mapfoldl.

11 Likes

Oops, no I didn’t solve it actually. When I wrote the above I didn’t notice that the body inferred to Union{}! It actually errors because I wrote the arguments to mapfoldl in the wrong order :man_facepalming:

julia> function cubature(f::F, quadrature::Quadrature, B::NTuple{N}) where {F,N}
           (; x, w) = quadrature
           function _f(_ι)
               ι = Tuple(_ι)
               z = map((i, b) -> getindex(x, i) * b, ι, B)
               ω = prod(i -> getindex(w, i), ι)
               f(z) * ω
           end
           K = length(x)
           mapfoldl(_f, +, CartesianIndices(ntuple(_ -> K, Val(N)))) * prod(B)
       end;
julia> @code_warntype cubature(sum, Q, (1.0, 2.0))
MethodInstance for cubature(::typeof(sum), ::Quadrature{Float64}, ::Tuple{Float64, Float64})
  from cubature(f::F, quadrature::Quadrature, B::Tuple{Vararg{T, N}} where T) where {F, N} @ Main REPL[4]:1
Static Parameters
  F = typeof(sum)
  N = 2
Arguments
  #self#::Core.Const(cubature)
  f::Core.Const(sum)
  quadrature::Quadrature{Float64}
  B::Tuple{Float64, Float64}
Locals
  #15::var"#15#19"{Int64}
  K::Int64
  _f::var"#_f#16"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
  w::Vector{Float64}
  x::Vector{Float64}
Body::Any
1 ─       (x = Base.getproperty(quadrature, :x))
│         (w = Base.getproperty(quadrature, :w))
│   %3  = Main.:(var"#_f#16")::Core.Const(var"#_f#16")
│   %4  = Core.typeof(f)::Core.Const(typeof(sum))
│   %5  = Core.typeof(B)::Core.Const(Tuple{Float64, Float64})
│   %6  = Core.typeof(w)::Core.Const(Vector{Float64})
│   %7  = Core.typeof(x)::Core.Const(Vector{Float64})
│   %8  = Core.apply_type(%3, %4, %5, %6, %7)::Core.Const(var"#_f#16"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}})
│   %9  = w::Vector{Float64}
│         (_f = %new(%8, f, B, %9, x))
│         (K = Main.length(x))
│   %12 = _f::var"#_f#16"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
│   %13 = Main.:+::Core.Const(+)
│   %14 = Main.:(var"#15#19")::Core.Const(var"#15#19")
│   %15 = Core.typeof(K)::Core.Const(Int64)
│   %16 = Core.apply_type(%14, %15)::Core.Const(var"#15#19"{Int64})
│         (#15 = %new(%16, K))
│   %18 = #15::var"#15#19"{Int64}
│   %19 = Main.Val($(Expr(:static_parameter, 2)))::Core.Const(Val{2}())
│   %20 = Main.ntuple(%18, %19)::Tuple{Int64, Int64}
│   %21 = Main.CartesianIndices(%20)::CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}
│   %22 = Main.mapfoldl(%12, %13, %21)::Any
│   %23 = Main.prod(B)::Float64
│   %24 = (%22 * %23)::Any
└──       return %24

:face_with_symbols_over_mouth:

So perhaps there’s still some more heuristics here that are giving up, or I’m missing another obvious mistake…

But anyways, that’s my general process with things like this. Unfortunately, it can sometimes be a bit of a game of whack-a-mole.

3 Likes

Thanks for the help, but this still does not infer for me. Assuming you changed the last sum, the self-contained code is

struct Quadrature{T}
    x::Vector{T}
    w::Vector{T}
end

function cubature(f::F, quadrature::Quadrature, B::NTuple{N}) where {F,N}
    (; x, w) = quadrature
    function _f(_ι)
        ι = Tuple(_ι)
        z = map((i, b) -> getindex(x, i) * b, ι, B)
        ω = prod(i -> getindex(w, i), ι)
        f(z) * ω
    end
    K = length(x)
    mapfoldl(_f, +, CartesianIndices(ntuple(_ -> K, Val(N)))) * prod(B)
end

Q = Quadrature(randn(10), randn(10)) # just a mockup

@code_warntype cubature(sum, Q, (1.0, 2.0))

which still does not infer — now it is Any.

Try replacing this with an explicit loop? Use the zero function for initialization.

Yeah, an explicit loop does work for me:

julia> function cubature(f::F, quadrature::Quadrature{T}, B::NTuple{N}) where {F,N, T}
           (; x, w) = quadrature
           function _f(_ι)
               ι = Tuple(_ι)
               z = map((i, b) -> getindex(x, i) * b, ι, B)
               ω = prod(i -> getindex(w, i), ι)
               f(z) * ω
           end
           K = length(x)
           s = zero(T)
           for x ∈ CartesianIndices(ntuple(_ -> K, Val(N)))
               s += _f(x)
           end
           s * prod(B)
       end;

julia> @code_warntype cubature(sum, Q, (1.0, 2.0))
MethodInstance for cubature(::typeof(sum), ::Quadrature{Float64}, ::Tuple{Float64, Float64})
  from cubature(f::F, quadrature::Quadrature{T}, B::Tuple{Vararg{T, N}} where T) where {F, N, T} @ Main REPL[15]:1
Static Parameters
  F = typeof(sum)
  N = 2
  T = Float64
Arguments
  #self#::Core.Const(cubature)
  f::Core.Const(sum)
  quadrature::Quadrature{Float64}
  B::Tuple{Float64, Float64}
Locals
  @_5::Union{Nothing, Tuple{CartesianIndex{2}, CartesianIndex{2}}}
  #60::var"#60#64"{Int64}
  s::Float64
  K::Int64
  _f::var"#_f#61"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
  w::Vector{Float64}
  x@_11::Vector{Float64}
  x@_12::CartesianIndex{2}
Body::Float64
1 ─       (x@_11 = Base.getproperty(quadrature, :x))
│         (w = Base.getproperty(quadrature, :w))
│   %3  = Main.:(var"#_f#61")::Core.Const(var"#_f#61")
│   %4  = Core.typeof(f)::Core.Const(typeof(sum))
│   %5  = Core.typeof(B)::Core.Const(Tuple{Float64, Float64})
│   %6  = Core.typeof(w)::Core.Const(Vector{Float64})
│   %7  = Core.typeof(x@_11)::Core.Const(Vector{Float64})
│   %8  = Core.apply_type(%3, %4, %5, %6, %7)::Core.Const(var"#_f#61"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}})
│   %9  = w::Vector{Float64}
│         (_f = %new(%8, f, B, %9, x@_11))
│         (K = Main.length(x@_11))
│         (s = Main.zero($(Expr(:static_parameter, 3))))
│   %13 = Main.:(var"#60#64")::Core.Const(var"#60#64")
│   %14 = Core.typeof(K)::Core.Const(Int64)
│   %15 = Core.apply_type(%13, %14)::Core.Const(var"#60#64"{Int64})
│         (#60 = %new(%15, K))
│   %17 = #60::var"#60#64"{Int64}
│   %18 = Main.Val($(Expr(:static_parameter, 2)))::Core.Const(Val{2}())
│   %19 = Main.ntuple(%17, %18)::Tuple{Int64, Int64}
│   %20 = Main.CartesianIndices(%19)::CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}
│         (@_5 = Base.iterate(%20))
│   %22 = (@_5 === nothing)::Bool
│   %23 = Base.not_int(%22)::Bool
└──       goto #4 if not %23
2 ┄ %25 = @_5::Tuple{CartesianIndex{2}, CartesianIndex{2}}
│         (x@_12 = Core.getfield(%25, 1))
│   %27 = Core.getfield(%25, 2)::CartesianIndex{2}
│   %28 = s::Float64
│   %29 = (_f)(x@_12)::Float64
│         (s = %28 + %29)
│         (@_5 = Base.iterate(%20, %27))
│   %32 = (@_5 === nothing)::Bool
│   %33 = Base.not_int(%32)::Bool
└──       goto #4 if not %33
3 ─       goto #2
4 ┄ %36 = s::Float64
│   %37 = Main.prod(B)::Float64
│   %38 = (%36 * %37)::Float64
└──       return %38

JET reports some runtime dispatch in the mapreduce reduction ultimately used here, as well as a failure to optimize the call due to recursion:

┌ mapfoldl_impl(f::var"#4#8"{Vector{Float64}}, op::typeof(Base.mul_prod), nt::Base._InitialValue, itr::Tuple{Int64, Int64}) @ Base ./reduce.jl:42
│ failed to optimize due to recursion: Base.mapfoldl_impl(::var"#4#8"{Vector{Float64}}, ::typeof(Base.mul_prod), ::Base._InitialValue, ::Tuple{Int64, Int64})
└────────────────────

which tracks that this works with an explicit loop.

The output is quite long, but this is indeed from sum, not prod, even though the specific remark I quoted here says it’s mul_prod.

2 Likes

Don’t, since it is not equivalent (eg mapreduce etc does not require that it is defined). It is better to use Iterators.peel or something similar:

function cubature3(f::F, quadrature::Quadrature, B::NTuple{N}) where {F,N}
    (; x, w) = quadrature
    function _f(_ι)
        ι = Tuple(_ι)
        z = map((i, b) -> getindex(x, i) * b, ι, B)
        ω = prod(i -> getindex(w, i), ι)
        f(z) * ω
    end
    K = length(x)
    # here we assume it is not empty
    ι1, ι_rest = Iterators.peel(CartesianIndices(ntuple(_ -> K, Val(N))))
    s = _f(ι1)
    for ι in ι_rest
        s += _f(ι)
    end
    s * prod(B)
end

But (for the third time) that is not the question. There are various workarounds and they are well known, what I want is understand the problem. I appreciate that you are trying to help, but please kindly read the original question.

Yes! That’s the tool I was looking for. Digging into it, it is actually prod. This works:

prod_outside(w, ι) = prod(i -> getindex(w, i), ι)

function cubature(f::F, quadrature::Quadrature, B::NTuple{N}) where {F,N}
    (; x, w) = quadrature
    function _f(_ι)
        ι = Tuple(_ι)
        z = map((i, b) -> getindex(x, i) * b, ι, B)
        ω = prod_outside(w, ι)
        f(z)
    end
    K = length(x)
    sum(_f, CartesianIndices(ntuple(_ -> K, Val(N))))
end

Now, I am wondering if I should open an issue.

4 Likes

I think this is an issue worth investigating, Maybe the Julia devs will know better and may find a way to fix it or something. Open the issue.

Might be worth opening, but I suspect that it might just be shrugged at. Ultimately, julia’s optimization pipeline is quite heavy on heuristics, and there are no optimizations that are guaranteed to occur. There was some critical complexity hit here by the compiler, and it gave up in an unfortunate way.

The big thing I guess is just that capturing variables through multiple layers of nested closures stresses out the compiler, so anything you can do to flatten the structure is often beneficial from the compiler’s POV.

This sort of optimization thing is tricky, because we often train ourselves to rely on it, and then one day you can just cross some threshold and suddenly it fails for an opaque reason.

3 Likes

This particular case might be resolved by something like RFC: Less aggressive recursion limiting by Keno · Pull Request #48059 · JuliaLang/julia · GitHub, but there’s no indication for that PR (or the concept discussed there) being revived.

For what it’s worth, this version doesn’t actually eliminate the type instability for me on my machine, though it does work for me if I do an explicit @noinline.

@noinline prod_outside(w, ι) = prod(i -> getindex(w, i), ι)
1 Like

Opened an issue:

Even if it is a manifestation of some other issue, it would be good to have that identified.

4 Likes