So here’s part of my strategy for stuff like this: modify the function to return the closure, and then start inspecting it:
julia> function cubature(f::F, quadrature::Quadrature, B::NTuple{N}) where {F,N}
(; x, w) = quadrature
function _f(_ι)
ι = Tuple(_ι)
z = map((i, b) -> getindex(x, i) * b, ι, B)
ω = prod(i -> getindex(w, i), ι)
f(z) * ω
end
K = length(x)
sum(_f, CartesianIndices(ntuple(_ -> K, Val(N)))) * prod(B)
return _f
end
cubature (generic function with 1 method)
julia> f = cubature(sum, Q, (1.0, 2.0))
(::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}) (generic function with 1 method)
julia> fieldtypes(typeof(f))
(typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64})
Okay, there’s no Boxes in there, so far so good. Let’s see what sum(f, ::CartesianIndices) does:
julia> @code_warntype sum(f, CartesianIndices(ntuple(_ -> 3, 5)))
MethodInstance for sum(::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}, ::CartesianIndices{5, NTuple{5, Base.OneTo{Int64}}})
from sum(f, a::AbstractArray; dims, kw...) @ Base reducedim.jl:997
Arguments
#self#::Core.Const(sum)
f::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
a::CartesianIndices{5, NTuple{5, Base.OneTo{Int64}}}
Body::Any
1 ─ nothing
│ %2 = Base.:(:)::Core.Const(Colon())
│ %3 = Core.NamedTuple()::Core.Const(NamedTuple())
│ %4 = Base.pairs(%3)::Core.Const(Base.Pairs{Symbol, Union{}, Tuple{}, @NamedTuple{}}())
│ %5 = Base.:(var"#sum#829")(%2, %4, #self#, f, a)::Any
└── return %5
Hm. That’s wierd. Lets see what f does to the first element of a CartesianIndex:
julia> @code_warntype f(CartesianIndices(ntuple(_ -> 3, 5)) |> first)
MethodInstance for (::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}})(::CartesianIndex{5})
from (::var"#_f#35")(_ι) @ Main REPL[24]:3
Arguments
#self#::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
_ι::CartesianIndex{5}
Locals
#33::var"#33#37"{Vector{Float64}}
#32::var"#32#36"{Vector{Float64}}
ω::Float64
z::Tuple{Float64, Float64}
ι::NTuple{5, Int64}
Body::Float64
1 ─ (ι = Main.Tuple(_ι))
│ %2 = Main.:(var"#32#36")::Core.Const(var"#32#36")
│ %3 = Core.getfield(#self#, :x)::Vector{Float64}
│ %4 = Core.typeof(%3)::Core.Const(Vector{Float64})
│ %5 = Core.apply_type(%2, %4)::Core.Const(var"#32#36"{Vector{Float64}})
│ %6 = Core.getfield(#self#, :x)::Vector{Float64}
│ (#32 = %new(%5, %6))
│ %8 = #32::var"#32#36"{Vector{Float64}}
│ %9 = ι::NTuple{5, Int64}
│ %10 = Core.getfield(#self#, :B)::Tuple{Float64, Float64}
│ (z = Main.map(%8, %9, %10))
│ %12 = Main.:(var"#33#37")::Core.Const(var"#33#37")
│ %13 = Core.getfield(#self#, :w)::Vector{Float64}
│ %14 = Core.typeof(%13)::Core.Const(Vector{Float64})
│ %15 = Core.apply_type(%12, %14)::Core.Const(var"#33#37"{Vector{Float64}})
│ %16 = Core.getfield(#self#, :w)::Vector{Float64}
│ (#33 = %new(%15, %16))
│ %18 = #33::var"#33#37"{Vector{Float64}}
│ (ω = Main.prod(%18, ι))
│ %20 = Core.getfield(#self#, :f)::Core.Const(sum)
│ %21 = (%20)(z)::Float64
│ %22 = (%21 * ω)::Float64
└── return %22
Okay, that’s even weirder, because this all looks good. This makes me suspect the compiler is just giving up somewhere in the implementation of sum due to some heuristic. I wonder if we can bypass it by replacing sum with something simpler:
julia> @code_warntype mapfoldl(f, +, CartesianIndices(ntuple(_ -> 3, 5)))
MethodInstance for mapfoldl(::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}, ::typeof(+), ::CartesianIndices{5, NTuple{5, Base.OneTo{Int64}}})
from mapfoldl(f, op, itr; init) @ Base reduce.jl:175
Arguments
#self#::Core.Const(mapfoldl)
f::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
op::Core.Const(+)
itr::CartesianIndices{5, NTuple{5, Base.OneTo{Int64}}}
Body::Float64
1 ─ %1 = Base._InitialValue()::Core.Const(Base._InitialValue())
│ %2 = Base.:(var"#mapfoldl#298")(%1, #self#, f, op, itr)::Float64
└── return %2
aha, it’s all inferred! So lets see what happens if we replace sum(f, ci) with mapfoldl(f, +, ci) in the original function:
julia> function cubature(f::F, quadrature::Quadrature, B::NTuple{N}) where {F,N}
(; x, w) = quadrature
function _f(_ι)
ι = Tuple(_ι)
z = map((i, b) -> getindex(x, i) * b, ι, B)
ω = prod(i -> getindex(w, i), ι)
f(z) * ω
end
K = length(x)
mapfoldl(+, _f, CartesianIndices(ntuple(_ -> K, Val(N)))) * prod(B)
end;
julia> @code_warntype cubature(sum, Q, (1.0, 2.0))
MethodInstance for cubature(::typeof(sum), ::Quadrature{Float64}, ::Tuple{Float64, Float64})
from cubature(f::F, quadrature::Quadrature, B::Tuple{Vararg{T, N}} where T) where {F, N} @ Main REPL[30]:1
Static Parameters
F = typeof(sum)
N = 2
Arguments
#self#::Core.Const(cubature)
f::Core.Const(sum)
quadrature::Quadrature{Float64}
B::Tuple{Float64, Float64}
Locals
#47::var"#47#51"{Int64}
K::Int64
_f::var"#_f#48"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
w::Vector{Float64}
x::Vector{Float64}
Body::Union{}
1 ─ (x = Base.getproperty(quadrature, :x))
│ (w = Base.getproperty(quadrature, :w))
│ %3 = Main.:(var"#_f#48")::Core.Const(var"#_f#48")
│ %4 = Core.typeof(f)::Core.Const(typeof(sum))
│ %5 = Core.typeof(B)::Core.Const(Tuple{Float64, Float64})
│ %6 = Core.typeof(w)::Core.Const(Vector{Float64})
│ %7 = Core.typeof(x)::Core.Const(Vector{Float64})
│ %8 = Core.apply_type(%3, %4, %5, %6, %7)::Core.Const(var"#_f#48"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}})
│ %9 = w::Vector{Float64}
│ (_f = %new(%8, f, B, %9, x))
│ (K = Main.length(x))
│ %12 = Main.:+::Core.Const(+)
│ %13 = _f::var"#_f#48"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
│ %14 = Main.:(var"#47#51")::Core.Const(var"#47#51")
│ %15 = Core.typeof(K)::Core.Const(Int64)
│ %16 = Core.apply_type(%14, %15)::Core.Const(var"#47#51"{Int64})
│ (#47 = %new(%16, K))
│ %18 = #47::var"#47#51"{Int64}
│ %19 = Main.Val($(Expr(:static_parameter, 2)))::Core.Const(Val{2}())
│ %20 = Main.ntuple(%18, %19)::Tuple{Int64, Int64}
│ %21 = Main.CartesianIndices(%20)::CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}
│ Main.mapfoldl(%12, %13, %21)
│ Core.Const(:(Main.prod(B)))
│ Core.Const(:(%22 * %23))
└── Core.Const(:(return %24))
Yep, that was it.
I suspect then what happened is that sum is specialized on CartesianIndices to try and statically unroll, but it ran up against some unrolling heuristic and gave up optimizing it, so the “dumber” mapfoldl turned out better.
If numerical stability is a concern, you might find it better to use mapreduce instead of mapfoldl.