So here’s part of my strategy for stuff like this: modify the function to return the closure, and then start inspecting it:
julia> function cubature(f::F, quadrature::Quadrature, B::NTuple{N}) where {F,N}
(; x, w) = quadrature
function _f(_ι)
ι = Tuple(_ι)
z = map((i, b) -> getindex(x, i) * b, ι, B)
ω = prod(i -> getindex(w, i), ι)
f(z) * ω
end
K = length(x)
sum(_f, CartesianIndices(ntuple(_ -> K, Val(N)))) * prod(B)
return _f
end
cubature (generic function with 1 method)
julia> f = cubature(sum, Q, (1.0, 2.0))
(::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}) (generic function with 1 method)
julia> fieldtypes(typeof(f))
(typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64})
Okay, there’s no Box
es in there, so far so good. Let’s see what sum(f, ::CartesianIndices)
does:
julia> @code_warntype sum(f, CartesianIndices(ntuple(_ -> 3, 5)))
MethodInstance for sum(::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}, ::CartesianIndices{5, NTuple{5, Base.OneTo{Int64}}})
from sum(f, a::AbstractArray; dims, kw...) @ Base reducedim.jl:997
Arguments
#self#::Core.Const(sum)
f::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
a::CartesianIndices{5, NTuple{5, Base.OneTo{Int64}}}
Body::Any
1 ─ nothing
│ %2 = Base.:(:)::Core.Const(Colon())
│ %3 = Core.NamedTuple()::Core.Const(NamedTuple())
│ %4 = Base.pairs(%3)::Core.Const(Base.Pairs{Symbol, Union{}, Tuple{}, @NamedTuple{}}())
│ %5 = Base.:(var"#sum#829")(%2, %4, #self#, f, a)::Any
└── return %5
Hm. That’s wierd. Lets see what f
does to the first element of a CartesianIndex
:
julia> @code_warntype f(CartesianIndices(ntuple(_ -> 3, 5)) |> first)
MethodInstance for (::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}})(::CartesianIndex{5})
from (::var"#_f#35")(_ι) @ Main REPL[24]:3
Arguments
#self#::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
_ι::CartesianIndex{5}
Locals
#33::var"#33#37"{Vector{Float64}}
#32::var"#32#36"{Vector{Float64}}
ω::Float64
z::Tuple{Float64, Float64}
ι::NTuple{5, Int64}
Body::Float64
1 ─ (ι = Main.Tuple(_ι))
│ %2 = Main.:(var"#32#36")::Core.Const(var"#32#36")
│ %3 = Core.getfield(#self#, :x)::Vector{Float64}
│ %4 = Core.typeof(%3)::Core.Const(Vector{Float64})
│ %5 = Core.apply_type(%2, %4)::Core.Const(var"#32#36"{Vector{Float64}})
│ %6 = Core.getfield(#self#, :x)::Vector{Float64}
│ (#32 = %new(%5, %6))
│ %8 = #32::var"#32#36"{Vector{Float64}}
│ %9 = ι::NTuple{5, Int64}
│ %10 = Core.getfield(#self#, :B)::Tuple{Float64, Float64}
│ (z = Main.map(%8, %9, %10))
│ %12 = Main.:(var"#33#37")::Core.Const(var"#33#37")
│ %13 = Core.getfield(#self#, :w)::Vector{Float64}
│ %14 = Core.typeof(%13)::Core.Const(Vector{Float64})
│ %15 = Core.apply_type(%12, %14)::Core.Const(var"#33#37"{Vector{Float64}})
│ %16 = Core.getfield(#self#, :w)::Vector{Float64}
│ (#33 = %new(%15, %16))
│ %18 = #33::var"#33#37"{Vector{Float64}}
│ (ω = Main.prod(%18, ι))
│ %20 = Core.getfield(#self#, :f)::Core.Const(sum)
│ %21 = (%20)(z)::Float64
│ %22 = (%21 * ω)::Float64
└── return %22
Okay, that’s even weirder, because this all looks good. This makes me suspect the compiler is just giving up somewhere in the implementation of sum
due to some heuristic. I wonder if we can bypass it by replacing sum
with something simpler:
julia> @code_warntype mapfoldl(f, +, CartesianIndices(ntuple(_ -> 3, 5)))
MethodInstance for mapfoldl(::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}, ::typeof(+), ::CartesianIndices{5, NTuple{5, Base.OneTo{Int64}}})
from mapfoldl(f, op, itr; init) @ Base reduce.jl:175
Arguments
#self#::Core.Const(mapfoldl)
f::var"#_f#35"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
op::Core.Const(+)
itr::CartesianIndices{5, NTuple{5, Base.OneTo{Int64}}}
Body::Float64
1 ─ %1 = Base._InitialValue()::Core.Const(Base._InitialValue())
│ %2 = Base.:(var"#mapfoldl#298")(%1, #self#, f, op, itr)::Float64
└── return %2
aha, it’s all inferred! So lets see what happens if we replace sum(f, ci)
with mapfoldl(f, +, ci)
in the original function:
julia> function cubature(f::F, quadrature::Quadrature, B::NTuple{N}) where {F,N}
(; x, w) = quadrature
function _f(_ι)
ι = Tuple(_ι)
z = map((i, b) -> getindex(x, i) * b, ι, B)
ω = prod(i -> getindex(w, i), ι)
f(z) * ω
end
K = length(x)
mapfoldl(+, _f, CartesianIndices(ntuple(_ -> K, Val(N)))) * prod(B)
end;
julia> @code_warntype cubature(sum, Q, (1.0, 2.0))
MethodInstance for cubature(::typeof(sum), ::Quadrature{Float64}, ::Tuple{Float64, Float64})
from cubature(f::F, quadrature::Quadrature, B::Tuple{Vararg{T, N}} where T) where {F, N} @ Main REPL[30]:1
Static Parameters
F = typeof(sum)
N = 2
Arguments
#self#::Core.Const(cubature)
f::Core.Const(sum)
quadrature::Quadrature{Float64}
B::Tuple{Float64, Float64}
Locals
#47::var"#47#51"{Int64}
K::Int64
_f::var"#_f#48"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
w::Vector{Float64}
x::Vector{Float64}
Body::Union{}
1 ─ (x = Base.getproperty(quadrature, :x))
│ (w = Base.getproperty(quadrature, :w))
│ %3 = Main.:(var"#_f#48")::Core.Const(var"#_f#48")
│ %4 = Core.typeof(f)::Core.Const(typeof(sum))
│ %5 = Core.typeof(B)::Core.Const(Tuple{Float64, Float64})
│ %6 = Core.typeof(w)::Core.Const(Vector{Float64})
│ %7 = Core.typeof(x)::Core.Const(Vector{Float64})
│ %8 = Core.apply_type(%3, %4, %5, %6, %7)::Core.Const(var"#_f#48"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}})
│ %9 = w::Vector{Float64}
│ (_f = %new(%8, f, B, %9, x))
│ (K = Main.length(x))
│ %12 = Main.:+::Core.Const(+)
│ %13 = _f::var"#_f#48"{typeof(sum), Tuple{Float64, Float64}, Vector{Float64}, Vector{Float64}}
│ %14 = Main.:(var"#47#51")::Core.Const(var"#47#51")
│ %15 = Core.typeof(K)::Core.Const(Int64)
│ %16 = Core.apply_type(%14, %15)::Core.Const(var"#47#51"{Int64})
│ (#47 = %new(%16, K))
│ %18 = #47::var"#47#51"{Int64}
│ %19 = Main.Val($(Expr(:static_parameter, 2)))::Core.Const(Val{2}())
│ %20 = Main.ntuple(%18, %19)::Tuple{Int64, Int64}
│ %21 = Main.CartesianIndices(%20)::CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}
│ Main.mapfoldl(%12, %13, %21)
│ Core.Const(:(Main.prod(B)))
│ Core.Const(:(%22 * %23))
└── Core.Const(:(return %24))
Yep, that was it.
I suspect then what happened is that sum
is specialized on CartesianIndices
to try and statically unroll, but it ran up against some unrolling heuristic and gave up optimizing it, so the “dumber” mapfoldl
turned out better.
If numerical stability is a concern, you might find it better to use mapreduce
instead of mapfoldl
.