Nested for loops vs Iterators.product performance

Everything required for a fast version is known at compile time, but with varargs (the list... argument) you sometimes need to force specialization using the type annotation Vararg{T,N} where {N}. Also, your implementation had issues with excessive allocation of intermediate arrays, as well as initializing s to a zero of the wrong type (0 is an integer zero). Here’s a fast version:

function sumprod3(lists::Vararg{Any,N}) where {N}
    s = prod(zero ∘ eltype, lists)
    for inds in Iterators.product(eachindex.(lists)...)
        s += prod(lists[i][inds[i]] for i in eachindex(lists, inds))
    end
    return s
end

Benchmarked against a version of sumprod1 with the initialization issue fixed:

julia> using BenchmarkTools

julia> @btime sumprod1fixed($a, $b, $c)
  35.544 ms (0 allocations: 0 bytes)
2.067942582493859e6

julia> @btime sumprod3($a, $b, $c)
  36.265 ms (0 allocations: 0 bytes)
2.0679425824940344e6
Definition of sumprod1fixed
function sumprod1fixed(a, b, c)
    s = zero(eltype(a)) * zero(eltype(b)) * zero(eltype(c))
    for i in eachindex(a), j in eachindex(b), k in eachindex(c)
        s += a[i] * b[j] * c[k]
    end
    return s
end
9 Likes