I distilled a significant performance issue in JuMP 0.19-alpha (https://github.com/JuliaOpt/JuMP.jl/issues/1403#issuecomment-437630140) to the following standalone example:
using BenchmarkTools
struct FlippedDictIterator{K,V}
d::Dict{K,V}
end
reorder_iterator(::Nothing) = nothing
reorder_iterator(p::Pair, state::Int) = ((p.second, p.first), state)
reorder_iterator(x) = reorder_iterator(x...)
Base.iterate(fi::FlippedDictIterator) = reorder_iterator(iterate(fi.d))
Base.iterate(fi::FlippedDictIterator, state) = reorder_iterator(iterate(fi.d, state))
# The behavior isn't affected by these 4 lines, but I thought they could help.
Base.IteratorSize(fi::FlippedDictIterator) = Base.IteratorSize(fi.d)
Base.length(fi::FlippedDictIterator) = length(fi.d)
Base.IteratorEltype(fi::FlippedDictIterator) = Base.IteratorEltype(fi.d)
Base.eltype(fi::FlippedDictIterator{K,V}) where {K,V} = Tuple{V, K}
function loop1(d)
s = 0
for (key, val) in d
s += key * length(val)
end
return s
end
function loop2(d)
s = 0
for (val, key) in FlippedDictIterator(d)
s += key * length(val)
end
return s
end
d = Dict{Int, String}()
for i in 1:1000
d[i] = "test"
end
@show loop1(d)
@show loop2(d)
@btime loop1($d)
@btime loop2($d)
Yields (on Julia 1.0.1):
loop1(d) = 2002000
loop2(d) = 2002000
14.323 μs (0 allocations: 0 bytes)
20.913 μs (2000 allocations: 62.50 KiB)
Why are there two allocations per call to iterate(fi::FlippedDictIterator, state)
?