I can’t see why/where allocations are occurring when iterating a Combinatorics.Combinations object.
Here’s the relevant code extract from the Combinatorics package:
#The Combinations iterator
struct Combinations
n::Int
t::Int
end
function Base.iterate(c::Combinations, s = [min(c.t - 1, i) for i in 1:c.t])
if c.t == 0 # special case to generate 1 result for t==0
isempty(s) && return (s, [1])
return
end
for i in c.t:-1:1
s[i] += 1
if s[i] > (c.n - (c.t - i))
continue
end
for j in i+1:c.t
s[j] = s[j-1] + 1
end
break
end
s[1] > c.n - c.t + 1 && return
(s, s)
end
And here’s the allocations:
using Combinatorics
function test1(combs)
x = 0
for c in combs
x += 1
end
x
end
@time test1(Combinatorics.Combinations(30, 12))
4.099971 seconds (86.49 M allocations: 2.578 GiB, 12.73% gc time)
I’ve tried a few variations to eliminate the allocations, but really just stabbing in the dark because I don’t know the reason for the allocations.
This version seems to run allocation-free:
using Combinatorics
function myiterate(c::Combinatorics.Combinations, state = Int[min(c.t - 1, i) for i in 1:c.t])
item = myiterate!(state, c.n, c.t)
if item === nothing
return nothing
else
return (item, state)
end
end
function myiterate!(s::Vector{Int}, n::Int, t::Int)
# item is return value, state is s
if t == 0 # special case to generate 1 result for t==0
if isempty(s)
push!(s, 1)
return Int[]
end
return nothing
end
for i in t:-1:1
s[i] += 1
s[i] > (n - t + i) && continue
for j in i+1:t
s[j] = s[j-1] + 1
end
break
end
s[1] > (n - t + 1) && return nothing
return s
end
function test2(combs)
x = 0
next = myiterate(combs)
while next !== nothing
item, state = next
x += 1
next = myiterate(combs, state)
end
x
end
@time test2(Combinatorics.Combinations(30, 12))
1.747497 seconds (1 allocation: 160 bytes)
To identify the source of the allocations I would suggest using a profiler, if you’re using VSCode @profview_allocs test1(Combinatorics.Combinations(30, 12)) should work in the REPL. Alternatively, Alloccheck.jl may also work.
If your allocations are due to type instabilities, Cthulhu.jl and Jet.jl can be useful for identifying and debugging them.
However, it doesn’t seem to be a “generic” issue with returning a 2-tuple of the same vector:
using BenchmarkTools
function myiterate(v)
v[begin] == v[end] && return nothing
v[begin] += 1
return v, v
end
function test(v)
count = 0
next = myiterate(v)
while next !== nothing
item, state = next
count += 1
next = myiterate(v)
end
return count
end
@time test([0, 7, 10_000_000])
0.006705 seconds (1 allocation: 80 bytes)
Are there any reasons why the function shouldn’t be marked @inline ?
struct Combinations
n::Int
t::Int
end
@inline function Base.iterate(c::Combinations, s = [min(c.t - 1, i) for i in 1:c.t])
if c.t == 0 # special case to generate 1 result for t==0
isempty(s) && return (s, [1])
return
end
for i in c.t:-1:1
s[i] += 1
if s[i] > (c.n - (c.t - i))
continue
end
for j in i+1:c.t
s[j] = s[j-1] + 1
end
break
end
s[1] > c.n - c.t + 1 && return
(s, s)
end
function test1(combs)
x = 0
for c in combs
x += 1
end
x
end
@time test1(Combinations(30, 12))
1.607441 seconds (1 allocation: 160 bytes)