I wanted to build an iterator that performs some none trivial operation on two arrays. In this simple example I wanted to get the boolean output of the element wise comparison
x_i^2+y_i^2>2y_i^2
n_samples=100_000 # A hundered thousand samples
x=rand(1:10,n_samples)
y=rand(1:10,n_samples);
struct BinGenrator{T}
x::Vector{T}
y::Vector{T}
f::Function
end
Base.start(b::BinGenrator)=1
Base.next(b::BinGenrator,i)=(b.f(x[i],y[i]),i+1)
Base.done(b::BinGenrator,i)=i>length(b.x)
f(x,y)=x^2+y^2>2y^2
bg=BinGenrator(x,y,f)
sum(bg)
Benchmarking it, I get
using BenchmarkTools
@btime sum(bg)
7.850 ms (297865 allocations: 4.55 MiB)
Compare this with a non-iterator based implementation.
function fast_counter(x,y)
n=0
for i in eachindex(x)
n+=ifelse(x[i]^2+y[i]^2>2y[i]^2,true,false)
end
n
end
@btime fast_counter(x,y)
118.653 μs (1 allocation: 16 bytes)
Or a Channel
based implementation (still a type of iterator).
function channelGen(x,y,chn_size)
chn=Channel(ctype=Bool,csize=chn_size) do c
for i in eachindex(x)
put!(c,x[i]^2+y[i]^2>2y[i]^2)
end
end
chn
end
function getsum(x,y, chn_size)
c=channelGen(x,y, chn_size)
sum(c)
end
@btime getsum(x,y, 1000)
6.972 ms (236 allocations: 11.53 KiB)
I am quite surprised the the iterator interface is the slowest and allocates orders of magnitude more memory. Can anyone explain this? Is it a bug?