One method of the Base function filter
is implemented as follows:
function filter(f, a::Vector)
r = Vector{eltype(a)}(0)
for ai in a
if f(ai)
push!(r, ai)
end
end
return r
end
But in a test, preallocating a destination vector and resizing it, is faster (less allocations etc.).
Furthermore, an extended version which enumerates elements of the array and allows filter function to use the index in the array could also be helpful.
Here are some tests I’ve done:
julia> function f1(f, a::Vector)
r = Vector{eltype(a)}(0)
for ai in a
if f(ai)
push!(r,ai)
end
end
return r
end
f1 (generic function with 1 method)
julia> function f2(f, a::Vector)
r = similar(a)
i = 1
@inbounds for ai in a
if f(ai)
r[i] = ai
i += 1
end
end
resize!(r,i-1)
return r
end
f2 (generic function with 1 method)
julia> function f3(f, a::Vector)
r = similar(a)
i = 1
@inbounds for (j,ai) in enumerate(a)
if f(ai,j)
r[i] = ai
i += 1
end
end
resize!(r,i-1)
return r
end
f3 (generic function with 1 method)
julia> a = [1:5...];
julia> b = [1:100000...];
julia> q1 = x->x != 50000
(::#1) (generic function with 1 method)
julia> q2 = x->x != 3
(::#3) (generic function with 1 method)
julia> w1 = (x,i) -> i != 50000
(::#5) (generic function with 1 method)
julia> w2 = (x,i) -> i != 3
(::#7) (generic function with 1 method)
julia> using BenchmarkTools
julia> @btime f1($q2,$a);
82.450 ns (2 allocations: 128 bytes)
julia> @btime f2($q2,$a);
64.735 ns (1 allocation: 128 bytes)
julia> @btime f1($q1,$b);
654.563 μs (17 allocations: 2.00 MiB)
julia> @btime f2($q1,$b);
188.006 μs (2 allocations: 781.33 KiB)
The tests are on version 0.6.1-pre.92
. The results show f2
is faster than f1
which is similar to filter
in Base. And f3
which allows filtering with the index is fast as well.
Is a PR in order?