Coping with `filter` type instability

I am tracking down an annoying type instability in my code and ended up in Iterators.filter from Base. Consider:

julia> f = Iterators.filter(el->el==2, rand(1:3, 1000000))
Base.Iterators.Filter{##33#34,Array{Int64,1}}(#33, [2, 1, 3, 3, 1, 1, 1, 3, 1, 1  …  1, 1, 3, 1, 3, 1, 2, 1, 1, 2])

# define this function
julia> function bar(f, s)
           for el in f
               s += el
           end
           s
       end
bar (generic function with 1 method)

# benchmark: lots of allocations!
julia> @btime bar($f, 0)
  17.910 ms (1000318 allocations: 35.62 MiB)
666878

Looking at @code_warntype bar(f, 0) gives

ia> @code_warntype bar(f, 0)
Variables:
  #self#::#bar
  f::Base.Iterators.Filter{##33#34,Array{Int64,1}}
  s@_3::Int64
  el::Int64
  #temp#@_5::Union{Tuple{Bool,Int64,Int64}, Tuple{Bool}}
  s@_6::Int64
  #temp#@_7::Core.MethodInstance
  #temp#@_8::Bool
  #temp#@_9::Core.MethodInstance
  #temp#@_10::Tuple{Int64,Tuple{Bool,Int64,Int64}}

Body:
  begin
      s@_6::Int64 = s@_3::Int64
      #temp#@_5::Union{Tuple{Bool,Int64,Int64}, Tuple{Bool}} = $(Expr(:invoke, MethodInstance for start_filter(::##33#34, ::Array{Int64,1}), :(Base.Iterators.start_filter), :($(QuoteNode(#33))), :((Core.getfield)(f, :itr)::Array{Int64,1})))
      3:
      unless (#temp#@_5::Union{Tuple{Bool,Int64,Int64}, Tuple{Bool}} isa Tuple{Bool,Int64,Int64})::Bool goto 7
      #temp#@_7::Core.MethodInstance = MethodInstance for done(::Base.Iterators.Filter{##33#34,Array{Int64,1}}, ::Tuple{Bool,Int64,Int64})
      goto 16
      7:
      unless (#temp#@_5::Union{Tuple{Bool,Int64,Int64}, Tuple{Bool}} isa Tuple{Bool})::Bool goto 11
      #temp#@_7::Core.MethodInstance = MethodInstance for done(::Base.Iterators.Filter{##33#34,Array{Int64,1}}, ::Tuple{Bool})
      goto 16
      11:
      goto 13
      13:
      #temp#@_8::Bool = (Base.done)(f::Base.Iterators.Filter{##33#34,Array{Int64,1}}, #temp#@_5::Union{Tuple{Bool,Int64,Int64}, Tuple{Bool}})::Bool
      goto 18
      16:
      #temp#@_8::Bool = $(Expr(:invoke, :(#temp#@_7), :(Base.done), :(f), :(#temp#@_5)))
      18:
      unless (Base.not_int)(#temp#@_8::Bool)::Bool goto 42
      unless (#temp#@_5::Union{Tuple{Bool,Int64,Int64}, Tuple{Bool}} isa Tuple{Bool,Int64,Int64})::Bool goto 23
      #temp#@_9::Core.MethodInstance = MethodInstance for next(::Base.Iterators.Filter{##33#34,Array{Int64,1}}, ::Tuple{Bool,Int64,Int64})
      goto 32
      23:
      unless (#temp#@_5::Union{Tuple{Bool,Int64,Int64}, Tuple{Bool}} isa Tuple{Bool})::Bool goto 27
      #temp#@_9::Core.MethodInstance = MethodInstance for next(::Base.Iterators.Filter{##33#34,Array{Int64,1}}, ::Tuple{Bool})
      goto 32
      27:
      goto 29
      29:
      #temp#@_10::Tuple{Int64,Tuple{Bool,Int64,Int64}} = (Base.next)(f::Base.Iterators.Filter{##33#34,Array{Int64,1}}, #temp#@_5::Union{Tuple{Bool,Int64,Int64}, Tuple{Bool}})::Tuple{Int64,Tuple{Bool,Int64,Int64}}
      goto 34
      32:
      #temp#@_10::Tuple{Int64,Tuple{Bool,Int64,Int64}} = $(Expr(:invoke, :(#temp#@_9), :(Base.next), :(f), :(#temp#@_5)))
      34:
      SSAValue(1) = #temp#@_10::Tuple{Int64,Tuple{Bool,Int64,Int64}}
      el::Int64 = (Core.getfield)(SSAValue(1), 1)::Int64
      #temp#@_5::Union{Tuple{Bool,Int64,Int64}, Tuple{Bool}} = (Core.getfield)(SSAValue(1), 2)::Tuple{Bool,Int64,Int64} # line 3:
      s@_6::Int64 = (Base.add_int)(s@_6::Int64, el::Int64)::Int64
      40:
      goto 3
      42:  # line 5:
      return s@_6::Int64
  end::Int64

where the Unions occur because the start_filter function in Base/iterators.jl filter is clearly not type stable.

Has anyone encountered similar instabilities? Any work around?

Thanks!

Would using some form of the in-place version of filter (filter!) solve any issues?

julia> f = Base.filter!(el->el==2, rand(1:3, 1000000))

julia> function bar(f, s)
                  for el in f
                      s += el
                  end
                  s
              end
bar (generic function with 1 method)

julia> @btime bar($f, 0)
  146.429 μs (0 allocations: 0 bytes)
665714

julia> @code_warntype bar(f, 0)
Variables:
  #self#::#bar
  f::Array{Int64,1}
  s@_3::Int64
  el::Int64
  #temp#::Int64
  s@_6::Int64

Body:
  begin
      s@_6::Int64 = s@_3::Int64
      #temp#::Int64 = 1
      3:
      unless (Base.not_int)((#temp#::Int64 === (Base.add_int)((Base.arraylen)(f::Array{Int64,1})::Int64, 1)::Int64)::Bool)::Bool goto 13
      SSAValue(2) = (Base.arrayref)(f::Array{Int64,1}, #temp#::Int64)::Int64
      SSAValue(3) = (Base.add_int)(#temp#::Int64, 1)::Int64
      el::Int64 = SSAValue(2)
      #temp#::Int64 = SSAValue(3) # line 3:
      s@_6::Int64 = (Base.add_int)(s@_6::Int64, el::Int64)::Int64
      11:
      goto 3
      13:  # line 5:
      return s@_6::Int64
  end::Int64

Note this documentation from Iterators.filter, could account for the allocations you’re seeing:

  This function is lazy; that is, it is guaranteed to return in Θ(1) time and use Θ(1) additional space, and flt will not be called by an invocation of filter. Calls to flt will
  be made when iterating over the returned iterable object. These calls are not cached and repeated calls will be made when reiterating.

Not a julia expert, just spitballing ideas :slight_smile:

Thanks, but filter! removes elements from the collection, so effectively f in your example is a vector of ints, not an iterator object. That’s why there are no type instabilities.