Meta-programming an if-else statement of user-defined length

While trying to reproduce the result, I think I found another possible source of the slowdown: the `Options` type.

With the following overall setup:

"""
    BINOP!(x, y, i, clen, options)

Fetch the `i`-th entry of `options.binops` and forward it, together with `x`,
`y`, `clen` and `options`, to the `BINOP!` method that takes the operator as
its first argument. The result of that call is returned unchanged.
"""
function BINOP!(
    x::Vector{Float32}, y::Vector{Float32}, i::Int, clen::Int, options::Options
)
    # The lookup happens here; the callee receives the operator as an argument.
    return BINOP!(options.binops[i], x, y, clen, options)
end

"""
    BINOP!(op, x, y, clen, options)

Overwrite `x[j]` with `op(x[j], y[j])` for every `j` in `1:clen` and return
`nothing`. `options` is accepted but not used by the loop.

The loop runs under `@inbounds`, so no bounds checks are performed: the caller
must make sure `clen` does not exceed the length of `x` or `y`.
"""
function BINOP!(
    op::F, x::Vector{Float32}, y::Vector{Float32}, clen::Int, options::Options
) where {F}
    # `op::F` makes the operator's concrete type a type parameter of the method.
    @inbounds @simd for idx in 1:clen
        x[idx] = op(x[idx], y[idx])
    end
    return nothing
end

"""
    BINOP2!(x, y, i, clen, options)

Hard-coded reference variant: when `i == 1`, add `y[j]` onto `x[j]` in place
for every `j` in `1:clen`; for any other `i` nothing is modified. Always
returns `nothing`. `options` is accepted but not used.

The loop runs under `@inbounds`, so the caller must make sure `clen` does not
exceed the length of `x` or `y`.
"""
function BINOP2!(x, y, i, clen, options)
    # Only the first operator (`+`) is implemented in this variant.
    i == 1 || return nothing
    @inbounds @simd for idx in 1:clen
        x[idx] += y[idx]
    end
    return nothing
end

# Benchmark inputs: two random Float32 vectors of 100 elements each, plus the
# number of elements to process.
x, y = rand(Float32, 100), rand(Float32, 100)
clen = length(x)

julia> @btime BINOP2!($x, $y, 1, $clen, $options)
  7.923 ns (0 allocations: 0 bytes)

I tried three versions:
1.

# `binops` is deliberately left unconstrained: a field without an annotation is
# typed `Any`, which is spelled out explicitly here.
struct Options
    binops::Any
end

options = Options([+, -, *])
julia> @btime BINOP!($x, $y, 1, $clen, $options)
  44.011 ns (1 allocation: 16 bytes)

2.

# Parametric container: the type of `binops` is the struct's type parameter, so
# it is fixed by the value handed to the constructor.
struct Options{Ops}
    binops::Ops
end
options = Options([+, -, *])

julia> @btime BINOP!($x, $y, 1, $clen, $options)
  31.602 ns (1 allocation: 16 bytes)

3.

# Parametric container: `binops` has exactly the type of the constructor
# argument, which becomes the struct's type parameter.
struct Options{Ops}
    binops::Ops
end
options = Options((+, -, *))

julia> @btime BINOP!($x, $y, 1, $clen, $options)
  23.229 ns (0 allocations: 0 bytes)

So, of all these versions, the fully typed tuple version of the operations is the fastest. I think the difference, 23 - 8 = 15 ns, is the price one has to pay to look up `op` dynamically, and it can’t be improved further.

If this performance is not enough, the closest technique for this problem is union-splitting: write a macro along the lines of ManualDispatch.jl or this snippet. For that to work, however, all operations must be known at compile time.

1 Like