Optimising superbee! function

I’d write this like so:

function superbee!(f::F, s, a, b, h) where {F}
  for i ∈ eachindex(s, a, b)
    s[i] = f(a[i], b[i])*h
  end
end

The function f is what we want to optimize. In the following, f0 was the starting point, very similar to your initial code, f1 is a modified version of f0, and so on. The goal was to eliminate the minmod and maxmod functions, it’s accomplished with f5:

function f0(a::T, b::T) where {T}
  maxmod = (x, y) -> (sign(x) + sign(y))*max(abs(x), abs(y))/2
  minmod = (x, y) -> (sign(x) + sign(y))*min(abs(x), abs(y))/2
  maxmod(minmod(a, 2*b), minmod(2*a,b))::T
end

function f1(a::T, b::T) where {T}
  is_neg = x -> x < false
  maxmod = (x, y) -> (sign(x) + sign(y))*max(abs(x), abs(y))/2
  minmod_negative_a = (x, y) -> (sign(y) - 1)*min(-x, abs(y))/2
  minmod_positive_a = (x, y) -> (sign(y) + 1)*min( x, abs(y))/2
  na = maxmod(minmod_negative_a(a, 2*b), minmod_negative_a(2*a, b))::T
  pa = maxmod(minmod_positive_a(a, 2*b), minmod_positive_a(2*a, b))::T
  nonzero = ifelse(is_neg(a), na, pa)::T
  ifelse((iszero(a) | iszero(b)), zero(T), nonzero)::T
end

function f2(a::T, b::T) where {T}
  is_neg = x -> x < false
  maxmod = (x, y) -> (sign(x) + sign(y))*max(abs(x), abs(y))/2
  a_is_neg = is_neg(a)
  b_is_neg = is_neg(b)
  zer = zero(T)::T
  nanb = maxmod(max(a, 2*b), max(2*a, b))::T
  napb = zer
  panb = zer
  papb = maxmod(min(a, 2*b), min(2*a, b))::T
  na = ifelse(b_is_neg, nanb, napb)::T
  pa = ifelse(b_is_neg, panb, papb)::T
  nonzero = ifelse(a_is_neg, na, pa)::T
  ifelse((iszero(a) | iszero(b)), zer, nonzero)::T
end

function f3(a::T, b::T) where {T}
  is_neg = x -> x < false
  maxmod = (x, y) -> (sign(x) + sign(y))*max(abs(x), abs(y))/2
  a_is_neg = is_neg(a)
  b_is_neg = is_neg(b)
  nanb = maxmod(max(a, 2*b), max(2*a, b))::T
  papb = maxmod(min(a, 2*b), min(2*a, b))::T
  nonzero = ifelse(a_is_neg, nanb, papb)::T
  ifelse((iszero(a) | iszero(b) | (a_is_neg != b_is_neg)), zero(T), nonzero)::T
end

function f4(a::T, b::T) where {T}
  minmax = (x, y) -> ifelse((y < x), (y, x), (x, y))
  is_neg = x -> x < false
  maxmod = (x, y) -> (sign(x) + sign(y))*max(abs(x), abs(y))/2
  a_is_neg = is_neg(a)
  b_is_neg = is_neg(b)
  (nanb, papb) = let
    (t21, t11) = minmax(  a, 2*b)
    (t22, t12) = minmax(2*a,   b)
    t1 = maxmod(t11, t12)::T
    t2 = maxmod(t21, t22)::T
    (t1, t2)
  end
  nonzero = ifelse(a_is_neg, nanb, papb)::T
  ifelse((iszero(a) | iszero(b) | (a_is_neg != b_is_neg)), zero(T), nonzero)::T
end

function f5(a::T, b::T) where {T}
  minmax = (x, y) -> ifelse((y < x), (y, x), (x, y))
  is_neg = x -> x < false
  a_is_neg = is_neg(a)
  b_is_neg = is_neg(b)
  (nanb, papb) = let
    (t21, t11) = minmax(  a, 2*b)
    (t22, t12) = minmax(2*a,   b)
    t1 = min(t11, t12)::T
    t2 = max(t21, t22)::T
    (t1, t2)
  end
  nonzero = ifelse(a_is_neg, nanb, papb)::T
  ifelse((iszero(a) | iszero(b) | (a_is_neg != b_is_neg)), zero(T), nonzero)::T
end

f5 seems to be quite fast:

julia> @btime superbee!(f0, s, a, b, h) setup=(n=100; s=rand(n); a=rand(n); b=rand(n); h=rand();)
  154.966 ns (0 allocations: 0 bytes)

julia> @btime superbee!(f4, s, a, b, h) setup=(n=100; s=rand(n); a=rand(n); b=rand(n); h=rand();)
  126.949 ns (0 allocations: 0 bytes)

julia> @btime superbee!(f5, s, a, b, h) setup=(n=100; s=rand(n); a=rand(n); b=rand(n); h=rand();)
  56.712 ns (0 allocations: 0 bytes)

julia> versioninfo()
Julia Version 1.12.0-DEV.280
Commit d8d384278d6 (2024-04-02 10:25 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: Linux (x86_64-linux-gnu)
  CPU: 8 × AMD Ryzen 3 5300U with Radeon Graphics
  WORD_SIZE: 64
  LLVM: libLLVM-16.0.6 (ORCJIT, znver2)
Threads: 1 default, 0 interactive, 1 GC (on 8 virtual cores)
3 Likes