I’d write this like so:
function superbee!(f::F, s, a, b, h) where {F}
for i ∈ eachindex(s, a, b)
s[i] = f(a[i], b[i])*h
end
end
The function f
is what we want to optimize. In the following, f0
was the starting point, very similar to your initial code, f1
is a modified version of f0
, and so on. The goal was to eliminate the minmod
and maxmod
functions, it’s accomplished with f5
:
function f0(a::T, b::T) where {T}
maxmod = (x, y) -> (sign(x) + sign(y))*max(abs(x), abs(y))/2
minmod = (x, y) -> (sign(x) + sign(y))*min(abs(x), abs(y))/2
maxmod(minmod(a, 2*b), minmod(2*a,b))::T
end
function f1(a::T, b::T) where {T}
is_neg = x -> x < false
maxmod = (x, y) -> (sign(x) + sign(y))*max(abs(x), abs(y))/2
minmod_negative_a = (x, y) -> (sign(y) - 1)*min(-x, abs(y))/2
minmod_positive_a = (x, y) -> (sign(y) + 1)*min( x, abs(y))/2
na = maxmod(minmod_negative_a(a, 2*b), minmod_negative_a(2*a, b))::T
pa = maxmod(minmod_positive_a(a, 2*b), minmod_positive_a(2*a, b))::T
nonzero = ifelse(is_neg(a), na, pa)::T
ifelse((iszero(a) | iszero(b)), zero(T), nonzero)::T
end
function f2(a::T, b::T) where {T}
is_neg = x -> x < false
maxmod = (x, y) -> (sign(x) + sign(y))*max(abs(x), abs(y))/2
a_is_neg = is_neg(a)
b_is_neg = is_neg(b)
zer = zero(T)::T
nanb = maxmod(max(a, 2*b), max(2*a, b))::T
napb = zer
panb = zer
papb = maxmod(min(a, 2*b), min(2*a, b))::T
na = ifelse(b_is_neg, nanb, napb)::T
pa = ifelse(b_is_neg, panb, papb)::T
nonzero = ifelse(a_is_neg, na, pa)::T
ifelse((iszero(a) | iszero(b)), zer, nonzero)::T
end
function f3(a::T, b::T) where {T}
is_neg = x -> x < false
maxmod = (x, y) -> (sign(x) + sign(y))*max(abs(x), abs(y))/2
a_is_neg = is_neg(a)
b_is_neg = is_neg(b)
nanb = maxmod(max(a, 2*b), max(2*a, b))::T
papb = maxmod(min(a, 2*b), min(2*a, b))::T
nonzero = ifelse(a_is_neg, nanb, papb)::T
ifelse((iszero(a) | iszero(b) | (a_is_neg != b_is_neg)), zero(T), nonzero)::T
end
function f4(a::T, b::T) where {T}
minmax = (x, y) -> ifelse((y < x), (y, x), (x, y))
is_neg = x -> x < false
maxmod = (x, y) -> (sign(x) + sign(y))*max(abs(x), abs(y))/2
a_is_neg = is_neg(a)
b_is_neg = is_neg(b)
(nanb, papb) = let
(t21, t11) = minmax( a, 2*b)
(t22, t12) = minmax(2*a, b)
t1 = maxmod(t11, t12)::T
t2 = maxmod(t21, t22)::T
(t1, t2)
end
nonzero = ifelse(a_is_neg, nanb, papb)::T
ifelse((iszero(a) | iszero(b) | (a_is_neg != b_is_neg)), zero(T), nonzero)::T
end
function f5(a::T, b::T) where {T}
minmax = (x, y) -> ifelse((y < x), (y, x), (x, y))
is_neg = x -> x < false
a_is_neg = is_neg(a)
b_is_neg = is_neg(b)
(nanb, papb) = let
(t21, t11) = minmax( a, 2*b)
(t22, t12) = minmax(2*a, b)
t1 = min(t11, t12)::T
t2 = max(t21, t22)::T
(t1, t2)
end
nonzero = ifelse(a_is_neg, nanb, papb)::T
ifelse((iszero(a) | iszero(b) | (a_is_neg != b_is_neg)), zero(T), nonzero)::T
end
f5
seems to be quite fast:
julia> @btime superbee!(f0, s, a, b, h) setup=(n=100; s=rand(n); a=rand(n); b=rand(n); h=rand();)
154.966 ns (0 allocations: 0 bytes)
julia> @btime superbee!(f4, s, a, b, h) setup=(n=100; s=rand(n); a=rand(n); b=rand(n); h=rand();)
126.949 ns (0 allocations: 0 bytes)
julia> @btime superbee!(f5, s, a, b, h) setup=(n=100; s=rand(n); a=rand(n); b=rand(n); h=rand();)
56.712 ns (0 allocations: 0 bytes)
julia> versioninfo()
Julia Version 1.12.0-DEV.280
Commit d8d384278d6 (2024-04-02 10:25 UTC)
Build Info:
Official https://julialang.org/ release
Platform Info:
OS: Linux (x86_64-linux-gnu)
CPU: 8 × AMD Ryzen 3 5300U with Radeon Graphics
WORD_SIZE: 64
LLVM: libLLVM-16.0.6 (ORCJIT, znver2)
Threads: 1 default, 0 interactive, 1 GC (on 8 virtual cores)