I had already written this before I thought to ask Oscar Smith what he meant by “PR”. I am not a GitHub user and do not know how to make pull requests, so I am leaving it here.
THIS TEXT AND CODE ARE PROVIDED “AS IS”, WITHOUT WARRANTY, ETC.
Oscar Smith asked me to “make a PR to base for the Float64^Float64 algorithm” if I can find a faster way of “bool ^ float” than here: Speed up `x::Bool ^ y::Float64`.
"""
    f3(x::Bool, y::T) where T<:AbstractFloat

Branch-free `x ^ y` for a `Bool` base; the result has the float type of `y`.
"""
function f3(x::Bool, y::T) where {T<:AbstractFloat}
    # A `true` base or a zero exponent always yields one.
    is_unit = x | iszero(y)
    # Otherwise the base is `false`. Multiplying by the Bool `!(y > 0)` keeps Inf for
    # negative `y`, collapses to zero for positive `y`, and lets NaN propagate.
    non_unit = abs(y) * T(Inf) * (!(y > 0))
    return ifelse(is_unit, one(T), non_unit)
end
I found a way that is faster on my processor, without resorting to low-level specifics such as intrinsics.
I checked my achievements on Ryzen 9 and Core i3.
On older processors (for example on my core i3) Oscar Smith’s way is faster.
Since I hope that the hardware and Julia will progress, I consider the result achieved and fulfill the request of Oscar Smith by making a PR as best I can.
Let’s skip the reasons for wanting “bool ^ float”, which are different for everyone,
and if we are here and I understood everything correctly with my english and google-translate… Ready? Go!
Fast "bool ^ float" PR
Are you going to raise a stupid bool
to the power of an insidious float
? But you can’t sit still while a logical one is raised to the power of negative zero? I really understand you…
Take this!
"""
    pow_fast(x::Bool, y::T) where T<:AbstractFloat

Branch-free `x ^ y` for a `Bool` base, built from nested `ifelse` selections.
The result has the float type of `y`.
"""
function pow_fast(x::Bool, y::T) where {T<:AbstractFloat}
    # `false ^ y` for a non-zero, non-NaN exponent: Inf if negative, zero if positive.
    by_sign = ifelse(signbit(y), T(Inf), T(0))
    # NaN is checked before the sign so that a NaN exponent propagates.
    non_unit = ifelse(isnan(y), T(NaN), by_sign)
    # A `true` base or a zero exponent always yields one.
    return ifelse(x | iszero(y), T(1), non_unit)
end;
"""
    pow_fast(x::Bool, y::BigFloat)

`BigFloat` method of the fast `Bool ^ float`: the result is selected among `Float64`
constants and converted with `big` once at the end.
"""
function pow_fast(x::Bool, y::BigFloat)
    # `false ^ y` for a non-zero, non-NaN exponent: Inf if negative, zero if positive.
    by_sign = ifelse(signbit(y), Inf, 0.0)
    # NaN is checked before the sign so that a NaN exponent propagates.
    non_unit = ifelse(isnan(y), NaN, by_sign)
    # A `true` base or a zero exponent always yields one.
    return big(ifelse(x | iszero(y), 1.0, non_unit))
end;
About 100 times faster than native! (on Ryzen 9)
“Ctrl + A + C” and it’s yours!
This hot cake was mined from the permafrost and delivered across the snowy, sunny plain specially for you, with love!
Just two lines save you time!
Fast “float ^ bool” as a gift!
"""
    pow_fast(x::T, y::Bool) where T<:AbstractFloat

Fast `x ^ y` for a `Bool` exponent: `x` when `y` is `true`, otherwise one of type `T`.
The sign of a zero base is kept as is.
"""
function pow_fast(x::T, y::Bool) where {T<:AbstractFloat}
    if y
        return copy(x)
    end
    return T(1)
end
"""
    pow_fast(x::BigFloat, y::Bool)

`BigFloat` method of the fast `float ^ Bool`: returns `x` itself when `y` is `true`,
otherwise a `BigFloat` one.
"""
function pow_fast(x::BigFloat, y::Bool)
    if y
        return x
    end
    return big(1.0)
end
Code, benchmarks, tests:
fast bool ^ float:
using BenchmarkTools, Test
#
"""
    pow_native(x, y)

Reference implementation: forwards to the built-in `^` operator.
"""
function pow_native(x::X, y::Y) where {X,Y}
    return x^y
end
# by Oscar Smith, faster on intel core i3 in some tests
# https://discourse.julialang.org/t/speed-up-x-bool-y-float64/90601
"""
    pow_fast_1(x::Bool, y::T) where T<:AbstractFloat

Branch-free `x ^ y` for a `Bool` base, using a single multiplication chain.
The result has the float type of `y`.
"""
function pow_fast_1(x::Bool, y::T) where {T<:AbstractFloat}
    # A `true` base or a zero exponent always yields one.
    is_unit = x | iszero(y)
    # Otherwise the base is `false`. Multiplying by the Bool `!(y > 0)` keeps Inf for
    # negative `y`, collapses to zero for positive `y`, and lets NaN propagate.
    non_unit = abs(y) * T(Inf) * (!(y > 0))
    return ifelse(is_unit, one(T), non_unit)
end;
# my last way (pow_fast), faster on Ryzen 9
"""
    pow_fast_2(x::Bool, y::T) where T<:AbstractFloat

Branch-free `x ^ y` for a `Bool` base, built from nested `ifelse` selections.
The result has the float type of `y`.
"""
function pow_fast_2(x::Bool, y::T) where {T<:AbstractFloat}
    # `false ^ y` for a non-zero, non-NaN exponent: Inf if negative, zero if positive.
    by_sign = ifelse(signbit(y), T(Inf), T(0))
    # NaN is checked before the sign so that a NaN exponent propagates.
    non_unit = ifelse(isnan(y), T(NaN), by_sign)
    # A `true` base or a zero exponent always yields one.
    return ifelse(x | iszero(y), T(1), non_unit)
end;
"""
    pow_fast_2(x::Bool, y::BigFloat)

`BigFloat` method of the fast `Bool ^ float`: the result is selected among `Float64`
constants and converted with `big` once at the end.
"""
function pow_fast_2(x::Bool, y::BigFloat)
    # `false ^ y` for a non-zero, non-NaN exponent: Inf if negative, zero if positive.
    by_sign = ifelse(signbit(y), Inf, 0.0)
    # NaN is checked before the sign so that a NaN exponent propagates.
    non_unit = ifelse(isnan(y), NaN, by_sign)
    # A `true` base or a zero exponent always yields one.
    return big(ifelse(x | iszero(y), 1.0, non_unit))
end;
# test data
"""
    get_test_data(::Type{T}) where T<:AbstractFloat

Build random inputs for the `bool ^ float` tests and benchmarks.

Returns `(n, r, x, y)`: the sample count, a zeroed result buffer with element type `T`,
random `Bool` bases, and exponents of type `T` with special values mixed in.
"""
function get_test_data(::Type{T}) where {T<:AbstractFloat}
    n = 10000
    n_special = n ÷ 100 # number of random slots overwritten per special value
    r = zeros(T, n)
    x = rand(Bool, n)
    # BigFloat samples are drawn as Float64 and widened afterwards.
    samples = T <: BigFloat ? big.(randn(Float64, n)) : randn(T, n)
    # NOTE(review): the 111th power presumably widens the range of magnitudes — confirm.
    y = samples .^ 111
    # NaN, both infinities, the extreme finite values and both signed zeros.
    specials = (
        T(NaN), -T(Inf), T(Inf), nextfloat(-T(Inf)), prevfloat(T(Inf)), T(0), -T(0),
    )
    for value in specials
        # Indices are drawn with replacement, so later values may overwrite earlier ones.
        y[rand(1:n, n_special)] .= value
    end
    return n, r, x, y
end
#tests
# Check every fast implementation against the native `^` on random and special inputs.
# The float types are listed explicitly instead of calling `subtypes(AbstractFloat)`:
# `subtypes` lives in InteractiveUtils, which this script does not load (it is only
# preloaded in the REPL), and it may return AbstractFloat subtypes for which `randn`
# or `^` are not defined.
@testset verbose = true "fast `bool ^ float`" begin
    for Flt in (BigFloat, Float16, Float32, Float64)
        @testset verbose = true " $Flt" begin
            n, r, x, y = get_test_data(Flt)
            for pow in (pow_fast_1, pow_fast_2)
                @testset "$pow" begin
                    for i in 1:n
                        r_native = pow_native(x[i], y[i])
                        r_fast = pow(x[i], y[i])
                        # Compare by value with `==` (big(1.0) !== big(1.0));
                        # NaN != NaN, so a pair of NaN results is accepted explicitly.
                        @test r_native == r_fast || (isnan(r_native) && isnan(r_fast))
                    end
                end
            end
        end
    end
end;
# benchmarks
"""
    f!(f, r, x, y, n)

Store `f(x[i], y[i])` in `r[i]` for every `i` from 1 to `n`. Returns `nothing`.
"""
function f!(f, r, x, y, n)
    for idx in 1:n
        r[idx] = f(x[idx], y[idx])
    end
    return nothing
end;
# Time every implementation on the same data, once per float type.
# The float types are listed explicitly instead of calling `subtypes(AbstractFloat)`:
# `subtypes` lives in InteractiveUtils, which this script does not load (it is only
# preloaded in the REPL), and it may return AbstractFloat subtypes for which `randn`
# or `^` are not defined.
for Flt in (BigFloat, Float16, Float32, Float64)
    n, r, x, y = get_test_data(Flt)
    println("$Flt")
    for pow in (pow_native, pow_fast_1, pow_fast_2)
        println(" $pow")
        # `$` interpolation makes BenchmarkTools treat the arguments as locals.
        @btime f!($pow, $r, $x, $y, $n)
    end
end
fast float ^ bool:
using BenchmarkTools, Test
"""
    pow_native(x, y)

Reference implementation: forwards to the built-in `^` operator.
"""
function pow_native(x::X, y::Y) where {X,Y}
    return x^y
end
# NOTE: pow_fast(x::T, y::Bool) does not convert -0.0 to 0.0:
# pow_native(-0.0, true) -> 0.0
# pow_fast(-0.0, true) -> -0.0
"""
    pow_fast(x::T, y::Bool) where T<:AbstractFloat

Fast `x ^ y` for a `Bool` exponent: `x` when `y` is `true`, otherwise one of type `T`.
The sign of a zero base is kept as is.
"""
function pow_fast(x::T, y::Bool) where {T<:AbstractFloat}
    if y
        return copy(x)
    end
    return T(1)
end
"""
    pow_fast(x::BigFloat, y::Bool)

`BigFloat` method of the fast `float ^ Bool`: returns `x` itself when `y` is `true`,
otherwise a `BigFloat` one.
"""
function pow_fast(x::BigFloat, y::Bool)
    if y
        return x
    end
    return big(1.0)
end
# test data
"""
    get_test_data(::Type{T}) where T<:AbstractFloat

Build random inputs for the `float ^ bool` tests and benchmarks.

Returns `(n, r, x, y)`: the sample count, a zeroed result buffer with element type `T`,
bases of type `T` with special values mixed in, and random `Bool` exponents.
"""
function get_test_data(::Type{T}) where {T<:AbstractFloat}
    n = 100000
    n_special = n ÷ 100 # number of random slots overwritten per special value
    r = zeros(T, n)
    # BigFloat samples are drawn as Float64 and widened afterwards.
    samples = T <: BigFloat ? big.(randn(Float64, n)) : randn(T, n)
    # NOTE(review): the 111th power presumably widens the range of magnitudes — confirm.
    x = samples .^ 111
    y = rand(Bool, n)
    # NaN, both infinities, the extreme finite values and both signed zeros.
    specials = (
        T(NaN), -T(Inf), T(Inf), nextfloat(-T(Inf)), prevfloat(T(Inf)), T(0), -T(0),
    )
    for value in specials
        # Indices are drawn with replacement, so later values may overwrite earlier ones.
        x[rand(1:n, n_special)] .= value
    end
    return n, r, x, y
end
# Check the fast implementation against the native `^` on random and special inputs.
# The float types are listed explicitly instead of calling `subtypes(AbstractFloat)`:
# `subtypes` lives in InteractiveUtils, which this script does not load (it is only
# preloaded in the REPL), and it may return AbstractFloat subtypes for which `randn`
# or `^` are not defined.
@testset verbose = true "fast `float ^ bool`" begin
    for Flt in (BigFloat, Float16, Float32, Float64)
        @testset verbose = true " $Flt" begin
            n, r, x, y = get_test_data(Flt)
            # The loop variable is named `pow` so it does not shadow the `pow_fast` function.
            for pow in (pow_fast,)
                @testset "$pow" begin
                    for i in 1:n
                        r_native = pow_native(x[i], y[i])
                        r_fast = pow(x[i], y[i])
                        # Compare by value with `==` (big(1.0) !== big(1.0), and
                        # -0.0 == 0.0); NaN != NaN, so a pair of NaN results is
                        # accepted explicitly.
                        @test r_native == r_fast || (isnan(r_native) && isnan(r_fast))
                    end
                end
            end
        end
    end
end;
# benchmarks
"""
    f!(f, r, x, y, n)

Store `f(x[i], y[i])` in `r[i]` for every `i` from 1 to `n`. Returns `nothing`.
"""
function f!(f, r, x, y, n)
    for idx in 1:n
        r[idx] = f(x[idx], y[idx])
    end
    return nothing
end;
# Time the native and the fast implementation on the same data, once per float type.
# The float types are listed explicitly instead of calling `subtypes(AbstractFloat)`:
# `subtypes` lives in InteractiveUtils, which this script does not load (it is only
# preloaded in the REPL), and it may return AbstractFloat subtypes for which `randn`
# or `^` are not defined.
for Flt in (BigFloat, Float16, Float32, Float64)
    n, r, x, y = get_test_data(Flt)
    println("$Flt")
    for pow in (pow_native, pow_fast)
        println(" $pow")
        # `$` interpolation makes BenchmarkTools treat the arguments as locals.
        @btime f!($pow, $r, $x, $y, $n)
    end
end