Efficient calculation of many complex Lorentzians on a large array

Reading the julia inv source code (https://github.com/JuliaLang/julia/blob/master/base/complex.jl),
it appears that Complex{Float32} inverse convert (widen) to Complex{Float64} and calls the corresponding specialized implementation. It does explain at least a part of the overhead.
I do not know enough to tell if this implementation choice is optimal or not.

inv(z::Complex{<:Union{Float16,Float32}}) =
    oftype(z, inv(widen(z)))

/(z::Complex{T}, w::Complex{T}) where {T<:Union{Float16,Float32}} =
    oftype(z, widen(z)*inv(widen(w)))
ComplexF64 specialization
# robust complex division for double precision
# variables are scaled & unscaled to avoid over/underflow, if necessary
# based on arxiv.1210.4539
#             a + i*b
#  p + i*q = ---------
#             c + i*d
function /(z::ComplexF64, w::ComplexF64)
    a, b = reim(z); c, d = reim(w)
    absa = abs(a); absb = abs(b);  ab = absa >= absb ? absa : absb # equiv. to max(abs(a),abs(b)) but without NaN-handling (faster)
    absc = abs(c); absd = abs(d);  cd = absc >= absd ? absc : absd

    halfov = 0.5*floatmax(Float64)              # overflow threshold
    twounϵ = floatmin(Float64)*2.0/eps(Float64) # underflow threshold

    # actual division operations
    if  ab>=halfov || ab<=twounϵ || cd>=halfov || cd<=twounϵ # over/underflow case
        p,q = scaling_cdiv(a,b,c,d,ab,cd) # scales a,b,c,d before division (unscales after)
    else
        p,q = cdiv(a,b,c,d)
    end

    return ComplexF64(p,q)
end

This text will be hidden

2 Likes