Efficient calculation of many complex Lorentzians on a large array

Ok, tried to check for type instability:

"""
    loopLorentz!(χR, χI, χRC, χIC, a, ωᵣ, ω, γᵢ)

Type-inference debugging driver. For index `1` only, print the `@code_warntype`
report for `complexLorentz!(χIC, χRC, a[1], ωᵣ[1], ω, γᵢ[1])`, then add `χIC`
element-wise into `χI` in place.

`χR` is accepted but not used in this function. Returns `χI`.

Note: `@code_warntype` reports on the call instead of executing it, so `χIC` is
accumulated with whatever contents it had on entry.
"""
function loopLorentz!(
    χR::Vector{ComplexF32},
    χI::Vector{ComplexF32},
    χRC::Vector{ComplexF32},
    χIC::Vector{ComplexF32},
    a::Vector{ComplexF32},
    ωᵣ::Vector{ComplexF32},
    ω::Vector{ComplexF32},
    γᵢ::Vector{ComplexF32},
)
    # Restricted to the first term while debugging; the commented-out original
    # range was `eachindex(γᵢ)`.
    for i in 1
        @code_warntype complexLorentz!(χIC, χRC, a[i], ωᵣ[i], ω, γᵢ[i])
        χI .+= χIC
    end
    return χI
end

"""
    complexLorentz!(χIC, χRC, aᵢ, ωᵣᵢ, ω, γᵢᵢ)

Overwrite `χIC` in place, element by element over `ω`, with

    aᵢ / (ωᵣᵢ - ω - im * γᵢᵢ)

evaluated as a single fused broadcast under `@fastmath`.

`χRC` is accepted but not used in this function. Returns `nothing`.
"""
function complexLorentz!(
    χIC::Vector{ComplexF32},
    χRC::Vector{ComplexF32},
    aᵢ::ComplexF32,
    ωᵣᵢ::ComplexF32,
    ω::Vector{ComplexF32},
    γᵢᵢ::ComplexF32,
)
    # `@.` turns the assignment into `.=` and dots every operator, so the result
    # is written straight into `χIC` without a temporary array.
    @fastmath @. χIC = aᵢ / (ωᵣᵢ - ω - im * γᵢᵢ)
    return nothing
end

Output:

MethodInstance for complexLorentz!(::Vector{ComplexF32}, ::Vector{ComplexF32}, ::ComplexF32, ::ComplexF32, ::Vector{ComplexF32}, ::ComplexF32)
  from complexLorentz!(χIC::Vector{ComplexF32}, χRC::Vector{ComplexF32}, aᵢ::ComplexF32, ωᵣᵢ::ComplexF32, ω::Vector{ComplexF32}, γᵢᵢ::ComplexF32) in Main at c:\Users\greifenstein\Documents\armstrong\Documents\Code\Julia Spielwiese\comparison_complex_float.jl:13
Arguments
  #self#::Core.Const(complexLorentz!)
  χIC::Vector{ComplexF32}
  χRC::Vector{ComplexF32}
  aᵢ::ComplexF32
  ωᵣᵢ::ComplexF32
  ω::Vector{ComplexF32}
  γᵢᵢ::ComplexF32
Body::Nothing
1 ─ %1  = Base.FastMath::Core.Const(Base.FastMath)
│   %2  = Base.getproperty(%1, :div_fast)::Core.Const(Base.FastMath.div_fast)
│   %3  = Base.FastMath::Core.Const(Base.FastMath)
│   %4  = Base.getproperty(%3, :sub_fast)::Core.Const(Base.FastMath.sub_fast)
│   %5  = Base.FastMath::Core.Const(Base.FastMath)
│   %6  = Base.getproperty(%5, :sub_fast)::Core.Const(Base.FastMath.sub_fast)
│   %7  = Base.broadcasted(%6, ωᵣᵢ, ω)::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.FastMath.sub_fast), Tuple{ComplexF32, Vector{ComplexF32}}}
│   %8  = Base.FastMath::Core.Const(Base.FastMath)
│   %9  = Base.getproperty(%8, :mul_fast)::Core.Const(Base.FastMath.mul_fast)
│   %10 = Base.broadcasted(%9, Main.im, γᵢᵢ)::Core.PartialStruct(Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}, Nothing, typeof(Base.FastMath.mul_fast), Tuple{Complex{Bool}, ComplexF32}}, Any[Core.Const(Base.FastMath.mul_fast), Core.PartialStruct(Tuple{Complex{Bool}, ComplexF32}, Any[Core.Const(im), ComplexF32]), Core.Const(nothing)])
│   %11 = Base.broadcasted(%4, %7, %10)::Core.PartialStruct(Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.FastMath.sub_fast), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.FastMath.sub_fast), Tuple{ComplexF32, Vector{ComplexF32}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}, Nothing, typeof(Base.FastMath.mul_fast), Tuple{Complex{Bool}, ComplexF32}}}}, Any[Core.Const(Base.FastMath.sub_fast), Core.PartialStruct(Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.FastMath.sub_fast), Tuple{ComplexF32, Vector{ComplexF32}}}, 
Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}, Nothing, typeof(Base.FastMath.mul_fast), Tuple{Complex{Bool}, ComplexF32}}}, Any[Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.FastMath.sub_fast), Tuple{ComplexF32, Vector{ComplexF32}}}, Core.PartialStruct(Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}, Nothing, typeof(Base.FastMath.mul_fast), Tuple{Complex{Bool}, ComplexF32}}, Any[Core.Const(Base.FastMath.mul_fast), Core.PartialStruct(Tuple{Complex{Bool}, ComplexF32}, Any[Core.Const(im), ComplexF32]), Core.Const(nothing)])]), Core.Const(nothing)])
│   %12 = Base.broadcasted(%2, aᵢ, %11)::Core.PartialStruct(Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.FastMath.div_fast), Tuple{ComplexF32, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.FastMath.sub_fast), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.FastMath.sub_fast), Tuple{ComplexF32, Vector{ComplexF32}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}, Nothing, typeof(Base.FastMath.mul_fast), Tuple{Complex{Bool}, ComplexF32}}}}}}, Any[Core.Const(Base.FastMath.div_fast), Core.PartialStruct(Tuple{ComplexF32, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.FastMath.sub_fast), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, 
Nothing, typeof(Base.FastMath.sub_fast), Tuple{ComplexF32, Vector{ComplexF32}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}, Nothing, typeof(Base.FastMath.mul_fast), Tuple{Complex{Bool}, ComplexF32}}}}}, Any[ComplexF32, Core.PartialStruct(Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.FastMath.sub_fast), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.FastMath.sub_fast), Tuple{ComplexF32, Vector{ComplexF32}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}, Nothing, typeof(Base.FastMath.mul_fast), Tuple{Complex{Bool}, ComplexF32}}}}, Any[Core.Const(Base.FastMath.sub_fast), Core.PartialStruct(Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.FastMath.sub_fast), Tuple{ComplexF32, Vector{ComplexF32}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}, Nothing, typeof(Base.FastMath.mul_fast), Tuple{Complex{Bool}, ComplexF32}}}, Any[Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.FastMath.sub_fast), Tuple{ComplexF32, Vector{ComplexF32}}}, Core.PartialStruct(Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}, Nothing, typeof(Base.FastMath.mul_fast), Tuple{Complex{Bool}, ComplexF32}}, Any[Core.Const(Base.FastMath.mul_fast), Core.PartialStruct(Tuple{Complex{Bool}, ComplexF32}, Any[Core.Const(im), ComplexF32]), Core.Const(nothing)])]), Core.Const(nothing)])]), Core.Const(nothing)])
│         Base.materialize!(χIC, %12)
└──       return Main.nothing

I don’t see any mention of ComplexF64 or Float64. However, using @time instead of @code_warntype does show 18, sometimes 19, allocations :thinking:

  0.002631 seconds (18 allocations: 944 bytes)

  0.002507 seconds (19 allocations: 1.219 KiB)

Edit: The allocations happen regardless of whether @fastmath is used.
Edit 2: Oh… wait… does using @time inside a loop allocate memory by itself? When I put the @time in benchmark_susc() like this instead, it does not show any allocations:

"""
    benchmark_susc()

Build test data via `provideData` and time one `loopLorentz!` call with `@time`.

Data is requested first for `Float32` and then for `ComplexF32`; only the
`ComplexF32` set is actually timed, because the `Float32` timing line is
commented out. Returns `nothing`.
"""
function benchmark_susc()
    # Float32 data set (its timing call is currently disabled).
    ω, χR, χI, χRC, χIC, γᵢ, a, ωᵣ = provideData(Float32)
    #@time χ=loopLorentz!(χR, χI, χRC, χIC, a, ωᵣ, ω, γᵢ)

    # ComplexF32 data set: rebinds the same local names, then times the call.
    ω, χR, χI, χRC, χIC, γᵢ, a, ωᵣ = provideData(ComplexF32)
    @time χ = loopLorentz!(χR, χI, χRC, χIC, a, ωᵣ, ω, γᵢ)

    return nothing
end