Sum operations between arrays

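I was able to save another 2 ms (5%) on my machine by writing out the loops in order to eliminate the v_1_3 and v_2_9 array allocations (those values are now computed as scalars inside the outer loop, while the vp_1_3 and vp_2_9 arrays are still precomputed once):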

"""
    product4(v, vp) -> (f, g)

Evaluate two pairwise kernels for every combination of an element of `v` and an
element of `vp`. With `a = cbrt(v[i])` and `b = cbrt(vp[j])`, the two returned
`length(vp) × length(v)` matrices hold

    f[j, i] = (a + b)^2 * sqrt(cbrt(a)^2 + cbrt(b)^2)
    g[j, i] = exp(-((a * b) / (a + b))^4)

where `i` and `j` count the elements of `v` and `vp` in iteration order, starting at 1.
The inputs are therefore treated as flat sequences regardless of their own axes.

The element type of the outputs is derived from the element types of the inputs
instead of being hard-coded to `Float64`.

Note that an entry of `g` involves a division by zero whenever `a + b == 0`.
"""
function product4(v, vp)
    # Only the vp-side powers are precomputed; they are reused for every element of v.
    vp_1_3 = cbrt.(vp)
    vp_2_9 = cbrt.(vp_1_3).^2
    # compute the output type rather than hard-coding Float64, to be more type-generic
    T = typeof(zero(eltype(vp_1_3)) + cbrt(zero(eltype(v))))
    f = Matrix{T}(undef, length(vp), length(v))
    g = similar(f)
    # Output positions come from `enumerate`, not from the inputs' own indices, so
    # views with Cartesian indexing or arrays with offset axes cannot produce
    # indices that fall outside the 1-based matrices `f` and `g`.
    for (i, x) in enumerate(v)
        v_1_3 = cbrt(x)
        v_2_9 = cbrt(v_1_3)^2
        # Inner loop runs down a column (first index), matching column-major storage.
        # `@inbounds` is justified because `j` counts 1:length(vp) and `i` counts
        # 1:length(v), which is exactly the size `f` and `g` were allocated with.
        @inbounds for (j, (p_1_3, p_2_9)) in enumerate(zip(vp_1_3, vp_2_9))
            f[j, i] = (v_1_3 + p_1_3)^2 * sqrt(v_2_9 + p_2_9)
            g[j, i] = exp(-((v_1_3 * p_1_3) / (v_1_3 + p_1_3))^4)
        end
    end
    return f, g
end

(and of course you can still save another factor of 2 by specializing for the symmetric case).

7 Likes