This is the current status:
n = 1000
t0 = rand(1:200, n)
t1 = rand(1:200, n)
t = vcat(t0, t1)
c = similar(t)
for i in 1:2n
    c[i] = rand() < 0.5 ? t[i] : rand(1:t[i])
end
xx0 = 100 .* [rand(n).*rand([-1, 1], n) rand(n).*rand([-1, 1], n)]
xx1 = 100 .* [rand(n).*rand([-1, 1], n) rand(n).*rand([-1, 1], n)]
xx = vcat(xx0, xx1)
α = 1.2
β = [-1.33, -0.006, 0.12]
γ = [0.07, 0.03, 0.5]
ρ = 11.3
ψ0 = 0.046
ψ1 = 0.040
function logℒ_fast(α, β, t, c, x)
    eα = abs(α)
    n, k = size(x)
    (n == length(t) == length(c) && length(β) == k + 1) || throw(DimensionMismatch())
    s = zero(typeof(α))
    for i in 1:n
        xb = sum(x[i, j] * β[j+1] for j in 1:k) + β[1]
        ti = t[i]
        s += (1 - (c[i] == ti)) * (log(eα) + (eα - 1) * log(ti) + xb) - ti^eα * exp(xb)
    end
    return s
end
# compute log(1 + exp(x)) without spurious overflow
softplus(x) = log1p(exp(-abs(x))) + max(x, 0)
function 𝒬_fast(γ, t, x)
    s = log(1 + zero(eltype(x)))
    n, k = size(x)
    (n == length(t) && length(γ) == k + 1) || throw(DimensionMismatch())
    for i = 1:n
        xb0 = sum(x[i, j] * γ[j] for j in 1:k)
        xb1 = xb0 + γ[end]
        s += (t[i] > 0) * xb0 + (t[i] > 1) * xb1 - softplus(xb0) - softplus(xb1)
    end
    return s
end
## removes duplicated columns (exact duplicates only; it does not catch more general collinearity)
function removeredundant(x)
    return hcat(unique(eachcol(x))...)
end
function 𝒞_fast(ρ, t0, t1, xx0, xx1)
    xx = removeredundant([xx0 xx1])
    y1 = log.(t1)
    y0 = log.(t0)
    b1 = xx \ y1
    b0 = xx \ y0
    s = zero(eltype(ρ))
    n, k = size(xx)
    (n == length(t0) == length(t1)) || throw(DimensionMismatch())
    for i = 1:n
        e1 = y1[i] - sum(xx[i, j] * b1[j] for j in 1:k)
        e0 = y0[i] - sum(xx[i, j] * b0[j] for j in 1:k)
        s -= (e1 * e0 - ρ)^2
    end
    return s
end
function boole_i(t, age; ll_ret=1, ul_ret=2)
    age - ll_ret <= t <= age + ul_ret
end
function 𝒮_fast(ψ0, ψ1, t0, t1; age=4*(65-45), ll_ret=1, ul_ret=2)
    n = length(t0)
    (n == length(t1)) || throw(DimensionMismatch())
    s = zero(typeof(ψ0))
    for i in 1:n
        s -= (boole_i(t0[i], age; ll_ret=ll_ret, ul_ret=ul_ret) - ψ0)^2 +
            (boole_i(t1[i], age; ll_ret=ll_ret, ul_ret=ul_ret) - ψ1)^2
    end
    return s
end
logℒ_fast(α, β, t, c, xx) +
𝒬_fast(γ, t, xx) +
𝒞_fast(ρ, t0, t1, xx0, xx1) +
𝒮_fast(ψ0, ψ1, t0, t1)
There are some allocations I didn’t know how to avoid in 𝒞_fast:
- I need to remove collinear columns in the matrix [xx0 xx1].
- I need to compute the OLS coefficients of regressing log(t1) on xx, and log(t0) on xx.
Does anyone have any suggestions on avoiding those allocations?
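For concreteness, here is a variant I sketched but have not benchmarked (𝒞_sketch is just a throwaway name): it factors xx once with qr and reuses the factorization for both right-hand sides, but it still allocates y0, y1 and both coefficient vectors, so I suspect it only removes part of the cost.

using LinearAlgebra  # for qr

# Hypothetical variant of 𝒞_fast (uses removeredundant from above): factor xx
# once and reuse the factorization for both right-hand sides. It still
# allocates y0, y1, b0, b1, so it only saves one of the two factorizations.
function 𝒞_sketch(ρ, t0, t1, xx0, xx1)
    xx = removeredundant([xx0 xx1])
    y1 = log.(t1)
    y0 = log.(t0)
    F = qr(xx)   # shared QR factorization
    b1 = F \ y1  # least-squares coefficients of log(t1) on xx
    b0 = F \ y0  # least-squares coefficients of log(t0) on xx
    n, k = size(xx)
    (n == length(t0) == length(t1)) || throw(DimensionMismatch())
    s = zero(typeof(ρ))
    for i = 1:n
        e1 = y1[i] - sum(xx[i, j] * b1[j] for j in 1:k)
        e0 = y0[i] - sum(xx[i, j] * b0[j] for j in 1:k)
        s -= (e1 * e0 - ρ)^2
    end
    return s
end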
Does anyone have any suggestions for further optimizations? (I know I can transpose the matrices to exploit Julia's column-major memory layout.)
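For reference, this is the kind of rewrite I mean, shown only for logℒ_fast (logℒ_fast_t is a made-up name, and the caller would pass permutedims(xx) instead of xx):

# Sketch of a transposed version of logℒ_fast: xt is the k × n transpose of x,
# so the inner loop over j walks down a column, which is contiguous under
# Julia's column-major layout.
function logℒ_fast_t(α, β, t, c, xt)
    eα = abs(α)
    k, n = size(xt)
    (n == length(t) == length(c) && length(β) == k + 1) || throw(DimensionMismatch())
    s = zero(typeof(α))
    for i in 1:n
        xb = β[1]
        for j in 1:k
            xb += xt[j, i] * β[j+1]  # column-i entries are adjacent in memory
        end
        ti = t[i]
        s += (1 - (c[i] == ti)) * (log(eα) + (eα - 1) * log(ti) + xb) - ti^eα * exp(xb)
    end
    return s
end
# called as logℒ_fast_t(α, β, t, c, permutedims(xx))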
Also, as I mentioned above, I get an error when trying to use LoopVectorization in the loops. I think that happens because @turbo does not support more than one loop index, and since I want to generalize the program to allow for a larger xx matrix I cannot simply write out the columns by hand. How can I avoid that error?
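For concreteness, here is the nested-loop shape I would like @turbo to handle, written for 𝒬_fast without the macro so that it runs as plain Julia (𝒬_loop is a throwaway name; whether @turbo accepts a body that indexes x with both i and j is exactly what I am unsure about):

# Nested-loop version of 𝒬_fast's accumulation (relies on the softplus defined
# above), with the generator replaced by an explicit inner loop over the
# columns of x.
function 𝒬_loop(γ, t, x)
    n, k = size(x)
    (n == length(t) && length(γ) == k + 1) || throw(DimensionMismatch())
    s = zero(eltype(x))
    for i in 1:n
        xb0 = zero(eltype(x))
        for j in 1:k
            xb0 += x[i, j] * γ[j]
        end
        xb1 = xb0 + γ[end]
        s += (t[i] > 0) * xb0 + (t[i] > 1) * xb1 - softplus(xb0) - softplus(xb1)
    end
    return s
end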