I’ve been struggling to speed up this problem I’m working with. Apologies about posting this long snippet, I couldn’t think of a more minimal example that has all the features stumping me.

```
using BenchmarkTools
using FastGaussQuadrature
using NLsolve
using ComponentArrays
θ = ComponentArray{Float64}(m=(α=1.2, γ=[10.6, 10.03], θx=[-0.50, -0.005, 1.0]),
f=(α=1.2, γ=[10.6, 10.03], θx=[-0.50, -0.005, 1.0])
)
const ρ = 0.1
const glpoints = gausslaguerre(500)
const jump1 = 62
const jump2 = 65
const ll = 0.001
const ul = 500.0
transfθ(θ) = abs(θ)
function discountedintegral(f, r, T; glpoints=glpoints)
x, w = glpoints
exp(-r*T)/r * sum( w[i]*f(x[i]/r + T) for i in 1:length(x) )
end
function H̃i(i, ti, x; θ=θ, ρ=ρ, glpoints=glpoints)
discountedintegral(s -> Zi(i, s; θ=θ), ρ, ti; glpoints=glpoints) * φi(i, x; θ=θ)
end
function Hi(i, si, xi; θ=θ)
Zi(i, si; θ=θ) * φi(i, xi; θ=θ)
end
function φi(i, xi; θ=θ)
if i == 1
θx = θ.m.θx
elseif i == 2
θx = θ.f.θx
end
res = zero(eltype(θ))
for i in 1:length(xi)
res += θx[i + 1] * xi[i]
end
return exp(res + θx[1])
end
function Zi(i, si; θ=θ, jump1=jump1, jump2=jump2)
if i == 1
α = θ.m.α
γ = θ.m.γ
elseif i == 2
α = θ.f.α
γ = θ.f.γ
end
α = transfθ(α)
γ .= transfθ.(γ)
abs(si)^α + γ[1]*transitionF(si, (jump1 - 45)*4) + γ[2]*transitionF(si, (jump2 - 45)*4)
end
function transitionF(t, t̄)
t < t̄ && return 0
t̄ <= t < t̄ + 0.5 && return 2(t - t̄)^2
t̄ + 0.5 <= t < t̄ + 1 && return 1 - 2(t - 1 - t̄)^2
t̄ + 1 <= t && return 1
end
function U1(t, x1; θ=θ, K=K, ρ=ρ, glpoints=glpoints)
t1 = t[1]
K1 = K[1]
K1*ρ^(-1)*(1 - exp(-ρ*t1)) + H̃i(1, t1, x1; θ=θ, ρ=ρ, glpoints=glpoints)
end
function U2(t, x2; θ=θ, K=K, ρ=ρ, glpoints=glpoints)
t2 = t[2]
K2 = K[2]
K2*ρ^(-1)*(1 - exp(-ρ*t2)) + H̃i(2, t2, x2; θ=θ, ρ=ρ, glpoints=glpoints)
end
function uni_foc(sto, t; θ=θ, x=x, K=K)
sto[1] = Hi(1, t[1], x[1]; θ=θ) - K[1]
sto[2] = Hi(2, t[2], x[2]; θ=θ) - K[2]
end
function Aifun!(ret; x=x, θ=θ, K=K, uni_t0=[10.0, 11.0], ρ=ρ, uni_meth=:trust_region, autodiff=:forward,
xtol=1e-6, ftol=1e-6)
sol = nlsolve((sto, t) -> uni_foc(sto, t; θ=θ, x=x, K=K), uni_t0, method=uni_meth, autodiff=autodiff, xtol=xtol, ftol=ftol)
ret[1] = U1(sol.zero, x[1]; θ=θ, K=K, ρ=ρ, glpoints=glpoints)
ret[2] = U2(sol.zero, x[2]; θ=θ, K=K, ρ=ρ, glpoints=glpoints)
return ret .* 0.6
end
x = [[10.0, 6.0], [6.0, 10.0]]
K = [1.6, 0.6]
ret = zeros(2)
Aifun!(ret)
```

It’s all good, but now suppose I have matrices for `x`

and `K`

. I’m trying to apply function `Aifun!`

to those matrices:

```
matkk = abs.(randn(500, 2))
dataca = ComponentArray{Float64}(m=(x=rand(500, 2),), f=(x=rand(500, 2),))
Amat = similar(matkk)
function Aimat!(Amat, matkk, dataca; θ=θ, uni_t0=[10.0, 11.0], ρ=ρ, uni_meth=:trust_region, autodiff=:forward,
xtol=1e-6, ftol=1e-6)
Ai = similar(matkk, 2)
Ki = similar(matkk, 2)
covs = size(dataca.m.x, 2)
xi = [similar(dataca.m.x, covs), similar(dataca.f.x, covs)]
n = size(matkk, 1)
for i in 1:n
Ai[1] = Amat[i, 1]
Ai[2] = Amat[i, 2]
Ki[1] = matkk[i, 1]
Ki[2] = matkk[i, 2]
xi[1] = dataca.m.x[i, :]
xi[2] = dataca.f.x[i, :]
Aifun!(Ai; x=xi, θ=θ, K=Ki, uni_t0=uni_t0, ρ=ρ, uni_meth=uni_meth, autodiff=autodiff,
xtol=xtol, ftol=ftol)
Amat[i, 1] = Ai[1]
Amat[i, 2] = Ai[2]
end
return Amat
end
```

`Aimat!(Amat, matkk, dataca)`

works, but I’m looking for a more efficient and faster way, if there is any.