I wondered if I could get comments on my code. I’ve reviewed the official performance tips and a few blogs discussing achieving high performance.
Unfortunately, since I have a very scant knowledge of computation and memory allocation, I couldn’t improve the following codes more. I understand that any advice will heavily depend on the specific context and structure, but I was wondering if I could get a brief/broad comment, for instance, on which part I could consider for faster speed.
The garbage collecting time is roughly 20%, and I did try profiling but honestly got lost on how to read the result. Any comments would be more than helpful and appreciated.
function tenure_prob(type::String, x::Real, dist_q::Vector{Float64}, surplus::Array{Float64,5})
@unpack_Parameter param
if type == "a"
μ = μ_a
σ = σ_a
else
μ = μ_b
σ = σ_b
end
# Preallocate distributions
pdf_ϵ = vcat(0.0,diff(cdf(Normal(0,σ_ϵ), grids.ϵ)))
pdf_mq = vcat(0.0,diff(cdf(Normal(μ,σ), grids.mq)))
# tenure 0 matrix
output_0_success = zeros(grid_n, grid_m)
# tenure 1 matrix
output_1_success = zeros(grid_n, grid_m, grid_ϵ)
# tenure 2 matrix
output_2_success = zeros(grid_n, grid_m, grid_ϵ, grid_ϵ)
# tenure 3 matrix
output_3_success = zeros(grid_n, grid_m, grid_ϵ, grid_ϵ, grid_ϵ)
# tenure 4 matrix
output_4_success = zeros(grid_n, grid_m, grid_ϵ, grid_ϵ, grid_ϵ, grid_ϵ)
# tenure 5 matrix
output_5_success = zeros(grid_n, grid_m, grid_ϵ, grid_ϵ, grid_ϵ, grid_ϵ)
@inbounds @threads for (q,mq) in collect(product(1:grid_n,1:grid_m))
ind_q = findfirst(surplus[:,1,x,1,mq] .> 0.0) # shouldn't matter y
ind_q = isnothing(findfirst(surplus[:,1,x,1,mq] .> 0.0)) ? grid_n+1 : ind_q
prob_0_success = (q >= ind_q) ? dist_q[q] * pdf_mq[mq] : 0
output_0_success[q,mq] = prob_0_success
ind_five_mq = findfirst(surplus[q,1,min(x+5,max_x+1),6,:] .> 0.0)
ind_five_mq = isnothing(ind_five_mq) ? grid_m+1 : ind_five_mq
prob_5_success = mq >= ind_five_mq
val_mq = grids.mq[mq]
ind_one_y = findfirst(surplus[q,:,min(x+1,max_x+1),2,mq] .> 0.0)
ind_two_y = findfirst(surplus[q,:,min(x+2,max_x+1),3,mq] .> 0.0)
ind_three_y = findfirst(surplus[q,:,min(x+3,max_x+1),4,mq] .> 0.0)
ind_four_y = findfirst(surplus[q,:,min(x+4,max_x+1),5,mq] .> 0.0)
thr_one_y = isnothing(ind_one_y) ? 1e+8 : grids.y[ind_one_y] - val_mq
for ϵ_1 in 1:grid_ϵ
prob_1_success = grids.ϵ[ϵ_1] >= thr_one_y ? pdf_ϵ[ϵ_1] : 0
output_1_success[q,mq,ϵ_1] = prob_0_success * prob_1_success
thr_two_y = isnothing(ind_two_y) ? 1e+8 : 2 * (grids.y[ind_two_y] - val_mq) - grids.ϵ[ϵ_1]
for ϵ_2 in 1:grid_ϵ
prob_2_success = grids.ϵ[ϵ_2] >= thr_two_y ? pdf_ϵ[ϵ_2] : 0
output_2_success[q,mq,ϵ_1,ϵ_2] = output_1_success[q,mq,ϵ_1] * prob_2_success
thr_three_y = isnothing(ind_three_y) ? 1e+8 : 3 * (grids.y[ind_three_y] - val_mq) - grids.ϵ[ϵ_1] - grids.ϵ[ϵ_2]
prob_3_success = ((grids.ϵ .>= thr_three_y) .* pdf_ϵ)
@views output_3_success[q,mq,ϵ_1,ϵ_2,:] = output_2_success[q,mq,ϵ_1,ϵ_2] * prob_3_success
thr_four_y = [isnothing(ind_four_y) ? 1e+8 : 4 * (grids.y[ind_four_y] - val_mq) - grids.ϵ[ϵ_1] - grids.ϵ[ϵ_2] - val_ϵ for val_ϵ in grids.ϵ]
@views output_4_success[q,mq,ϵ_1,ϵ_2,:,:] = output_2_success[q,mq,ϵ_1,ϵ_2] * ((grids.ϵ .>= thr_three_y) .* pdf_ϵ) .* ((grids.ϵ' .>= thr_four_y) .* pdf_ϵ')
@views output_5_success[q,mq,ϵ_1,ϵ_2,:,:] = output_4_success[q,mq,ϵ_1,ϵ_2,:,:] * prob_5_success
end
end
end
return (prob_0_success = sum(output_0_success),
prob_1_success = sum(output_1_success), prob_1_fail = sum(output_0_success) - sum(output_1_success),
prob_2_success = sum(output_2_success), prob_2_fail = sum(output_1_success) - sum(output_2_success),
prob_3_success = sum(output_3_success), prob_3_fail = sum(output_2_success) - sum(output_3_success),
prob_4_success = sum(output_4_success), prob_4_fail = sum(output_3_success) - sum(output_4_success),
prob_5_success = sum(output_5_success), prob_5_fail = sum(output_4_success) - sum(output_5_success))
end