Hi Everyone,

I’m working on optimizing two functions, `mplFun`

and `marketEq`

, and I’m looking for advice on how to make them as efficient as possible. Below, I’ve included a simplified version of the code that allows these functions to run, but please note that this is not the original code I’m using—just a minimal example to focus on the optimization of these two functions.

I’ve gone through the performance tips, but I’m wondering if I might be overlooking something or if anyone has suggestions for further improvements.

Currently, when I run the code, I get the following timing:

```
1.897 s (3355 allocations: 82.72 MiB)
```

Since these functions are called thousands of times in the full code, improving their efficiency is critical. Any advice or insights would be greatly appreciated!

Thank you so much in advance for your help!

```
# Define a struct to hold the parameters
mutable struct ModelParameters
γ::Float32
α::Float32
ρ::Float32
ξ::Float32
ω::Float32
η::Float32
θ::Float32
μ_a::Float32
σ_a::Float32
μ_z::Float32
σ_z::Float32
R::Float32
σ::Float32
ϕ::Float32
δ::Float32
S::Int
M::Int
location::Float32
tail::Float32
scale::Float32
p::Float32
cap::Int
end
mutable struct FixedPointSettings
υ::Float32 # Step size for updates (slow adjustment)
iter_S_max::Int # Max iterations for equilibrium
tol_S::Float32 # Tolerance for market equilibrium
tol_S_A::Float32 # Tolerance across markets
tol::Float32 # Tolerance for aggregate labor supply
end
# Initialize model parameters with Float32
modelParams = ModelParameters(
0.7f0,
0.99f0,
0.4f0,
1.0f0,
0.9f0,
10f0,
0.5f0,
0f0,
0.37f0,
0f0,
0.33f0,
0.10f0,
1.0f0,
0.6f0,
0.08f0,
500,
1000,
2f0,
0.71f0,
38.36f0,
0.16f0,
100
)
feSettings = FixedPointSettings(
0.2f0,
10000,
Float32(1e-5),
Float32(1e-3),
Float32(1e-3)
)
function mplFun(
modelParams::ModelParameters,
Φ::Matrix{Float32},
Φ_bar::Vector{Float32},
H::Vector{Float32}
) :: Matrix{Float32}
# Extract necessary parameters from the parameters struct
γ = modelParams.γ
α = modelParams.α
R = modelParams.R
# Precompute constant terms
αγ = α * γ
denom = 1f0 - α * (1f0 - γ)
Z = (1f0 - α * (1f0 - γ)) * (α * (1f0 - γ) / R)^((1f0 - γ) * α / denom)
exponent_1 = 1f0 / denom
exponent_2 = (α - 1f0) / denom
# Use broadcasting to compute MPL, ensuring all operations are done in Float32
MPL = @. Z * (αγ / denom) * Φ_bar' ^ exponent_1 * H' ^ exponent_2 * (1f0 - (1f0 / αγ) * (1f0 - Φ / Φ_bar'))
# Set negative values to 0 in-place
@. MPL = max(MPL, 0f0)
return MPL
end
function marketEq(
modelParams::ModelParameters,
feSettings::FixedPointSettings,
z::Vector{Float32},
Φ::Matrix{Float32},
mass::Vector{Float32},
S_ij_0::Matrix{Float32};
efficient::Bool = false
) :: Tuple{Matrix{Float32}, Bool, Matrix{Float32}, Vector{Float32}, Vector{Float32}, Matrix{Float32}, Matrix{Float32}}
# Extract necessary parameters from the parameters struct
γ = modelParams.γ
α = modelParams.α
η = modelParams.η
θ = modelParams.θ
R = modelParams.R
upsilon = feSettings.υ
iter_S_max = feSettings.iter_S_max
tol_S = feSettings.tol_S
N = length(z)
# Initial Guess
S_ij = copy(S_ij_0)
max_iter_reached = false
# Preallocate arrays as Float32
NS = zeros(Float32, size(S_ij[:, 1:N]))
H = zeros(Float32, N)
Φ_bar = zeros(Float32, N)
MPL = zeros(Float32, size(S_ij[:, 1:N]))
μ = similar(S_ij[:, 1:N], Float32)
S_ij_n = similar(S_ij, Float32)
sum_values = zeros(Float32, size(S_ij, 1))
for i in 1:iter_S_max
# Replace NaNs in S_ij with 0
@views S_ij[isnan.(S_ij)] .= 0
# Compute NS and H
@views NS .= S_ij[:, 1:N].^(η / (1 + η)) .* mass
H .= vec(sum(NS, dims=1)) # Convert sum result to a vector for proper assignment
# Calculate average phi
@views Φ_bar .= vec(sum(NS .* Φ, dims=1)) ./ H
# Calculate mpl
MPL .= mplFun(modelParams, Φ, Φ_bar, H)
# Calculate μ
if efficient
μ .= 1.0f0
else
@views ε = (1f0 / η .+ (1f0 / θ - 1f0 / η) .* S_ij[:, 1:end-1]).^(-1f0)
μ .= ε ./ (1f0 .+ ε)
end
# Calculate the sum of (μ .* MPL)^(1 + η)
@views sum_values .= sum((μ .* MPL).^(1 + η), dims=2)
# Update S_ij_n for all firms
@views S_ij_n[:, 1:end-1] .= (μ .* MPL).^(1 + η) ./ sum_values
@views S_ij_n[isnan.(S_ij_n)] .= 0.0f0
@views S_ij_n[:, end] .= 1f0 .- sum(S_ij_n[:, 1:end-1], dims=2)
@views S_ij_n[S_ij_n .< tol_S] .= 0f0
# Check for convergence
dist_S = maximum(abs.(S_ij_n - S_ij))
if dist_S < tol_S
break # Exit the loop immediately if converged
end
# Update S_ij using upsilon for gradual adjustment
@views S_ij .= upsilon .* S_ij_n .+ (1f0 - upsilon) .* S_ij
if i == iter_S_max
println("WARNING: max iter reached")
max_iter_reached = true
end
end
# Other variables given convergence
W_ij = μ .* MPL
Z = (1f0 - α * (1f0 - γ)) * (α * (1f0 - γ) / R)^((1f0 - γ) * α / (1f0 - α * (1f0 - γ)))
π_ij = Z .* z'.^(1f0 / γ) .* H .* Φ_bar.^(1f0 / γ)
return S_ij, max_iter_reached, NS, H, Φ_bar, W_ij, π_ij
end
using Distributions
# Update N_j in-place
logAbilityDist = LogNormal{Float32}(modelParams.μ_a, modelParams.σ_a)
# Generate percentiles (500 percentiles)
space = LinRange(Float32(0), Float32(1), modelParams.S + 2) # S percentiles + boundaries
# Compute the quantiles for the ability distribution, skipping the 0 and 1 bounds
a = Float32.(quantile(logAbilityDist, space[2:end-1])) # Ensure result is Float32
# Calculate mass (equally weighted), ensuring it's Float32
mass = fill(Float32(1) / Float32(length(a)), length(a)) # Fill array with Float32
z_last = sort(Float32.(rand(LogNormal{Float32}(modelParams.μ_z, modelParams.σ_z), 100)))
function phiFun(z::Vector{Float32}, a::Vector{Float32})::Matrix{Float32}
# Extract necessary parameters from modelParams
ρ = 0.4
ω =0.9
ξ = 1
# Use broadcasting to apply the operation across both z and a
Φ = ((1 - ω) .* (z.^((ρ-1) / ρ))' .+ ω .* (a.^((ρ-1) / ρ))) .^ (ρ / (ρ-1))
# Raise Φ to the power of ξ
Φ = Φ .^ ξ
return Φ
end
Φ = phiFun(Float32.(z_last), Float32.(a))
S_ij_0 = ones(Float32, length(a), 100 + 1) / Float32(100)
S_ij_0[:, 100 + 1] .= 1f0 .- sum(S_ij_0[:, 1:100], dims=2) # Unemployment column
# Call marketEq function for each market
using BenchmarkTools
@btime S_ij, max_iter_reached, _, _, _, W_ij, π_ij = marketEq(modelParams, feSettings, z_last, Φ, mass, S_ij_0)
```