Can you share your improved code?
I use a different solver, and I tried Greek letters in the code to make it look more like Julia.
```julia
using SparseArrays
import LinearAlgebra
import LinearSolve
using Sparspak
using KrylovKit
using BandedMatrices
# using SuiteSparseGraphBLAS
using Interpolations
using Plots
using Random
using Statistics
using Printf
using Profile
using IterativeSolvers
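# Continuous-time RBC model; kbar is the deterministic steady-state capital
# stock implied by the first-order condition α*z̄*k^(α-1) = ρ + δ.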
struct RealBusinessCycleModel
    α::Float64
    γ::Float64
    ρ::Float64
    δ::Float64
    z̄::Float64
    θ::Float64
    σ::Float64
    kbar::Float64
    function RealBusinessCycleModel(α::Float64, γ::Float64, ρ::Float64, δ::Float64, z̄::Float64, θ::Float64, σ::Float64)
        kbar = ((α * z̄) / (ρ + δ))^(1 / (1 - α))
        new(α, γ, ρ, δ, z̄, θ, σ, kbar)
    end
end
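# Build uniform grids in k and z, the flattened meshgrid vectors,
# and the grid spacings dk and dz.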
function get_grid(model::RealBusinessCycleModel, k_min::Float64, k_max::Float64, z_min::Float64, z_max::Float64, I::Int64, J::Int64)::Tuple{Vector{Float64},Vector{Float64},Vector{Float64},Vector{Float64},Float64,Float64}
    k_grid::Vector{Float64} = range(k_min, k_max, length=I)
    z_grid::Vector{Float64} = range(z_min, z_max, length=J)
    # kk::Matrix{Float64} = [k for k in k_grid, z in z_grid]
    kk::Matrix{Float64} = reshape(k_grid, (I, 1)) * ones(1, J)
    # zz::Matrix{Float64} = [z for k in k_grid, z in z_grid]
    zz::Matrix{Float64} = ones(I, 1) * reshape(z_grid, (1, J))
    kk_flat::Vector{Float64} = vec(kk)
    zz_flat::Vector{Float64} = vec(zz)
    dk::Float64 = (k_max - k_min) / (I - 1)
    dz::Float64 = (z_max - z_min) / (J - 1)
    return (k_grid, z_grid, kk_flat, zz_flat, dk, dz)
end
# Utility function
@inline function u(model::RealBusinessCycleModel, c::Vector{Float64})::Vector{Float64}
    if model.γ != 1.0
        return (max.(c, 1e-6) .^ (1 .- model.γ) .- 1) ./ (1 .- model.γ)
    else
        return log.(max.(c, 1e-6))
    end
end
# Derivative of the utility function
@inline function u_prime(model::RealBusinessCycleModel, c::Vector{Float64})::Vector{Float64}
    return max.(c, 1e-6) .^ (-model.γ)
end
# Inverse of the derivative of the utility function
@inline function u_prime_inv(model::RealBusinessCycleModel, x::Vector{Float64})::Vector{Float64}
    return max.(x, 1e-6) .^ (-1 / model.γ)
end
# Production function
@inline function f(model::RealBusinessCycleModel, k::Vector{Float64}, z::Vector{Float64})::Vector{Float64}
    return max.(z, 0.0) .* max.(k, 0.0) .^ model.α
end
# Validate a transition (intensity) matrix: each row should sum to (approximately) zero
function validate_transition_matrix(T::AbstractMatrix{Float64})::Bool
    err = maximum(abs.(sum(T, dims=2)))
    if err > 1e-10
        println("Error: maximum absolute row sum = ", err)
        display(LinearAlgebra.Matrix(T))
        throw(ArgumentError("T is not a proper transition matrix."))
    end
    return true
end
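# Upwind discretization of the savings drift in the k direction:
# backward differences where savings are negative (S_B < 0),
# forward differences where savings are positive (S_F > 0).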
function construct_S_Dk(S_B::Vector{Float64}, S_F::Vector{Float64}, dk::Float64, I::Int64, J::Int64)::SparseMatrixCSC{Float64,Int64}
    S_B_minus::Vector{Float64} = min.(S_B, 0.0)
    S_F_plus::Vector{Float64} = max.(S_F, 0.0)
    # println(S_B_minus)
    # println(S_F_plus)
    lower_diag::Vector{Float64} = -S_B_minus ./ dk
    center_diag::Vector{Float64} = (S_B_minus .- S_F_plus) ./ dk
    upper_diag::Vector{Float64} = S_F_plus ./ dk
    diags = Dict(
        -1 => lower_diag[2:end],
        0 => center_diag,
        1 => upper_diag[1:end-1]
    )
    return spdiagm(diags...)
end
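# Solve the HJB equation with an implicit finite-difference scheme:
# each iteration assembles the generator A and solves
# ((ρ + 1/Δ) I - A) v_new = u(c) + v / Δ.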
function solve(model::RealBusinessCycleModel; Delta::Float64=100.0, tol::Float64=1e-6, max_iter::Int64=1000, I::Int64=20, J::Int64=20, bounds::Union{Tuple{Float64,Float64,Float64,Float64},Nothing}=nothing, n_sigma::Union{Tuple{Float64,Float64},Float64,Nothing}=nothing)
    l_sigma::Float64 = r_sigma::Float64 = 0.0
    k_min::Float64, k_max::Float64, z_min::Float64, z_max::Float64 = 0.0, 0.0, 0.0, 0.0
    if !isnothing(bounds)
        k_min, k_max, z_min, z_max = bounds
    elseif !isnothing(n_sigma)
        if isa(n_sigma, Tuple) && length(n_sigma) == 2
            l_sigma, r_sigma = n_sigma
        else
            l_sigma = r_sigma = n_sigma
        end
        z_min = model.z̄ - l_sigma * model.σ
        z_max = model.z̄ + r_sigma * model.σ
        k_min = (1 - l_sigma * model.σ / model.z̄)^(1 / (1 - model.α)) * model.kbar
        k_max = (1 + r_sigma * model.σ / model.z̄)^(1 / (1 - model.α)) * model.kbar
    else
        error("Must specify bounds or n_sigma")
    end
    @assert k_min > 0 && z_min > 0 && k_max > k_min && z_max > z_min
    k_grid::Vector{Float64}, z_grid::Vector{Float64}, kk::Vector{Float64}, zz::Vector{Float64}, dk::Float64, dz::Float64 = get_grid(model, k_min, k_max, z_min, z_max, I, J)
    _Dk_F::SparseMatrixCSC{Float64,Int64} = spdiagm(0 => [-ones(I - 1); 0.0], 1 => ones(I - 1))
    Dk_F::SparseMatrixCSC{Float64,Int64} = kron(sparse(LinearAlgebra.I, J, J), (1 / dk) * _Dk_F)
    _Dk_B::SparseMatrixCSC{Float64,Int64} = spdiagm(-1 => -ones(I - 1), 0 => [0.0; ones(I - 1)])
    Dk_B::SparseMatrixCSC{Float64,Int64} = kron(sparse(LinearAlgebra.I, J, J), (1 / dk) * _Dk_B)
    mu::Vector{Float64} = model.θ * (model.z̄ .- zz)
    _Dz_F::SparseMatrixCSC{Float64,Int64} = spdiagm(0 => [-ones(J - 1); 0.0], 1 => ones(J - 1))
    Dz_F::SparseMatrixCSC{Float64,Int64} = kron((1 / dz) * _Dz_F, sparse(LinearAlgebra.I, I, I))
    _Dz_B::SparseMatrixCSC{Float64,Int64} = spdiagm(-1 => -ones(J - 1), 0 => [0.0; ones(J - 1)])
    Dz_B::SparseMatrixCSC{Float64,Int64} = kron((1 / dz) * _Dz_B, sparse(LinearAlgebra.I, I, I))
    _Dzz::SparseMatrixCSC{Float64,Int64} = spdiagm(-1 => ones(J - 1), 0 => [-1.0; fill(-2.0, J - 2); -1.0], 1 => ones(J - 1))
    Dzz::SparseMatrixCSC{Float64,Int64} = kron((1 / dz^2) * _Dzz, sparse(LinearAlgebra.I, I, I))
    Dz_U::SparseMatrixCSC{Float64,Int64} = spdiagm(mu .< 0) * Dz_B + spdiagm(mu .>= 0) * Dz_F
    mu_Dz::SparseMatrixCSC{Float64,Int64} = spdiagm(mu) * Dz_U
    half_sigma2_Dzz::SparseMatrixCSC{Float64,Int64} = 0.5 .* model.σ .^ 2 .* Dzz
    v0::Vector{Float64} = u(model, f(model, kk, zz)) / model.ρ
    v::Vector{Float64} = copy(v0)
    c::Vector{Float64} = zeros(I * J)
    status::Int64 = 0
    # Allocate memory outside the loop to avoid reallocation and improve performance
    vk_F::Vector{Float64} = zeros(I * J)
    vk_F_2d::Matrix{Float64} = zeros(I, J)
    vk_B::Vector{Float64} = zeros(I * J)
    vk_B_2d::Matrix{Float64} = zeros(I, J)
    v̄k::Vector{Float64} = zeros(I * J)
    indicator_F::Vector{Float64} = zeros(I * J)
    indicator_B::Vector{Float64} = zeros(I * J)
    indicator_bar::Vector{Float64} = zeros(I * J)
    vk_U::Vector{Float64} = zeros(I * J)
    S_F::Vector{Float64} = zeros(I * J)
    S_B::Vector{Float64} = zeros(I * J)
    S_Dk::SparseMatrixCSC{Float64,Int64} = spzeros(I * J, I * J)
    A::SparseMatrixCSC{Float64,Int64} = spzeros(I * J, I * J)
    coef::SparseMatrixCSC{Float64,Int64} = spzeros(I * J, I * J)
    b::Vector{Float64} = zeros(I * J)
    v_new::Vector{Float64} = zeros(I * J)
    total_time::Float64 = 0.0
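    # Value-function iteration: compute upwind derivatives, back out optimal
    # consumption, assemble the generator, then take one implicit time step.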
    for iter::Int64 in 1:max_iter
        vk_F .= Dk_F * v
        vk_F_2d = reshape(vk_F, (I, J))
        vk_F_2d[I, :] = u_prime(model, f(model, k_grid[end] .* ones(Float64, J), z_grid) .- model.δ .* k_grid[end])
        vk_F .= vec(vk_F_2d)
        vk_B .= Dk_B * v
        # LinearAlgebra.mul!(vk_B, Dk_B, v)
        vk_B_2d .= reshape(vk_B, (I, J))
        vk_B_2d[1, :] .= u_prime(model, f(model, k_grid[1] .* ones(Float64, J), z_grid) .- model.δ .* k_grid[1])
        vk_B .= vec(vk_B_2d)
        v̄k .= u_prime(model, f(model, kk, zz) .- model.δ .* kk)
        S_F .= f(model, kk, zz) .- model.δ .* kk .- u_prime_inv(model, vk_F)
        S_B .= f(model, kk, zz) .- model.δ .* kk .- u_prime_inv(model, vk_B)
        indicator_F = S_F .> 0
        indicator_B = S_B .< 0
        indicator_bar = 1.0 .- indicator_F .- indicator_B
        vk_U = indicator_F .* vk_F .+ indicator_B .* vk_B .+ indicator_bar .* v̄k
        c = u_prime_inv(model, vk_U)
        S_Dk = construct_S_Dk(S_B, S_F, dk, I, J)
        # validate_transition_matrix(S_Dk) || error("Transition matrix is invalid")
        # validate_transition_matrix(mu_Dz) || error("Transition matrix is invalid")
        # validate_transition_matrix(half_sigma2_Dzz) || error("Transition matrix is invalid")
        # println("Iteration $iter")
        # display(Matrix(S_Dk))
        # display(Matrix(mu_Dz))
        # display(Matrix(half_sigma2_Dzz))
        # display(v)
        # display(c)
        A .= S_Dk .+ mu_Dz .+ half_sigma2_Dzz
        coef .= (model.ρ + 1 / Delta) .* sparse(LinearAlgebra.I, I * J, I * J) .- A
        b .= u(model, c) .+ v ./ Delta
        prob = LinearSolve.LinearProblem(coef, b)
        t_start = time()
        # @time v_new .= LinearSolve.solve(prob, LinearSolve.KLUFactorization(reuse_symbolic=false)).u
        # @time v_new .= LinearSolve.solve(prob, LinearSolve.UMFPACKFactorization(reuse_symbolic=false)).u
        @time v_new .= LinearSolve.solve(prob, LinearSolve.SparspakFactorization(reuse_symbolic=false)).u
        t_end = time()
        total_time += t_end - t_start
        # v_new .= coef \ b
        err::Float64 = LinearAlgebra.norm(v_new - v)
        if err < tol
            println("Converged in $iter iterations")
            println(@sprintf("Total time taken for solving the linear system: %.6f seconds", total_time))
            status = 1
            v .= v_new
            break
        end
        v .= v_new
    end
    if status == 0
        println("Failed to converge")
    end
    return v, c, k_grid, z_grid
end
function main()
    α::Float64 = 0.3
    γ::Float64 = 2.0
    ρ::Float64 = 0.05
    δ::Float64 = 0.05
    z̄::Float64 = 1.0
    θ::Float64 = 0.5
    σ::Float64 = 0.025
    rbc::RealBusinessCycleModel = RealBusinessCycleModel(α, γ, ρ, δ, z̄, θ, σ)
    I::Int64, J::Int64 = 1001, 1001
    t_start::Float64 = time()
    Profile.init(delay=0.001)
    v::Vector{Float64}, c::Vector{Float64}, k::Vector{Float64}, z::Vector{Float64} = solve(rbc; I=I, J=J, n_sigma=(30.0, 30.0))
    t_end::Float64 = time()
    println(@sprintf("Time taken: %.6f seconds", t_end - t_start))
end
main()
```
Could you please provide the timing you’re getting for your improved code?
Your original version took 133 s on my laptop; your new version takes 47 s. That is indeed a large improvement, nearly a factor of three.
No, in the normal case it does not help at all. It only helps if the compiler cannot correctly infer the types. In the case I quoted it absolutely does not help, because it forces an expensive conversion into a less efficient type.
~34s for the new code.
Actually, no. Typing in the function signature only affects dispatch, i.e. if you have an `f(x::Int)` and an `f(x::Float64)` with different content, the compiler will choose between them based on the type of the actual argument. But in any case the function `f` will be compiled for the type of the actual argument. So if the content of `f` is the same no matter what `x` is, no performance is lost by defining the function as just `f(x)`.
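For instance (a minimal sketch; `f` and `g` are made-up names):

```julia
# Two methods with different content: the annotations only drive dispatch.
f(x::Int) = x + 1
f(x::Float64) = 2x

# A single untyped method is still specialized for each concrete argument
# type at compile time, so nothing is lost by writing just g(x).
g(x) = x + 1

f(3)     # dispatches to the Int method
f(3.0)   # dispatches to the Float64 method
g(3)     # compiled (specialized) for Int
g(3.0)   # compiled (specialized) for Float64
```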
Type assertion on variables like

```julia
a::Vector{Float64} = something
```

has as its only effect that a `convert` is inserted, like

```julia
a = convert(Vector{Float64}, something)
```

every time `a` is assigned to.

If `something` is a `Vector{Float32}`, it will be converted to a `Vector{Float64}`. If `something` is already a `Vector{Float64}`, the `convert` will just return it. It is true that the compiler can use this information, but it will only be helpful if it can't already infer the type of `something`. In that case, the inference problem should be corrected rather than hidden in this way.
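A small sketch of what that means in practice (`demo` is a made-up name):

```julia
function demo(something)
    # Every assignment to `a` in this scope is lowered to roughly
    # a = convert(Vector{Float64}, something)
    a::Vector{Float64} = something
    return a
end

demo(Float32[1, 2, 3])    # converted: allocates a new Vector{Float64}
demo([1.0, 2.0, 3.0])     # already a Vector{Float64}: convert returns it as-is
```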
If you for some reason have a computation with uncertain type, you can assert its type with

```julia
a = something::Vector{Float64}
```

This is useful for the return value of `fetch`, since type inference does not work across `fetch`:

```julia
a = fetch(task)::Vector{Float64}
```

This helps the compiler: if the `fetch` does not in fact return a `Vector{Float64}`, an error will be thrown, so the compiler can safely assume that `a` has type `Vector{Float64}` and go about its business. If you don't assert the type, the compiler does not know the type of `a` and will insert code to resolve it, perhaps every time you use `a` later.
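For example, something like this (a sketch; it assumes the task really does produce a `Vector{Float64}`):

```julia
function spawn_and_sum()
    task = Threads.@spawn rand(10)
    # Inference cannot see through fetch, so assert the type here;
    # this throws if the task returns anything else.
    a = fetch(task)::Vector{Float64}
    return sum(a)
end
```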
Edit: There is one more case of type declaration: the fields inside structs. This one is very important. Preferably, the types declared here should be concrete, i.e. `Int`, `Float32`, `Vector{Int}` and so on, not abstract types like `AbstractFloat`, `Integer`, `Vector` (without an element type), `AbstractVector{<:Real}`, etc.

The reason is that when you access such a struct, the compiler only sees the declared type, not what is actually stored there. If the declared type is concrete, the compiler knows exactly what to do, but if it's abstract (or absent, which means `::Any`), the compiler must insert code to figure out the type and handle whatever it is. This slows things down, and can be devastating if it happens in a critical part of the code.
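A minimal illustration (the struct names are made up):

```julia
struct SlowPoint
    x::Real          # abstract field type: actual type only known at runtime
end

struct FastPoint
    x::Float64       # concrete field type: known at compile time
end

# Identical code, but only the FastPoint version is type-stable.
total(v) = sum(p -> p.x, v)

total([FastPoint(rand()) for _ in 1:10])   # fully inferred
total([SlowPoint(rand()) for _ in 1:10])   # dynamic lookup on every access
```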
Btw, this is precisely why the above-mentioned `fetch` (from a `Task`) can't infer the type. A `Task` can in principle return anything, and the return value is stored in the field `result` in the `Task` structure. This field has type `Any` to accommodate all sorts of tasks, and that is all `fetch` sees. So the compiler does not know what type is returned.
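You can check this in the REPL (output from the Julia versions I have tried; the field is an internal detail and could change):

```julia
julia> fieldtype(Task, :result)
Any
```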