Depending on the frequency of non-positive-definite matrices and their size, it may pay off to use an if-else instead of a try/catch:
"""
    safe_chol!(U, Σ) -> Bool

In-place upper-triangular Cholesky factorization of `Σ`, written into `U`.
Returns `true` on success. Returns `false` as soon as a non-positive pivot
reveals that `Σ` is not positive definite, leaving `U` partially written.
Only the upper triangle of `U` is touched; the lower triangle is never read
or written. `U` and `Σ` must be square and of matching size.
"""
function safe_chol!(U::AbstractArray{<:Real,2}, Σ::AbstractArray{<:Real,2})
    p = size(Σ, 1)
    @assert p == size(Σ, 2) == size(U, 1) == size(U, 2)
    @inbounds for col ∈ 1:p
        pivot = Σ[col, col]
        for row ∈ 1:col-1
            # Standard inner-product Cholesky update for entry (row, col).
            s = Σ[row, col]
            for k ∈ 1:row-1
                s -= U[k, col] * U[k, row]
            end
            s /= U[row, row]
            U[row, col] = s
            pivot -= abs2(s)
        end
        # A non-positive pivot means Σ is not positive definite: bail out.
        pivot > 0 || return false
        U[col, col] = √pivot
    end
    return true
end
"""
    logdettri(tri)

Log-determinant of a triangular matrix `tri`, computed as the sum of the
logs of its diagonal entries. Only the diagonal is read. Summing logs
avoids overflow/underflow of the diagonal product; if you trust numerical
stability you could instead accumulate the product and take a single
`log` at the end.
"""
function logdettri(tri::AbstractMatrix{T}) where T
    n = size(tri, 1)
    @assert n == size(tri, 2)
    acc = zero(T)
    @inbounds for d ∈ 1:n
        acc += log(tri[d, d])
    end
    return acc
end
"""
    logpdf_normal_try(X, Σ)

Sum of log-densities of the rows of `X` under a zero-mean multivariate
normal with covariance `Σ`, using exception handling: if `chol(Σ)` throws
(i.e. `Σ` is not positive definite), return `-Inf` converted to the
promoted element type of the inputs.
"""
function logpdf_normal_try(X::AbstractMatrix{TX},
                           Σ::AbstractMatrix{TΣ}) where {TX, TΣ}
    n, m = size(X)
    try
        chol_upper = chol(Σ)
        whitened = X / chol_upper
        # n copies of the normalizing constant plus the quadratic form.
        normalizer = m * log(2π) + 2 * logdet(chol_upper)
        return -0.5 * (n * normalizer + sum(abs2, whitened))
    catch
        return convert(promote_type(TX, TΣ), -Inf)
    end
end
"""
    logpdf_normal_if(X, Σ)

Same log-density as the try/catch variant, but branches on the `Bool`
returned by `safe_chol!` instead of catching an exception, so the
non-positive-definite case pays no exception-handling cost.
"""
function logpdf_normal_if(X::AbstractMatrix{TX},
                          Σ::AbstractMatrix{TΣ}) where {TX, TΣ}
    n, m = size(X)
    T = promote_type(TX, TΣ)
    U = Matrix{T}(m, m)  # uninitialized scratch for the Cholesky factor
    if safe_chol!(U, Σ)
        # Only the upper triangle of U is valid; wrap it so the solve
        # ignores the uninitialized lower triangle.
        whitened = X / UpperTriangular(U)
        return -0.5 * (n * (m * log(2π) + 2 * logdettri(U)) + sum(abs2, whitened))
    else
        return convert(T, -Inf)  # Σ was not positive definite
    end
end
julia> PD = randn(50,40) |> x -> x' * x ;
julia> PSD = randn(35,40) |> x -> x' * x ;
julia> X = randn(100,40) * chol(PD) ;
julia> using BenchmarkTools
julia> @btime logpdf_normal_if($X, $PD)
32.942 μs (3 allocations: 43.95 KiB)
-12346.835627794037
julia> @btime logpdf_normal_try($X, $PD)
33.493 μs (8 allocations: 44.08 KiB)
-12346.835627794037
julia> @btime logpdf_normal_if($X, $PSD)
6.518 μs (1 allocation: 12.63 KiB)
-Inf
julia> @btime logpdf_normal_try($X, $PSD)
28.794 μs (6 allocations: 12.75 KiB)
-Inf
julia> PD = randn(15,10) |> x -> x' * x;
julia> PSD = randn(8,10) |> x -> x' * x;
julia> X = randn(100,10) * chol(PD);
julia> @btime logpdf_normal_if($X, $PD)
11.622 μs (2 allocations: 8.81 KiB)
-2441.958129016739
julia> @btime logpdf_normal_try($X, $PD)
12.524 μs (7 allocations: 8.94 KiB)
-2441.958129016739
julia> @btime logpdf_normal_if($X, $PSD)
215.772 ns (1 allocation: 896 bytes)
-Inf
julia> @btime logpdf_normal_try($X, $PSD)
22.612 μs (6 allocations: 1.00 KiB)
-Inf
julia> PD = randn(150,100) |> x -> x' * x;
julia> PSD = randn(80,100) |> x -> x' * x;
julia> X = randn(500,100) * chol(PD);
julia> @btime logpdf_normal_if($X, $PD)
305.014 μs (4 allocations: 468.91 KiB)
-184577.00794031422
julia> @btime logpdf_normal_try($X, $PD)
266.962 μs (9 allocations: 469.03 KiB)
-184577.00794031422
julia> @btime logpdf_normal_if($X, $PSD)
67.166 μs (2 allocations: 78.20 KiB)
-Inf
julia> @btime logpdf_normal_try($X, $PSD)
92.714 μs (7 allocations: 78.33 KiB)
-Inf
Of course, if you invest more time you’re likely to find a better solution.
Alternatively, a better Cholesky decomposition algorithm probably won't fall behind LAPACK so quickly (assuming your covariance matrix is free of dual numbers).