In particular, a more systematic comparison is to plot the relative error as a function of the condition number.
For example, here is a plot of the relative error in the least-squares solution A \ b versus condition number, for randomly generated 1000\times 5 matrices A with condition numbers \kappa ranging from 10 to 10^9 and logarithmically spaced singular values. It clearly shows that the Cholesky / normal-equations approach and the sweep approach have similar errors, both scaling as \kappa^2, while the error of the QR approach scales only as \kappa.
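This \kappa^2 behavior is exactly what conditioning predicts: the singular values of A^T A are the squares of the singular values of A, so \kappa(A^T A) = \kappa(A)^2, and any method that forms the normal equations effectively works with the squared condition number. Here is a minimal, self-contained sketch confirming this numerically (the matrix construction below is purely illustrative and independent of the randcond helper defined in the source code):

using LinearAlgebra
# Build a 1000×5 matrix with condition number exactly 10^4 from its SVD:
U = Matrix(qr(randn(1000, 5)).Q)   # 1000×5 with orthonormal columns
V = Matrix(qr(randn(5, 5)).Q)      # 5×5 orthogonal
A = U * Diagonal(exp10.(range(0, -4, length=5))) * V'
@show cond(A)       # ≈ 1.0e4
@show cond(A' * A)  # ≈ 1.0e8 = cond(A)^2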
Source code
using LinearAlgebra
# generate a random (Haar-uniform) m×n real orthogonal-column matrix
# using algorithm adapted from https://arxiv.org/abs/math-ph/0609050
function randQ(m::Integer, n::Integer)
    QR = qr(randn(m, n))
    return QR.Q * Diagonal(sign.(diag(QR.R)))
end
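# Illustrative check (not part of the original listing): the columns of
# randQ(m, n) are orthonormal, so Q'Q should equal the identity to roundoff.
let Q = randQ(1000, 5)
    @assert norm(Q'Q - I) < 1e-12
end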
# random m×n matrix with condition number κ and
# logarithmically-spaced singular values.
function randcond(m::Integer, n::Integer, κ::Real)
    κ ≥ 1 || throw(ArgumentError("κ=$κ should be ≥ 1"))
    r = min(m, n)
    σ = exp.(range(0, -log(κ), length=r))   # singular values from 1 down to 1/κ
    U = randQ(m, r)
    V = randQ(n, r)
    return U * Diagonal(σ) * V'
end
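# Illustrative check (not part of the original listing): the generated matrix
# should have (up to roundoff) exactly the requested condition number.
@assert isapprox(cond(randcond(1000, 5, 1e6)), 1e6, rtol=1e-8)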
import LinearRegressionKit
function leastsquare_errors(m, n, κ)
    A = randcond(m, n, κ)
    b = randn(m)
    # "exact" reference solution, computed with 4096-bit BigFloat arithmetic:
    x_exact = setprecision(4096) do
        Float64.(big.(A) \ big.(b))
    end
    x_qr = A \ b                              # backslash uses QR for rectangular A
    x_chol = try
        cholesky!(Hermitian(A'A)) \ (A'b)     # normal equations via Cholesky
    catch
        (A'A) \ (A'b)                         # Cholesky can fail if κ is large
    end
    x_sweep = LinearRegressionKit.sweep_linreg(A, b)   # sweep-operator solve
    return norm(x_qr - x_exact) / norm(x_exact),
           norm(x_chol - x_exact) / norm(x_exact),
           norm(x_sweep - x_exact) / norm(x_exact)
end
κ = exp10.(range(1, 9, length=15))     # 15 condition numbers from 10^1 to 10^9
errs = leastsquare_errors.(1000, 5, κ)

using PyPlot
loglog(κ, getindex.(errs, 1), "bo-")   # QR errors
loglog(κ, getindex.(errs, 2), "r*-")   # Cholesky / normal-equations errors
loglog(κ, getindex.(errs, 3), "gs-")   # sweep errors
loglog(κ, κ * 1e-16, "k--")            # reference line ~ κ
loglog(κ, κ.^2 * 1e-18, "k:")          # reference line ~ κ²
xlabel(L"condition number $\kappa$")
ylabel("relative error in fit")
legend(["QR", "Cholesky", "Sweep", L"\sim \kappa", L"\sim \kappa^2"], loc="upper left")