OLS with collinearity and weights using GLM.jl

I am having a problem fitting an OLS model with weights and collinearity using GLM.jl.
Here is a simple example

y = rand(10)
x = rand(10, 2)
x = hcat(x,x)
lm(x,y) # correctly removes last two columns
lm(x, y, wts = ones(10)) # fails at cholesky! in GLM.delbeta!
lm(x, y, wts = ones(10), dropcollinear = true) # # fails at cholesky! in GLM.delbeta!

I further narrowed this down to the following function call:

linmod = LinearModel(GLM.LmResp(y,ones(10)), GLM.cholpred(x, true))
GLM.delbeta!(linmod.pp, linmod.rr.y, linmod.rr.wts) # fails
GLM.delbeta!(linmod.pp, linmod.rr.y) # works

using

@lessGLM.delbeta!(linmod.pp, linmod.rr.y, linmod.rr.wts)

I get

function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
    cf = cholfactors(p.chol)
    piv = p.chol.p
    cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), p.X)), p.X)[piv, piv]
    cholesky!(Hermitian(cf, Symbol(p.chol.uplo)))
    ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r))
    p
end

and

@less GLM.delbeta!(linmod.pp, linmod.rr.y)
function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal
    ch = p.chol
    delbeta = mul!(p.delbeta, adjoint(p.X), r)
    rnk = rank(ch)
    if rnk == length(delbeta)
        ldiv!(ch, delbeta)
    else
        permute!(delbeta, ch.p)
        for k=(rnk+1):length(delbeta)
            delbeta[k] = -zero(T)
        end
        LAPACK.potrs!(ch.uplo, view(ch.factors, 1:rnk, 1:rnk), view(delbeta, 1:rnk))
        invpermute!(delbeta, ch.p)
    end
    p
end

Am I doing something wrong here? I am not so familiar with weighted regression but would it be possible to apply a similar function to regression with weights?

Here is a naive implementation that seems to work:

y = rand(10)
x = rand(10, 2)
xc = hcat(x,x)
wts = [ones(5)..., 0.5 .* (ones(5))...];
julia> lm(x, y, wts = wts)
LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}:

Coefficients:
──────────────────────────────────────────────────────────────
       Coef.  Std. Error     t  Pr(>|t|)  Lower 95%  Upper 95%
──────────────────────────────────────────────────────────────
x1  0.294281    0.394139  0.75    0.4860  -0.6918      1.28036
x2  0.410263    0.390553  1.05    0.3375  -0.566846    1.38737
──────────────────────────────────────────────────────────────
T = eltype(xc)
ch = cholesky!((xc'diagm(wts)xc), Val(true), tol = -one(T), check = false) 
delbeta = zeros(T, size(xc,2))
delbeta = mul!(delbeta, adjoint(xc), diagm(wts)*y)
rnk = rank(ch)
permute!(delbeta, ch.p)
for k=(rnk+1):length(delbeta)
    delbeta[k] = -zero(Float64)
end
LAPACK.potrs!(ch.uplo, view(ch.factors, 1:rnk, 1:rnk), view(delbeta, 1:rnk))
invpermute!(delbeta, ch.p)
julia> delbeta[1:rnk] ≈ coef(lm(x, y, wts = wts))
true

File an issue. This probably shouldn’t happen.

1 Like

That would have been an awesome issue number 2 days ago. Anyways, thank you @pdeffebach and for anyone finding this follow here:

https://github.com/JuliaStats/GLM.jl/issues/420