Cannot ForwardDiff through linear least squares via QR

Hey,

I’d like to get derivatives for a linear least squares problem where the parameter appears in the right-hand side:

using LinearAlgebra
using ForwardDiff

A = rand(10, 3)
b = rand(10)

qr_fact = qr(A)

function f(p)
    res = b * p
    v = qr_fact \ res
    return v
end

g = ForwardDiff.derivative(f, 0.1)

However, this produces

ERROR: MethodError: no method matching ldiv!(::LinearAlgebra.QRCompactWY{ForwardDiff.Dual{ForwardDiff.Tag{typeof(f), Float64}, Float64, 1}, Matrix{ForwardDiff.Dual{ForwardDiff.Tag{typeof(f), Float64}, Float64, 1}}, Matrix{ForwardDiff.Dual{ForwardDiff.Tag{typeof(f), Float64}, Float64, 1}}}, ::Vector{ForwardDiff.Dual{ForwardDiff.Tag{typeof(f), Float64}, Float64, 1}})

Closest candidates are:
  ldiv!(::Bidiagonal, ::AbstractVecOrMat)
   @ LinearAlgebra C:\Users\SomeUser\AppData\Local\Programs\Julia-1.9.2\share\julia\stdlib\v1.9\LinearAlgebra\src\bidiag.jl:729
  ldiv!(::Cholesky, ::AbstractVecOrMat)
   @ LinearAlgebra C:\Users\SomeUser\AppData\Local\Programs\Julia-1.9.2\share\julia\stdlib\v1.9\LinearAlgebra\src\cholesky.jl:579
  ldiv!(::Diagonal, ::AbstractVecOrMat)
   @ LinearAlgebra C:\Users\SomeUser\AppData\Local\Programs\Julia-1.9.2\share\julia\stdlib\v1.9\LinearAlgebra\src\diagonal.jl:415
  ...

Stacktrace:
 [1] \(F::LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}}, B::Vector{ForwardDiff.Dual{ForwardDiff.Tag{typeof(f), Float64}, Float64, 1}})
   @ LinearAlgebra C:\Users\SomeUser\AppData\Local\Programs\Julia-1.9.2\share\julia\stdlib\v1.9\LinearAlgebra\src\LinearAlgebra.jl:536
 [2] f(p::ForwardDiff.Dual{ForwardDiff.Tag{typeof(f), Float64}, Float64, 1})
   @ Main .\REPL[6]:3
 [3] derivative(f::typeof(f), x::Float64)
   @ ForwardDiff C:\Users\SomeUser\.julia\packages\ForwardDiff\PcZ48\src\derivative.jl:14
 [4] top-level scope
   @ REPL[8]:1

Judging from the MethodError, the generic \ fallback promotes the Float64 factorization to the Dual element type, for which no ldiv! method exists. Applying the operations individually

function f2(p)
    res = b * p
    v = qr_fact.R \ (qr_fact.Q' * res)[1:3]
    return v
end

g = ForwardDiff.derivative(f2, 0.1)

results in

ERROR: MethodError: no method matching lmul!(::Adjoint{ForwardDiff.Dual{ForwardDiff.Tag{typeof(f2), Float64}, Float64, 1}, LinearAlgebra.QRCompactWYQ{ForwardDiff.Dual{ForwardDiff.Tag{typeof(f2), Float64}, Float64, 1}, Matrix{ForwardDiff.Dual{ForwardDiff.Tag{typeof(f2), Float64}, Float64, 1}}, Matrix{ForwardDiff.Dual{ForwardDiff.Tag{typeof(f2), Float64}, Float64, 1}}}}, ::Vector{ForwardDiff.Dual{ForwardDiff.Tag{typeof(f2), Float64}, Float64, 1}})

Closest candidates are:
  lmul!(::SparseArrays.SPQR.QRSparseQ, ::StridedVecOrMat)
   @ SparseArrays C:\Users\SomeUser\AppData\Local\Programs\Julia-1.9.2\share\julia\stdlib\v1.9\SparseArrays\src\solvers\spqr.jl:227
  lmul!(::Diagonal, ::AbstractVecOrMat)
   @ LinearAlgebra C:\Users\SomeUser\AppData\Local\Programs\Julia-1.9.2\share\julia\stdlib\v1.9\LinearAlgebra\src\diagonal.jl:253
  lmul!(::UniformScaling, ::AbstractVecOrMat)
   @ LinearAlgebra C:\Users\SomeUser\AppData\Local\Programs\Julia-1.9.2\share\julia\stdlib\v1.9\LinearAlgebra\src\uniformscaling.jl:305
  ...

Stacktrace:
 [1] *(adjQ::Adjoint{Float64, LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}}}, B::Vector{ForwardDiff.Dual{ForwardDiff.Tag{typeof(f2), Float64}, Float64, 1}})
   @ LinearAlgebra C:\Users\SomeUser\AppData\Local\Programs\Julia-1.9.2\share\julia\stdlib\v1.9\LinearAlgebra\src\qr.jl:727
 [2] f2(p::ForwardDiff.Dual{ForwardDiff.Tag{typeof(f2), Float64}, Float64, 1})
   @ Main .\REPL[10]:3
 [3] derivative(f::typeof(f2), x::Float64)
   @ ForwardDiff C:\Users\SomeUser\.julia\packages\ForwardDiff\PcZ48\src\derivative.jl:14
 [4] top-level scope
   @ REPL[11]:1

Since the derivatives themselves are straightforward, this is probably a problem with missing method overloads for Dual types rather than with the math.
How can this be solved?

I’m using ForwardDiff v0.10.36 and Julia 1.9.2.

There is an older but similar question here that might give some pointers: Using ForwardDiff on qr factorization

Thanks for the pointer. I’ve looked at that question, but I don’t need to differentiate qr() itself.
I want to get derivatives of the least-squares solution

x^*(p) = \operatorname{arg\,min}_x \lVert A x - b(p) \rVert_2,

which I compute via a QR decomposition, since A is a constant matrix. That is, letting A = QR, we have

x^*(p) = R^{-1} Q^T b(p),

where R^{-1} Q^T is some constant matrix.
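
To make this concrete: since that matrix is constant, it can be materialized once, after which the function is an ordinary matrix-vector product that ForwardDiff differentiates without touching any factorization code. A minimal sketch (M and f3 are just illustrative names, not from any package):

using LinearAlgebra
using ForwardDiff

A = rand(10, 3)
b = rand(10)

F = qr(A)
M = F.R \ Matrix(F.Q)'   # constant 3×10 matrix R⁻¹Q₁ᵀ; Matrix(F.Q) is the thin Q

f3(p) = M * (b * p)      # plain Float64 matvec; Dual numbers pass straight through

g = ForwardDiff.derivative(f3, 0.1)

This sidesteps the missing ldiv!/lmul! methods entirely, though it does copy the factors into a new matrix.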

@mohamed82008 now that ImplicitDifferentiation.jl supports ForwardDiff.jl, is this something that could be solved with DifferentiableFactorizations.jl?

This works:

using LinearAlgebra
using ForwardDiff

A = rand(10, 3)
b = rand(10)

struct QRMatrices
    Qt::Matrix{Float64}   # transpose of the thin Q factor (n × dim)
    R::Matrix{Float64}    # upper-triangular factor (n × n)

    function QRMatrices(A)
        qr_fact = qr(A)
        dim, n = size(A)

        # Extract the first n columns of Q by applying it to unit vectors
        # and store them as the rows of Qt.
        qt = Matrix{Float64}(undef, n, dim)
        for j = 1:n
            qt[j, :] = qr_fact.Q * LinearAlgebra.I[1:dim, j]
        end
        r = Matrix(qr_fact.R)
        new(qt, r)
    end
end

# Least-squares solve with plain matrices, so a Dual-valued right-hand side
# goes through generic code paths instead of the missing LAPACK methods.
function Base.:\(lhs::QRMatrices, b)
    return lhs.R \ (lhs.Qt * b)
end

my_qr = QRMatrices(A)
function f4(p)
    res = b * p
    v = my_qr \ res
    return v
end

g = ForwardDiff.derivative(f4, 0.1)

However, I would prefer not to copy the QR matrices.
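
One copy-free sketch (my own workaround, not an existing API; solve_ls and f5 are made-up names, and qr_fact and b are the objects from the first post): because the factorization is constant and the solve is linear in the right-hand side, the value and each partial can be solved for independently with the plain Float64 factorization and reassembled into Duals afterwards:

using ForwardDiff

function solve_ls(F, rhs::AbstractVector{ForwardDiff.Dual{T,V,N}}) where {T,V,N}
    # d/dp (F \ rhs(p)) = F \ rhs'(p), since F is constant and \ is linear:
    # one LAPACK-backed solve for the values, one per partial.
    vals = F \ ForwardDiff.value.(rhs)
    parts = ntuple(i -> F \ ForwardDiff.partials.(rhs, i), N)
    return [ForwardDiff.Dual{T}(vals[k], ntuple(i -> parts[i][k], N))
            for k in eachindex(vals)]
end

f5(p) = solve_ls(qr_fact, b * p)
g = ForwardDiff.derivative(f5, 0.1)

The factorization is never promoted, so nothing is copied; the cost is N + 1 ordinary solves.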

DF.jl is for differentiating the output of a factorization with respect to its input. The factorization in the OP is a constant rather than a function of the input, so you shouldn’t need DF.jl.

This is slightly simpler and marginally faster in my benchmarks:

A1 = rand(10, 3)
b1 = rand(10)

function F(A, b, x)
    qr_fact = qr(A)
    m, n = size(A)
    res = b * x
    # Indexing Q materializes its first n columns as a plain Matrix, so the
    # multiplication and the triangular solve both work with Dual numbers.
    v = qr_fact.R \ (qr_fact.Q[:, 1:n]' * res)
    return v
end

g = ForwardDiff.derivative(x -> F(A1, b1, x), 0.1)

Benchmarking with, for example:

using BenchmarkTools
@benchmark ForwardDiff.derivative(x -> F($(rand(10, 3)), $(rand(10)), x), 0.1)
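
If A is truly constant, the factorization and the thin-Q slice can also be hoisted out of the differentiated closure, so each call does only a multiply and a triangular solve. A sketch with made-up names (F1, Q1t, G), reusing A1 and b1 from above:

F1 = qr(A1)
n = size(A1, 2)
Q1t = F1.Q[:, 1:n]'   # materialized once; constant n×10 matrix

G(x) = F1.R \ (Q1t * (b1 * x))

g = ForwardDiff.derivative(G, 0.1)

This separates the one-time factorization cost from the per-derivative cost in the benchmark.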