Hello,

I'm trying to differentiate a left matrix division on CuArrays. I have the lower-triangular factors l1 and l2 of two matrices M1 and M2 respectively, so that M1 = l1*l1' and M2 = l2*l2'. I wrote two functions: one that reconstructs the full matrices and divides, and one that does forward-backward substitution solves on the triangular factors directly.

MWE:
```julia
using CUDA, LinearAlgebra, Flux

l1 = tril(rand(5,5)) |> cu
l2 = tril(rand(5,5)) |> cu

function f(L1,L2)
    tr((L2*L2')\(L1*L1'))
end

g = gradient(Flux.params(l1)) do
    f(l1,l2)
end
```
throws

```
ERROR: ArgumentError: cannot take the CPU address of a CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}
Stacktrace:
[1] unsafe_convert(#unused#::Type{Ptr{Float32}}, x::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
@ CUDA C:\Users\dehaybe\.julia\packages\CUDA\Axzxe\src\array.jl:315
[2] getrf!(A::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
@ LinearAlgebra.LAPACK C:\Users\dehaybe\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\LinearAlgebra\src\lapack.jl:575
[3] lu!(A::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ::RowMaximum; check::Bool)
@ LinearAlgebra C:\Users\dehaybe\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\LinearAlgebra\src\lu.jl:81
[4] lu(A::Adjoint{Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, pivot::RowMaximum; check::Bool)
@ LinearAlgebra C:\Users\dehaybe\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\LinearAlgebra\src\lu.jl:279
[5] lu(A::Adjoint{Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, pivot::RowMaximum) (repeats 2 times)
@ LinearAlgebra C:\Users\dehaybe\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\LinearAlgebra\src\lu.jl:278
[6] \(A::Adjoint{Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, B::Diagonal{Float32, FillArrays.Fill{Float32, 1, Tuple{Base.OneTo{Int64}}}})
@ LinearAlgebra C:\Users\dehaybe\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\LinearAlgebra\src\generic.jl:1142
[7] (::Zygote.var"#766#767"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}})(Z̄::Diagonal{Float32, FillArrays.Fill{Float32, 1, Tuple{Base.OneTo{Int64}}}})
@ Zygote C:\Users\dehaybe\.julia\packages\Zygote\cCyLF\src\lib\array.jl:507
[8] #3077#back
@ C:\Users\dehaybe\.julia\packages\ZygoteRules\AIbCs\src\adjoint.jl:67 [inlined]
[9] Pullback
@ c:\Users\dehaybe\OneDrive - UCL\Doctorat\julia\ReinforcementLearning\src\ReinforcementLearningZoo\src\algorithms\policy_gradient\mpo.jl:300 [inlined]
[10] (::typeof(∂(f)))(Δ::Float32)
@ Zygote C:\Users\dehaybe\.julia\packages\Zygote\cCyLF\src\compiler\interface2.jl:0
[11] Pullback
@ c:\Users\dehaybe\OneDrive - UCL\Doctorat\julia\ReinforcementLearning\src\ReinforcementLearningZoo\src\algorithms\policy_gradient\mpo.jl:296 [inlined]
[12] (::Zygote.var"#94#95"{Zygote.Params, typeof(∂(#13)), Zygote.Context})(Δ::Float32)
@ Zygote C:\Users\dehaybe\.julia\packages\Zygote\cCyLF\src\compiler\interface.jl:357
[13] gradient(f::Function, args::Zygote.Params)
@ Zygote C:\Users\dehaybe\.julia\packages\Zygote\cCyLF\src\compiler\interface.jl:76
[14] top-level scope
@ c:\Users\dehaybe\OneDrive - UCL\Doctorat\julia\ReinforcementLearning\src\ReinforcementLearningZoo\src\algorithms\policy_gradient\mpo.jl:295
```
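Reading frames [1]–[6], I believe the failure comes from Zygote's adjoint for `\`: the cotangent produced by `tr` is a lazy `Diagonal(Fill(...))`, and `Adjoint{_, CuArray} \ Diagonal` dispatches to the generic LinearAlgebra path, which calls CPU LAPACK's `getrf!`. A minimal repro of that call outside AD (my own sketch, not part of the MWE):

```julia
using CUDA, LinearAlgebra, FillArrays

A = cu(rand(Float32, 5, 5))
# The same call Zygote's `\`-adjoint ends up making in frame [6]: a lazy
# Diagonal{Fill} cotangent hits the generic `\`, which tries `lu` and then
# CPU LAPACK's getrf! on the CuArray, raising the CPU-address error above.
A' \ Diagonal(Fill(1f0, 5))
```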
The second function avoids reconstructing M2 and instead does the forward-backward substitution solves on the triangular factors:

```julia
function f2(L1,L2)
    L1tril = LowerTriangular(L1)
    L2tril = LowerTriangular(L2)
    Y = L2tril\(L1tril*L1tril')
    X = L2tril'\Y
    tr(X)
end

g = gradient(Flux.params(l1)) do
    f2(l1,l2)
end
```
throws

```
ERROR: ArgumentError: cannot take the CPU address of a CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}
Stacktrace:
[1] unsafe_convert(#unused#::Type{Ptr{Float32}}, x::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
@ CUDA C:\Users\dehaybe\.julia\packages\CUDA\Axzxe\src\array.jl:315
[2] unsafe_convert(#unused#::Type{Ptr{Float32}}, V::SubArray{Float32, 2, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, UnitRange{Int64}}, false})
@ Base .\subarray.jl:427
[3] trtrs!(uplo::Char, trans::Char, diag::Char, A::SubArray{Float32, 2, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, UnitRange{Int64}}, false}, B::SubArray{Float32, 1, Vector{Float32}, Tuple{UnitRange{Int64}}, true})
@ LinearAlgebra.LAPACK C:\Users\dehaybe\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\LinearAlgebra\src\lapack.jl:3424
[4] ldiv!(A::LowerTriangular{Float32, SubArray{Float32, 2, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, UnitRange{Int64}}, false}}, B::SubArray{Float32, 1, Vector{Float32}, Tuple{UnitRange{Int64}}, true})
@ LinearAlgebra C:\Users\dehaybe\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\LinearAlgebra\src\triangular.jl:727
[5] ldiv!(xA::LowerTriangular{Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, b::SparseArrays.SparseVector{Float32, Int64})
@ SparseArrays C:\Users\dehaybe\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\SparseArrays\src\sparsevector.jl:1912
[6] ldiv!(A::LowerTriangular{Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, B::SparseArrays.SparseMatrixCSC{Float32, Int64})
@ LinearAlgebra C:\Users\dehaybe\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\LinearAlgebra\src\bidiag.jl:744
[7] \(A::LowerTriangular{Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, B::Diagonal{Float32, FillArrays.Fill{Float32, 1, Tuple{Base.OneTo{Int64}}}})
@ LinearAlgebra C:\Users\dehaybe\AppData\Local\Programs\Julia-1.7.2\share\julia\stdlib\v1.7\LinearAlgebra\src\triangular.jl:1661
[8] (::Zygote.var"#758#759"{UpperTriangular{Float32, Adjoint{Float32, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}})(Ȳ::Diagonal{Float32, FillArrays.Fill{Float32, 1, Tuple{Base.OneTo{Int64}}}})
@ Zygote C:\Users\dehaybe\.julia\packages\Zygote\cCyLF\src\lib\array.jl:491
[9] #3049#back
@ C:\Users\dehaybe\.julia\packages\ZygoteRules\AIbCs\src\adjoint.jl:67 [inlined]
[10] Pullback
@ c:\Users\dehaybe\OneDrive - UCL\Doctorat\julia\ReinforcementLearning\src\ReinforcementLearningZoo\src\algorithms\policy_gradient\mpo.jl:310 [inlined]
[11] (::typeof(∂(f2)))(Δ::Float32)
@ Zygote C:\Users\dehaybe\.julia\packages\Zygote\cCyLF\src\compiler\interface2.jl:0
[12] Pullback
@ c:\Users\dehaybe\OneDrive - UCL\Doctorat\julia\ReinforcementLearning\src\ReinforcementLearningZoo\src\algorithms\policy_gradient\mpo.jl:314 [inlined]
[13] (::Zygote.var"#94#95"{Zygote.Params, typeof(∂(#15)), Zygote.Context})(Δ::Float32)
@ Zygote C:\Users\dehaybe\.julia\packages\Zygote\cCyLF\src\compiler\interface.jl:357
[14] gradient(f::Function, args::Zygote.Params)
@ Zygote C:\Users\dehaybe\.julia\packages\Zygote\cCyLF\src\compiler\interface.jl:76
[15] top-level scope
@ c:\Users\dehaybe\OneDrive - UCL\Doctorat\julia\ReinforcementLearning\src\ReinforcementLearningZoo\src\algorithms\policy_gradient\mpo.jl:313
```
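The forward pass of f2 runs fine on the GPU; only the pullback fails, again because the `tr` cotangent is a `Diagonal(Fill(...))` (frame [7]). One thing I sketched (untested; `diagsum`, `Id5`, and `f3` are my own names) is to keep the cotangent a dense CuArray by summing against an explicit identity instead of calling `tr`:

```julia
using CUDA, LinearAlgebra, Flux

# Dense identity built once outside the gradient call, so Zygote treats it
# as a constant and the cotangent flowing into the solves stays a CuArray.
const Id5 = cu(Matrix{Float32}(I, 5, 5))

diagsum(X) = sum(X .* Id5)  # same value as tr(X), but with a dense cotangent

function f3(L1, L2)
    L1tril = LowerTriangular(L1)
    L2tril = LowerTriangular(L2)
    Y = L2tril \ (L1tril * L1tril')
    X = L2tril' \ Y
    diagsum(X)
end

g = gradient(Flux.params(l1)) do
    f3(l1, l2)
end
```

I haven't been able to confirm that the triangular-solve adjoints then stay on the device all the way through, though.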
What I actually need is the gradient, with respect to l1, of the trace of inv(M2)*M1.
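As a last resort, the objective has a closed form gradient, ∂/∂L1 tr((L2*L2')⁻¹ L1*L1') = 2 (L2*L2')⁻¹ L1, so I could hand-write the rule myself. A minimal sketch assuming ChainRulesCore (which Zygote picks up); `traceratio` is my own name, the L2 gradient is left out, and I haven't verified this end-to-end:

```julia
using CUDA, LinearAlgebra, ChainRulesCore

# tr((L2*L2') \ (L1*L1')) via two triangular solves, all on the device.
function traceratio(L1, L2)
    Y = LowerTriangular(L2) \ (L1 * L1')
    X = LowerTriangular(L2)' \ Y
    tr(X)
end

function ChainRulesCore.rrule(::typeof(traceratio), L1, L2)
    function traceratio_pullback(Δ)
        # Closed form: ∂/∂L1 = 2 (L2*L2')⁻¹ L1, computed with two
        # triangular solves so nothing leaves the GPU.
        G = LowerTriangular(L2)' \ (LowerTriangular(L2) \ L1)
        return (NoTangent(), (2 * Δ) .* G, @not_implemented("∂L2 not needed here"))
    end
    return traceratio(L1, L2), traceratio_pullback
end
```

But I'd much rather rely on AD than maintain hand-written rules.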
Unfortunately, this does not seem to be differentiable on the GPU as-is. I've been looking for a few days but I'm stuck; it's quite hard to integrate AD and CUDA. Is there a package that I may not know of that could help? Thanks!!