There is actually an undocumented LinearAlgebra.inv!
function that acts in-place on an LU factorization, which can be constructed in-place with lu!
:
inv!(A) = LinearAlgebra.inv!(lu!(A))
However, it still allocates some small 1d arrays internally:
julia> A = rand(1000,1000);
julia> @btime inv($A);
30.062 ms (5 allocations: 8.13 MiB)
julia> @btime inv!($A);
28.390 ms (3 allocations: 508.09 KiB)
To completely eliminate all allocations, you need to pre-allocate the low-level workspace vectors and call lower-level LAPACK functions (based on code for getrf!
and getri!
).
import Base: require_one_based_indexing
import LinearAlgebra: checksquare, chkstride1
import LinearAlgebra.BLAS: @blasfunc, BlasInt
import LinearAlgebra.LAPACK: chkargsok, chklapackerror, liblapack, getrf!, getri!
for (getrf, getri, elty) in ((:dgetrf_,:dgetri_,:Float64), (:sgetrf_,:sgetri_,:Float32), (:zgetrf_,:zgetri_,:ComplexF64), (:cgetrf_,:cgetri_,:ComplexF32))
@eval function getrf!(A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt})
require_one_based_indexing(A)
chkstride1(A)
chkstride1(ipiv)
m, n = size(A)
lda = max(1,stride(A, 2))
length(ipiv) ≥ min(m,n) || throw(DimensionMismatch())
info = Ref{BlasInt}()
ccall((@blasfunc($getrf), liblapack), Cvoid,
(Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
m, n, A, lda, ipiv, info)
chkargsok(info[])
A, ipiv, info[] #Error code is stored in LU factorization type
end
@eval function getri!(A::AbstractMatrix{$elty}, ipiv::AbstractVector{BlasInt}, work::AbstractVector{$elty})
require_one_based_indexing(A, ipiv)
chkstride1(A, ipiv)
n = checksquare(A)
if n != length(ipiv)
throw(DimensionMismatch("ipiv has length $(length(ipiv)), but needs $n"))
end
lda = max(1,stride(A, 2))
lwork = BlasInt(length(work))
lwork ≥ n || throw(DimensionMismatch())
info = Ref{BlasInt}()
ccall((@blasfunc($getri), liblapack), Cvoid,
(Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
n, A, lda, ipiv, work, lwork, info)
chklapackerror(info[])
A
end
end
struct InvWorkspace{T}
ipiv::Vector{BlasInt}
work::Vector{T}
end
function InvWorkspace(A::AbstractMatrix{T}) where {T}
n = checksquare(A)
ipiv = Vector{BlasInt}(undef, n)
work = Vector{T}(undef, 64*n)
return InvWorkspace{T}(ipiv, work)
end
function inv!(A::AbstractMatrix{T}, W::InvWorkspace{T}) where {T}
getrf!(A, W.ipiv)
return getri!(A, W.ipiv, W.work)
end
which gives
julia> W = InvWorkspace(A);
julia> @btime inv!($A, $W);
28.423 ms (0 allocations: 0 bytes)