The following function runs 1.5x slower in Julia latest 1.0-DEV, compared to Julia 0.6. The memory allocation is the same but @code_llvm shows very mysterious code in 1.0-DEV. Any idea what is going on?
using BenchmarkTools
function gram_schmidt1!(U::Matrix{T}) where T <: Real
m = size(U, 2)
for k = 1:m
uk = view(U,:,k)
for j = 1:k-1
@views uk .-= (U[:,j] ⋅ uk) .* U[:,j]
end
uk ./= norm(uk)
end
end
function ttt()
N = 100
A = randn(N, N)
gram_schmidt1!(A)
s = sum(A)
end
I’m not sure what the problem with views are, but as suggested by kristoffer.carlsson, consider using uviews:
julia> using LinearAlgebra #modify gram_schmidt1 to accept AbstractMatrices
julia> @benchmark ttt()
BenchmarkTools.Trial:
memory estimate: 546.95 KiB
allocs estimate: 10002
--------------
minimum time: 1.447 ms (0.00% GC)
median time: 1.503 ms (0.00% GC)
mean time: 1.709 ms (4.81% GC)
maximum time: 82.099 ms (97.87% GC)
--------------
samples: 2910
evals/sample: 1
julia> using UnsafeArrays
julia> Base.mightalias(::UnsafeArray, ::UnsafeArray) = false
julia> function tttu()
N = 100
A = randn(N, N)
@uviews A begin
gram_schmidt1!(A)
end
s = sum(A)
end
julia> @benchmark tttu()
BenchmarkTools.Trial:
memory estimate: 78.20 KiB
allocs estimate: 2
--------------
minimum time: 623.537 μs (0.00% GC)
median time: 680.178 μs (0.00% GC)
mean time: 873.065 μs (3.19% GC)
maximum time: 123.324 ms (99.07% GC)
--------------
samples: 5659
evals/sample: 1
If you replace the dot and norm functions with ones written in Julia:
@inline function jdot(x,y)
@boundscheck length(x) == length(y) || throw(BoundsError())
out = zero(promote_type(eltype(x),eltype(y)))
@inbounds @simd for i = eachindex(x)
out += x[i] * y[i]
end
out
end
@inline function jnorm(x)
out = zero(eltype(x))
@inbounds @simd for i = eachindex(x)
out += x[i] * x[i]
end
sqrt(out)
end
function gram_schmidt2!(U::AbstractMatrix{T}) where T <: Real
m = size(U, 2)
@inbounds for k = 1:m
uk = view(U,:,k)
for j = 1:k-1
@views uk .-= jdot(U[:,j], uk) .* U[:,j]
end
uk ./= jnorm(uk)
end
end
function tttu2()
N = 100
A = randn(N, N)
@uviews A begin
gram_schmidt2!(A)
end
s = sum(A)
end