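Using `@view` is slower than copying when the slice is strided (non-contiguous in memory, e.g. `A[dim,1:10,1:10]`), but faster when the slice is contiguous (e.g. `A[:,:,dim]`).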
julia> function foo!(A,B,C,dim)
# Benchmark variant: multiply the slice A[dim,1:10,1:10] by B via `mul!`,
# using plain (non-view) indexing for both slices; always returns `nothing`.
# NOTE(review): for standard arrays, range indexing returns a copy, so the
# product is written into a temporary copy of C[dim,1:10,1:10] and C itself
# is left unchanged. If C is meant to be updated, the destination must be a
# view (as in foo4!) -- confirm intent.
mul!(C[dim,1:10,1:10], A[dim,1:10,1:10], B)
nothing
end
foo! (generic function with 1 method)
julia> function foo2!(A,B,C,dim)
# Benchmark variant of foo!: the slice of A and B[1:10,1:10] are passed to
# `mul!` as views instead of copies; always returns `nothing`.
# NOTE(review): the destination C[dim,1:10,1:10] is still plain indexing, so
# (for standard arrays) the result goes into a temporary copy and C is left
# unchanged.
# NOTE(review): the view of A fixes the first index, so for a column-major
# array it is presumably non-contiguous in memory; that may be why this
# variant is the slowest -- confirm.
mul!(C[dim,1:10,1:10], @view(A[dim,1:10,1:10]), @view(B[1:10,1:10]))
nothing
end
foo2! (generic function with 1 method)
julia> function foo3!(A,B,C,dim)
# Benchmark variant: slices along the last dimension; A[:,:,dim] is passed to
# `mul!` as a view, B is passed whole; always returns `nothing`.
# NOTE(review): the destination C[:,:,dim] is plain indexing, so (for
# standard arrays) the product is written into a temporary copy and C is
# left unchanged -- compare foo4!, which uses a view for the destination.
mul!(C[:,:,dim], @view(A[:,:,dim]), B)
nothing
end
foo3! (generic function with 1 method)
julia> function foo4!(A,B,C,dim)
# Benchmark variant: slices along the last dimension, with views for both the
# destination C[:,:,dim] and the left operand A[:,:,dim]; B is passed whole.
# Because the destination is a view, `mul!` stores the product directly in C.
# Always returns `nothing`.
mul!(@view(C[:,:,dim]), @view(A[:,:,dim]), B)
nothing
end
foo4! (generic function with 1 method)
julia> function foo5!(A,B,C,dim)
# Benchmark variant: slices along the last dimension with plain (non-view)
# indexing for both C[:,:,dim] and A[:,:,dim]; always returns `nothing`.
# NOTE(review): for standard arrays both slices are copies, so the product is
# written into a temporary and C is left unchanged -- compare foo4!.
mul!(C[:,:,dim], A[:,:,dim], B)
nothing
end
foo5! (generic function with 1 method)
# NOTE(review): A, B and C are passed without `$` interpolation; if they are
# non-constant globals the timings may include global-access overhead -- confirm.
julia> @btime foo!(A,B,C,1)  # dim-first slices, no views
1.140 μs (2 allocations: 1.75 KiB)
julia> @btime foo2!(A,B,C,1)  # dim-first slices, views for A and B only
2.022 μs (3 allocations: 21.38 KiB)
julia> @btime foo3!(A,B,C,1)  # dim-last slices, view for A only
450.289 ns (1 allocation: 400 bytes)
julia> @btime foo4!(A,B,C,1)  # dim-last slices, views for C and A
324.885 ns (0 allocations: 0 bytes)
julia> @btime foo5!(A,B,C,1)  # dim-last slices, no views
526.380 ns (2 allocations: 800 bytes)