Edit: corrected according to @jling's comment.
Inspired by another thread, here is a micro-benchmark of `Base.kron`
for `Vector` (with the `Matrix` case included for comparison):
using LinearAlgebra
using BenchmarkTools
"""
    _kron(A::Matrix, B::Matrix)

Return the Kronecker product of `A` and `B` as a newly allocated dense matrix of
size `(size(A, 1) * size(B, 1), size(A, 2) * size(B, 2))`.

Block `(i, j)` of the result, which has the shape of `B`, holds `A[i, j] * B`.

The element type of the result is `promote_type(Float64, eltype(A), eltype(B))`.
Real inputs such as `Float64` or `Int` therefore still produce a `Float64`
matrix, while inputs that do not fit into `Float64` (e.g. complex numbers) get a
correspondingly wider result instead of failing with an `InexactError`.
"""
function _kron(A::Matrix, B::Matrix)
    p, q = size(B)
    T = promote_type(Float64, eltype(A), eltype(B))
    # The blocks written below tile `C` completely, so no zero-fill is required.
    C = Matrix{T}(undef, size(A, 1) * p, size(A, 2) * q)
    # Column-major traversal: the row-block index varies fastest.
    for j in axes(A, 2), i in axes(A, 1)
        rows = ((i - 1) * p + 1):(i * p)
        cols = ((j - 1) * q + 1):(j * q)
        # Fused broadcast writes directly into the block; `A[i, j] * B` would
        # allocate a temporary matrix on every iteration.
        C[rows, cols] .= A[i, j] .* B
    end
    return C
end
"""
    _kron!(C::Matrix, A::Matrix, B::Matrix)

Write the Kronecker product of `A` and `B` into the preallocated matrix `C` and
return `C`.

For every entry `A[i, j]`, the block of `C` at block position `(i, j)` (a block
has the shape of `B`) is overwritten with `A[i, j] * B` via `mul!` on a view of
`C`. The size of `C` is not validated up front; only the indexing of each block
is bounds-checked.
"""
function _kron!(C::Matrix, A::Matrix, B::Matrix)
    nrows, ncols = size(B)
    for i in axes(A, 1), j in axes(A, 2)
        row_hi = i * nrows
        col_hi = j * ncols
        block = view(C, (row_hi - nrows + 1):row_hi, (col_hi - ncols + 1):col_hi)
        mul!(block, A[i, j], B)
    end
    return C
end
"""
    _kron(A::Vector, B::Vector)

Return the Kronecker product of the vectors `A` and `B` as a newly allocated
vector of length `length(A) * length(B)`.

The `i`-th chunk of `length(B)` consecutive entries of the result holds
`A[i] * B`.

The element type of the result is `promote_type(Float64, eltype(A), eltype(B))`.
Real inputs such as `Float64` or `Int` therefore still produce a `Float64`
vector, while inputs that do not fit into `Float64` (e.g. complex numbers) get a
correspondingly wider result instead of failing with an `InexactError`.
"""
function _kron(A::Vector, B::Vector)
    n = length(B)
    T = promote_type(Float64, eltype(A), eltype(B))
    # The chunks written below cover `C` completely, so no zero-fill is required.
    C = Vector{T}(undef, length(A) * n)
    for i in eachindex(A)
        # Fused broadcast writes directly into the chunk; `A[i] * B` would
        # allocate a temporary vector on every iteration.
        C[((i - 1) * n + 1):(i * n)] .= A[i] .* B
    end
    return C
end
"""
    _kron!(C::Vector, A::Vector, B::Vector)

Write the Kronecker product of the vectors `A` and `B` into the preallocated
vector `C` and return `C`.

For every entry `A[i]`, the `i`-th chunk of `length(B)` consecutive entries of
`C` is overwritten with `A[i] * B` via `mul!` on a view of `C`. The length of
`C` is not validated up front; only the indexing of each chunk is
bounds-checked.
"""
function _kron!(C::Vector, A::Vector, B::Vector)
    len = length(B)
    for i in eachindex(A)
        stop = i * len
        chunk = view(C, (stop - len + 1):stop)
        mul!(chunk, A[i], B)
    end
    return C
end
# Matrix case: check the hand-written kernels against `kron`/`kron!`, then
# benchmark all four variants.
A = [1.0 3.0; 2.0 4.0]
B = copy(A)

# The allocating variant must agree with the library result.
C1, C2 = _kron(A, B), kron(A, B)
@assert C1 ≈ C2

# In-place variants, each writing into its own uninitialised 4x4 buffer.
C1 = similar(A, 4, 4)
_kron!(C1, A, B)
C2 = similar(A, 4, 4)
kron!(C2, A, B)
@assert C1 ≈ C2

# Timings, in order: _kron, kron, _kron!, kron!.
# Arguments are interpolated with `\$` so global-variable access is not timed.
@btime _kron($A, $B)
@btime kron($A, $B)
C = similar(A, 4, 4)
@btime _kron!($C, $A, $B)
@btime kron!($C, $A, $B)
# Vector case: check the hand-written kernels against `kron`/`kron!`, then
# benchmark all four variants.
A = [1.0, 2.0]
B = copy(A)

# The allocating variant must agree with the library result.
C1, C2 = _kron(A, B), kron(A, B)
@assert C1 ≈ C2

# In-place variants, each writing into its own uninitialised length-4 buffer.
C1 = similar(A, 4)
_kron!(C1, A, B)
C2 = similar(A, 4)
kron!(C2, A, B)
@assert C1 ≈ C2

# Timings, in order: _kron, kron, _kron!, kron!.
# Arguments are interpolated with `\$` so global-variable access is not timed.
@btime _kron($A, $B)
@btime kron($A, $B)
C = similar(A, 4)
@btime _kron!($C, $A, $B)
@btime kron!($C, $A, $B)
with the following results (one line per `@btime` call, in the order above):
273.597 ns (5 allocations: 656 bytes)
49.034 ns (1 allocation: 208 bytes)
67.695 ns (0 allocations: 0 bytes)
25.201 ns (0 allocations: 0 bytes)
108.141 ns (3 allocations: 304 bytes)
128.485 ns (7 allocations: 384 bytes)
20.783 ns (0 allocations: 0 bytes)
108.172 ns (6 allocations: 288 bytes)
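Is this a defect? For `Vector` arguments, `kron` and `kron!` allocate and are slower than the hand-written versions, whereas for `Matrix` arguments they are faster.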