"""
    instruct2!(state, U, loc)

Apply the 2x2 matrix `U` in place to `state`, combining pairs of
entries whose indices differ by `2^(loc - 1)`, and return `state`.

Destructuring `a, c, b, d = U` iterates `U` in column-major order,
so `a = U[1,1]`, `c = U[2,1]`, `b = U[1,2]`, `d = U[2,2]`.
"""
function instruct2!(state, U, loc)
    a, c, b, d = U
    pairgap  = 1 << (loc - 1)  # index distance between the two coupled entries
    blocklen = 1 << loc        # span covered by one block of coupled pairs
    limit = size(state, 1) - pairgap
    base = 0
    while base <= limit
        # couple entry `lo` with its partner `lo + pairgap` inside this block
        for lo in (base + 1):(base + pairgap)
            u1rows!(state, lo, lo + pairgap, a, b, c, d)
        end
        base += blocklen
    end
    return state
end
"""
    u1rows!(state::AbstractVector, i::Int, j::Int, a, b, c, d)

Overwrite entries `i` and `j` of `state` with the 2x2 linear combination
`[a b; c d] * [state[i]; state[j]]` and return `state`.
"""
@inline @inbounds function u1rows!(state::AbstractVector, i::Int, j::Int, a, b, c, d)
    x, y = state[i], state[j]
    state[i] = a * x + b * y
    state[j] = c * x + d * y
    return state
end
I'm using an `SMatrix` instead of a `Matrix` for a small (2x2) matrix. The only operation that depends on the matrix type is the index iteration during destructuring, `a, c, b, d = U` (where `U` is the matrix); the rest of the code works only with the scalars `a`, `b`, `c`, `d`. Yet the performance does not reflect this: the gap between `SMatrix` and `Matrix` grows with the size of `state`. I tested this on Julia:
Julia Version 1.1.0
Commit 80516ca202 (2019-01-21 21:24 UTC)
Platform Info:
OS: macOS (x86_64-apple-darwin18.2.0)
CPU: Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz
WORD_SIZE: 64
LIBM: libimf
LLVM: libLLVM-6.0.1 (ORCJIT, skylake)
Environment:
JULIA_EDITOR = code
julia> @benchmark foreach(k->instruct2!($st, $U, 1), 1:100)
BenchmarkTools.Trial:
memory estimate: 0 bytes
allocs estimate: 0
--------------
minimum time: 183.067 ms (0.00% GC)
median time: 191.323 ms (0.00% GC)
mean time: 192.796 ms (0.00% GC)
maximum time: 209.240 ms (0.00% GC)
--------------
samples: 26
evals/sample: 1
julia> @benchmark foreach(k->instruct2!($st, $(Matrix(U)), 1), 1:100)
BenchmarkTools.Trial:
memory estimate: 0 bytes
allocs estimate: 0
--------------
minimum time: 178.031 ms (0.00% GC)
median time: 181.558 ms (0.00% GC)
mean time: 184.131 ms (0.00% GC)
maximum time: 219.924 ms (0.00% GC)
--------------
samples: 28
evals/sample: 1
But this looks unexpected, since the main cost should have nothing to do with which matrix type is used…