function instruct2!(state, U, loc)
    # destructure the 2x2 matrix in column-major order, so that U == [a b; c d]
    a, c, b, d = U
    step = 1 << (loc - 1)
    step_2 = 1 << loc
    # apply the 2x2 matrix to each pair of elements whose indices differ by `step`
    for j in 0:step_2:size(state, 1)-step
        for i in j+1:j+step
            u1rows!(state, i, i+step, a, b, c, d)
        end
    end
    return state
end

@inline @inbounds function u1rows!(state::AbstractVector, i::Int, j::Int, a, b, c, d)
    # in-place 2x2 transform of the pair (state[i], state[j])
    w = state[i]
    v = state[j]
    state[i] = a*w + b*v
    state[j] = c*w + d*v
    state
end
I’m using an SMatrix instead of a Matrix for a small (2x2) matrix. The only operation that touches the matrix itself is the destructuring a, c, b, d = U (U is the matrix), which just iterates U; the rest of the code only works with the scalars a, b, c, d. Yet the performance does seem to depend on the matrix type, and the gap between SMatrix and Matrix grows with the size of state.
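Since the definitions of st and U aren’t shown above, here is a minimal setup sketch of the kind of inputs I mean (the state length and element types are only illustrative):

using StaticArrays

st = rand(ComplexF64, 1 << 20)             # state vector; the length here is just an example
U  = SMatrix{2,2}(rand(ComplexF64, 2, 2))  # the 2x2 matrix; Matrix(U) is the dense equivalent

# destructuring iterates U in column-major order:
# a = U[1,1], c = U[2,1], b = U[1,2], d = U[2,2]
a, c, b, d = U

instruct2!(st, U, 1)   # apply the 2x2 matrix at location loc = 1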
I tested this on the following Julia setup:
Julia Version 1.1.0
Commit 80516ca202 (2019-01-21 21:24 UTC)
Platform Info:
  OS: macOS (x86_64-apple-darwin18.2.0)
  CPU: Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz
  WORD_SIZE: 64
  LIBM: libimf
  LLVM: libLLVM-6.0.1 (ORCJIT, skylake)
Environment:
  JULIA_EDITOR = code
julia> @benchmark foreach(k->instruct2!($st, $U, 1), 1:100)
BenchmarkTools.Trial:
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     183.067 ms (0.00% GC)
  median time:      191.323 ms (0.00% GC)
  mean time:        192.796 ms (0.00% GC)
  maximum time:     209.240 ms (0.00% GC)
  --------------
  samples:          26
  evals/sample:     1
julia> @benchmark foreach(k->instruct2!($st, $(Matrix(U)), 1), 1:100)
BenchmarkTools.Trial:
  memory estimate:  0 bytes
  allocs estimate:  0
  --------------
  minimum time:     178.031 ms (0.00% GC)
  median time:      181.558 ms (0.00% GC)
  mean time:        184.131 ms (0.00% GC)
  maximum time:     219.924 ms (0.00% GC)
  --------------
  samples:          28
  evals/sample:     1
But this looks unexpected, since the main cost should have nothing to do with which matrix type is used: U is only destructured once per call, and all the remaining work only touches the scalars a, b, c, d…
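One way I could sanity-check that reasoning (just a sketch, not something benchmarked above) is to shrink the state so that the per-call overhead, including the destructuring of U, dominates over the loop, and then compare the two matrix types there:

using StaticArrays, BenchmarkTools

Us = SMatrix{2,2}(rand(ComplexF64, 2, 2))
Um = Matrix(Us)
small = rand(ComplexF64, 1 << 4)   # tiny state: the inner loop is negligible here

@btime instruct2!($small, $Us, 1)  # SMatrix version
@btime instruct2!($small, $Um, 1)  # Matrix version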