# Help with type instabilities

I’m having issues removing type instabilities from the function below, and would appreciate some tips.

``````A = rand(36,36);
out = ; sysdims = [2,9,2];
@code_warntype ptrace2(A,out,sysdims)
``````
``````function ptrace2{T<:Number}(A::Matrix{T},out,sysdims)
nsys = length(sysdims)
keep = setdiff(1:nsys,out)
keepdims = sysdims[keep]
outdims = sysdims[out]::Vector{Int}
N = prod(keepdims)
R = Base.product(range.(1,keepdims)...)
S = Base.product(range.(1,outdims)...)
B = zeros(T,N,N)
inds_ii = Vector{Int}(nsys)
inds_jj = Vector{Int}(nsys)
for r in R, q in R
i, j = tindex(q,keepdims), tindex(r,keepdims)
for l=1:length(keep)
inds_ii[keep[l]] = q[l]
inds_jj[keep[l]] = r[l]
end
for s in S
for l=1:length(out)
inds_ii[out[l]] = s[l]
inds_jj[out[l]] = s[l]
end
ii = tindex(inds_ii,sysdims)
jj = tindex(inds_jj,sysdims)
B[i,j] += A[ii,jj]
end
end
return B
end

function tindex(inds,sysdims)
N = length(inds)
i = inds[N]
d = 1
for n = N-1:-1:1
d *= sysdims[n+1]
i += d * (inds[n]-1)
end
return i
end
``````

In this case a function barrier can help. Here is my attempt (no change of the code except splitting the function into two parts):

``````function ptrace2{T<:Number}(A::Matrix{T},out,sysdims)
nsys = length(sysdims)
keep = setdiff(1:nsys,out)
keepdims = sysdims[keep]
outdims = sysdims[out]::Vector{Int}
N = prod(keepdims)
R = Base.product(range.(1,keepdims)...)
S = Base.product(range.(1,outdims)...)
B = zeros(T,N,N)
_ptrace2!(B,A,R,S,nsys,sysdims,keep,keepdims,out)
end

function _ptrace2!(B,A,R,S,nsys,sysdims,keep,keepdims,out)
inds_ii = Vector{Int}(nsys)
inds_jj = Vector{Int}(nsys)
for r in R, q in R
i, j = tindex(q,keepdims), tindex(r,keepdims)
for l=1:length(keep)
inds_ii[keep[l]] = q[l]
inds_jj[keep[l]] = r[l]
end
for s in S
for l=1:length(out)
inds_ii[out[l]] = s[l]
inds_jj[out[l]] = s[l]
end
ii = tindex(inds_ii,sysdims)
jj = tindex(inds_jj,sysdims)
B[i,j] += A[ii,jj]
end
end
return B
end
``````

Using your example data, I get (Julia 0.6.0-rc2) with the barrier:

julia> @btime ptrace2(\$A, \$out, \$sysdims);
5.268 μs (25 allocations: 2.09 KiB)

while without the barrier (original code):

julia> @btime ptrace2(\$A, \$out, \$sysdims);
57.671 μs (810 allocations: 18.64 KiB)

2 Likes

It seems to me that `R = Base.product(range.(1,keepdims)...)` is not inferred since the type of `Base.product(range.(1,keepdims)...)` depends on the expansion, in your casethe type inferred is `Base.Iterators.Prod2{UnitRange{Int64},UnitRange{Int64}}`.

I think splitting the function is ok so you have a dispatch on the type of R. Maybe someone can give a better solution with a more generic iterator?

Thanks!

Just as an aside, instead of `Base.product(range.(1, keepdims)...)` you might consider using a `CartesianRange(keepdims...)` which is designed for the kind of multidimensional indexing that you’re doing.

Unfortunately, `CartesianRange` also relies on knowing the number of dimensions for type-inference, so it doesn’t solve your immediate issue. But it might at least make your code more understandable and perhaps more performant.

I tried using `CartesianRange`, but it actually made the function slower…

In any case, this is what I have now, after some more clever permutation of indices, if anyone wants to take a stab at making it faster:

``````julia> A = rand(36,36); out = (2,); sysdims = (2,9,2);
julia> @btime ptrace3(\$A,\$out,\$sysdims)
4.221 μs (35 allocations: 2.63 KiB)
``````
``````function ptrace3{T,M,nsub}(A::Matrix{T}, out::NTuple{M,Int}, sysdims::NTuple{nsub,Int})
rkeep = (nsub + 1) .- reverse(setdiff(1:nsub,out))
rout  = (nsub + 1) .- reverse([out...])
rsysdims  = reverse(sysdims)
rkeepdims = rsysdims[rkeep]::NTuple{nsub-M,Int}
routdims  = rsysdims[rout]::NTuple{M,Int}
N = prod(rkeepdims)::Int
R = product(range.(1,rkeepdims)...)
S = product(range.(1,routdims)...)
B = zeros(T,N,N)
_ptrace3!(B,A,R,S,nsub,rsysdims,rkeepdims,rkeep,rout)
end

function _ptrace3!(B,A,R,S,nsub,rsysdims,rkeepdims,rkeep,rout)
K = length(rkeepdims)
O = nsub - K
rinds_ii = Vector{Int}(nsub)
rinds_jj = Vector{Int}(nsub)
for r in R, q in R
i, j = tindex2(q,rkeepdims), tindex2(r,rkeepdims)
#println("i: \$i, j: \$j")
for k=1:K
rinds_ii[rkeep[k]] = q[k]
rinds_jj[rkeep[k]] = r[k]
end
for s in S
for o=1:O
rinds_ii[rout[o]] = s[o]
rinds_jj[rout[o]] = s[o]
end
ii, jj = tindex2(rinds_ii,rsysdims), tindex2(rinds_jj,rsysdims)
#println("ii: \$ii, jj: \$jj")
B[i,j] += A[ii,jj]
end
end
return B
end

function tindex2{N}(rinds,rsysdims::NTuple{N,Int})
i = rinds
d = 1
@inbounds for n = 2:N
d *= rsysdims[n-1]
i += d * (rinds[n]-1)
end
return i
end
``````

There is a package called `TensorOperations` which does this kind of calculation and many more. It is optimized and given the simplicity of code, it would probably save a lot of time down the line and allow someone else to worry about further optimizations.

The code for `ptrace` would look like this:

``````using BenchmarkTools
using TensorOperations

A = rand(2,9,2,2,9,2);

julia> @btime tensortrace(A,[:x1,:t,:z1,:x2,:t,:z2])
5.668 μs (65 allocations: 5.20 KiB)
2×2×2×2 Array{Float64,4}:
[:, :, 1, 1] =
5.1795   5.58526
4.18046  6.27735

[:, :, 2, 1] =
5.70867  5.40797
6.11036  4.07155

[:, :, 1, 2] =
5.26413  5.49429
4.90246  3.99036

[:, :, 2, 2] =
5.26287  3.62941
6.27622  4.9152
``````

If a 2D matrix is really necessary (not really), then a helper functions would do the job:

``````function mat(A::Array)
s,d = size(A),ndims(A)
s[1:d>>>1] != s[(1:d>>>1) + (d>>>1)] && error("tensor size cannot be matrixfied: \$s")
n = prod(s[1:d>>>1])
return reshape(A,n,n)
end

julia> @btime mat(tensortrace(A,[:x1,:t,:z1,:x2,:t,:z2]))
5.924 μs (70 allocations: 5.39 KiB)
4×4 Array{Float64,2}:
5.1795   5.70867  5.26413  5.26287
4.18046  6.11036  4.90246  6.27622
5.58526  5.40797  5.49429  3.62941
6.27735  4.07155  3.99036  4.9152
``````

Note the timings/allocations are very similar.

Hey thanks for the reply. I had indeed looked at TensorOperations, but I do need everything in 2D matrix form. I improved the code further (with some help) and managed to get the partial trace even faster:

``````julia> using Schrodinger, BenchmarkTools
julia> A = rand(36,36); out = (2,); sysdims = (2,9,2);
julia> @benchmark ptrace(\$A,\$out,\$sysdims)
BenchmarkTools.Trial:
memory estimate:  480 bytes
allocs estimate:  5
--------------
minimum time:     1.275 μs (0.00% GC)
median time:      1.431 μs (0.00% GC)
mean time:        1.520 μs (2.17% GC)
maximum time:     181.442 μs (97.48% GC)
--------------
samples:          10000
evals/sample:     10
``````

The code is now available here (the package itself is not fully ready for prime time) for those interested.

Depending on the size of the matrices involved, it might be better to go with TensorOperations. Small tests can obscure the coefficients of the running-time surface. In particular, we have the following for TensorOperations vs. Schrodinger implementation:

A comparable TensorOperations implementation with wrapping functions to make interface like `ptrace`'s:

``````using TensorOperations
using BenchmarkTools

A = rand(4000,4000); out = (2,); sysdims = (10,40,10);

function tens(A::Matrix,sysdims)
size(A)==(prod(sysdims),prod(sysdims)) || error("sysdims product must equal row and col number")
return reshape(A,sysdims...,sysdims...)
end

function mat(A::Array)
s,d = size(A),ndims(A)
s[1:d>>>1] != s[(1:d>>>1) + (d>>>1)] && error("tensor size cannot be matrixfied: \$s")
n = prod(s[1:d>>>1])
return reshape(A,n,n)
end

function traceout(A,out)
inds = collect(1:ndims(A))
inds[out+(ndims(A)>>>1)] = inds[out]
return mat(tensortrace(A,inds))
end

ptrace(A,out,sysdims) = traceout(reshape(A,sysdims...,sysdims...),collect(out))
``````

With benchmark:

``````julia> @benchmark ptrace(\$A,\$out,\$sysdims)
BenchmarkTools.Trial:
memory estimate:  83.69 KiB
allocs estimate:  72
--------------
minimum time:     2.559 ms (0.00% GC)
median time:      2.712 ms (0.00% GC)
mean time:        2.756 ms (0.11% GC)
maximum time:     5.178 ms (17.54% GC)
--------------
samples:          1810
evals/sample:     1
``````

Compared to ptrace from Schrodinger.jl:

``````A = rand(4000,4000); out = (2,); sysdims = (10,40,10);

julia> @benchmark ptrace(A,out,sysdims)
BenchmarkTools.Trial:
memory estimate:  78.47 KiB
allocs estimate:  6
--------------
minimum time:     4.812 ms (0.00% GC)
median time:      5.410 ms (0.00% GC)
mean time:        5.461 ms (0.03% GC)
maximum time:     10.425 ms (0.00% GC)
--------------
samples:          914
evals/sample:     1
``````

If the helper functions are confusing, here is a more compact version (with less parameter checking):

``````using TensorOperations
using BenchmarkTools

A = rand(4000,4000); out = (2,); sysdims = (10,40,10);

mat(A::Array) = ( n = prod(size(A)[1:ndims(A)>>>1]) ; reshape(A,n,n) )

tens(M::Matrix,sysdims) = reshape(M,sysdims...,sysdims...)

traceout(A,out) = ( n = ndims(A)>>>1 ; mat(tensortrace(A,[i-n in out ? i-n : i for i=1:2n])) )

ptrace(M,out,sysdims) = traceout(tens(M,sysdims),out)

@benchmark ptrace(\$A,\$out,\$sysdims)
``````