Computing a linear combination of all reduced density matrices of a given pure state

Hi once again. Besides the function from QuantumOptics I manage with the help of a coworker to implement the following solution to the above issue. It uses the "parallel bit deposit and extract” instructions, which are part of BMI2. See this. This is the working code: it receives the reduced density matrix (2^nA x 2^nA) as well as another matrix ( 2^N x 2^N ) which is then modified:

function get_mask(rA::Vector{Int}, N::Int)
    mask = UInt64(0)
    for a in rA
        mask = mask | (1 << (N-a))
    end 
    return mask
end

pdep(x::UInt64, y::UInt64) = ccall("llvm.x86.bmi.pdep.64", llvmcall, UInt64, (UInt64, UInt64), x, y)

function embed_rdm(rho:: Matrix{ComplexF64}, N::Int, rA::Vector{Int}, rhoToModify::Matrix{ComplexF64})
    nA = length(rA)
    nB = N - nA;

    maskA = get_mask(rA, N);
    maskB = get_mask(setdiff(collect(1:N), rA), N)

    for iB in 0:(2^nB - 1)
        sB = pdep(UInt64(iB), maskB)
        for iA1 in 0:(2^nA - 1)
            sA1 = pdep(UInt64(iA1), maskA)
            for iA2 in 0:(2^nA - 1)
                sA2 = pdep(UInt64(iA2), maskA)
                rhoToModify[ (sB | sA1) + 1, (sB | sA2) + 1] += rho[iA1 + 1, iA2 + 1]/(2^nB)
            end
        end
    end
end

I hope is useful to someone. Thanks for all the help :slight_smile: