Code speed decreases when loop number increases

A basic issue is that you are allocating tons of little matrices over and over, and so at some point it will start spending all of its time doing garbage collection.

The solution is to (a) allocate arrays like pool once and re-use and (b) use StaticArrays for all of your little 2x2 and 4x4 matrices, since those are fixed sizes that you can tell the compiler to exploit.

I translated your code to use these principles, without changing what it does (I think), and also removed some Matlab-isms like allocating 1-column matrices when what you really want is a 1d array. It should be much faster:

using LinearAlgebra
using StatsBase
using Statistics
using Random
using StaticArrays

# adapted from rand(Haar(2)) in RandomMatrices, modified
# to use staticarrays, only for beta=2
function rand_haar2(::Val{n}) where n
    M = @SMatrix randn(ComplexF64, n,n) # = rand(Ginibre(2, n))
    q = qr(M).Q
    L = cispi.(2 .* @SVector(rand(n)))
    return q*diagm(L)
end

function main2(; ps=3, theta = 1.39, Nk = 800, Nr = 500)
    sum_z = zeros(Nk)
    sum_zlog = zeros(Nk)
    sum_s = zeros(Nk)
    sum_slog = zeros(Nk)
    
    Npool = 10^ps
    pool = zeros(Npool)
    pool_sample = zeros(Npool)
    logpool = zeros(Npool)
    z_list = zeros(Nk) 
    s_list = zeros(Nk) 
    z_log_list = zeros(Nk)
    s_log_list = zeros(Nk)

    Kup= @SMatrix[cos(theta) 0; 0 sin(theta)]
    Kdown = @SMatrix[sin(theta) 0; 0 cos(theta)]
    P2up = kron(@SMatrix[1. 0.;0. 1.], @SMatrix[1 0; 0 0])
    P2down = kron(@SMatrix[1 0;0 1], @SMatrix[0 0;0 1])
    
    for i in 1:Nr
        pool .= 0.5
        pool_sample .= 0
        z_list .= 0
        s_list .= 0
        z_log_list .= 0
        z_log_list .= 0

        for k in 1:Nk
            poolcount = 0
            while poolcount < Npool
                z1 = pool[rand(1:Npool)]
                rho1 = diagm(@SVector[z1,1-z1])

                z2 = pool[rand(1:Npool)]
                rho2 = diagm(@SVector[z2,1-z2])

                u1 = rand_haar2(Val{2}())
                u2 = rand_haar2(Val{2}())

                K1up = u1*Kup*u1'
                K1down = u1*Kdown*u1'
                
                K2up = u2*Kup*u2'
                K2down = u2*Kdown*u2'

                rho1p = K1up*rho1*K1up'
                rho2p = K2up*rho2*K2up'

                p1 = real(tr(rho1p+rho1p'))/2
                p2 = real(tr(rho2p+rho2p'))/2
          
                rho1p = (rho1p+rho1p')/(2*p1)
                rho2p = (rho2p+rho2p')/(2*p2)
             
                rho = kron(rho1p,rho2p)

                U = rand_haar2(Val{4}())
                rho_p = P2up*U*rho*U'*P2up'
                p = real(tr(rho_p+rho_p'))/2
             
                temp =(rho_p+rho_p')/2
                #rho_f = @SMatrix[tr(temp[1:2,1:2]) tr(temp[1:2,3:4]); tr(temp[3:4,1:2]) tr(temp[3:4,3:4])]/(p)
                rho_f = @SMatrix[temp[1,1]+temp[2,2] temp[1,3]+temp[2,4]; temp[3,1]+temp[4,2] temp[3,3]+temp[4,4]]/(p)
                
                rho_f = (rho_f+rho_f')/2
                t = abs(tr(rho_f*rho_f))
                z = (1-t)/(1+sqrt(2*t-1))
                if !iszero(z)
                    poolcount = poolcount+1
                    pool_sample[poolcount] = abs(z)
                end
            end

            pool .= pool_sample
            pool_sample .= 0

            logpool .= log.(pool)
            z_log_list[k] = mean(logpool)
            s_log_list[k] = std(logpool)/sqrt(Npool)
            z_list[k] = mean(pool)
            s_list[k] = std(pool)/sqrt(Npool)
        end

        sum_z .+= z_list
        sum_zlog .+= z_log_list
        sum_s .+= s_list.^2
        sum_slog .+= s_log_list.^2
    end
    return sum_z/Nr, sqrt.(sum_s)/Nr, sum_zlog/Nr, sqrt.(sum_slog)/Nr
end
8 Likes