Improving Performance of a Loop

Hi!

I am trying to make the following function faster. Any suggestion would be very much appreciated.

using Random
using LinearAlgebra
using BenchmarkTools


###############################################################
# Loglikelihood
###############################################################

const J = 75
const Ι = 200  # This is greek letter \Iota
const N = 10000
const Mat1 = ones(Int8,Ι,N)
const Mat2 = ones(Int8,J,N)
const Mat3 =  rand(Ι,J,N) .>0.9
NumberParams = Ι 
const τ = rand(Ι,J)

#NumberParams = 2


@views function Log_Likelihood(N,Ι,J,τ,Mat1,Mat2,Mat3,params) 
    llike = 0.0
    θ = params[1]
    γ = params[2:end]
    γ = vcat(γ,1.0)

    #llike_test = zeros(Real,Ι,J,N)
    vals = zeros(Real,size(Mat3)[1:2])
    log_vals = zeros(Real,size(Mat3)[1:2])

    vals .= exp.(-θ.*τ .+ γ)
    log_vals = log.(vals)

    cond= sum(Mat1,dims=1)'


    @fastmath @inbounds for n = 1:N
        if cond[n] > 1
            for j = 1:J
                denom_index = 0.0 :: Real
                if Mat2[j,n] == 1
                    denom_index = Mat1[:,n]'*vals[:,j]
                    for i = 1:Ι
                        llike += Mat3[i,j,n]*Mat2[j,n]*Mat1[i,n]*(log_vals[i,j] - log(denom_index))
                        #llike_test[i,j,n] = Mat3[i,j,n]*Mat2[j,n]*Mat1[i,n]*(log_vals[i,j] - log(denom_index))
                    end
                end
            end
        end
    end

    llike = -llike

    return  llike

end


XXX = @time Log_Likelihood(N,Ι,J,τ,Mat1,Mat2,Mat3,vcat(7.0,ones(NumberParams-1)))

 77.749866 seconds (3.60 G allocations: 62.597 GiB, 3.56% gc time)
1.0246133571164802e8

The first thing you must do is to avoid abstract element types in containers. This is an absolute performance killer. Just changing Real to Float64 gives me a 50x speedup.

Abstract types are fine in function signatures, but you must avoid them as container eltypes and field types in structs. They stop the compiler from being able to write optimized code for your actual types.
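To see the effect in isolation, here is a minimal sketch (toy 200×75 arrays mirroring Ι×J, not code from this thread) where the only difference is the declared eltype:

using BenchmarkTools

A_abstract = zeros(Real, 200, 75)     # abstract eltype: every element is boxed
A_concrete = zeros(Float64, 200, 75)  # concrete eltype: plain machine floats
A_abstract .= rand.()
A_concrete .= rand.()

sumsq(A) = sum(x -> x^2, A)

@btime sumsq($A_abstract)   # allocations and dynamic dispatch on every element
@btime sumsq($A_concrete)   # no allocations; the compiler can specialize and use SIMD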

This, btw, does nothing:

denom_index = 0.0 :: Real

Just remove the ::Real; it is only a type assertion on the literal 0.0, not a declaration of the variable's type.

If performance is important to you (and it should be), then read Performance Tips · The Julia Language before proceeding.


Thanks so much for this. Indeed this is a performance killer. The reason I am using Real instead of Float64 is that I am going to optimize this function, computing the gradients with ForwardDiff, and I get the following error if I do not include the Real:

LoadError: MethodError: no method matching Float64(::ForwardDiff.Dual{ForwardDiff.Tag{var"#7#8", Float64}, Float64, 12})
Closest candidates are:
  (::Type{T})(::Real, ::RoundingMode) where T<:AbstractFloat at rounding.jl:200
  (::Type{T})(::T) where T<:Number at boot.jl:760
  (::Type{T})(::AbstractChar) where T<:Union{AbstractChar, Number} at char.jl:50

However, as you are pointing out, declaring those types is clearly suboptimal from a performance standpoint.
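For context, this error happens because ForwardDiff evaluates the function with ForwardDiff.Dual numbers, which cannot be stored in a Float64 array. Below is a minimal sketch of the failure and of a buffer whose eltype follows the input (f_bad and f_good are made-up names; the replies further down suggest essentially this fix, or better, no preallocation at all):

using ForwardDiff

function f_bad(params)
    buf = zeros(Float64, length(params))   # hard-coded Float64 eltype
    buf .= exp.(params)                    # fails when params holds Dual numbers
    return sum(buf)
end

function f_good(params)
    buf = zeros(eltype(params), length(params))  # eltype follows the input
    buf .= exp.(params)
    return sum(buf)
end

try
    ForwardDiff.gradient(f_bad, ones(3))   # throws the MethodError quoted above
catch err
    @show typeof(err)
end

ForwardDiff.gradient(f_good, ones(3))      # works, returns exp.(ones(3))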

The following program

using Random
using LinearAlgebra
using BenchmarkTools

@views function Log_Likelihood(N,Ι,J,τ,Mat1,Mat2,Mat3,params)
    llike = 0.0
    θ = params[1]
    γ = params[2:end]
    γ = vcat(γ,1.0)

    vals = zeros(size(Mat3)[1:2])
    log_vals = zeros(size(Mat3)[1:2])

    vals .= exp.(-θ.*τ .+ γ)
    log_vals = log.(vals)

    number_plants = sum(Mat1,dims=1)'
    for n = 1 : N
        if number_plants[n]>1
            for j = 1 : J
                denom_index = 0.0 :: Real
                if Mat2[j,n] == 1
                    denom_index = Mat1[:,n]'*vals[:,j]
                    for i = 1 : Ι
                        llike += Mat3[i,j,n]*Mat2[j,n]*Mat1[i,n]*(log_vals[i,j] - log(denom_index))
                    end
                end
            end
        end
    end
    return -llike
end


function test()
    J = 75
    Ι = 200
    N = 10000
    Mat1 = ones(Ι,N)
    Mat2 = ones(J,N)
    Mat3 = rand(Ι,J,N) .>0.9
    NumberParams = Ι
    τ = rand(Ι,J)

    XXX = Log_Likelihood(N,Ι,J,τ,Mat1,Mat2,Mat3,vcat(7.0,ones(NumberParams-1)))
end

@btime test()

prints

  2.191 s (23 allocations: 1.16 GiB)

in the console. @DNF’s analysis about the main culprit is correct. BTW: are Mat1 and Mat2 not superfluous?

Do you have a MWE for this problem?

Sure, find it below. It is the same as above, including @DNF's suggestion, with the Optim optimization added:

using Random, Optim, NLSolversBase
using LinearAlgebra
using BenchmarkTools


###############################################################
# Loglikelihood
###############################################################

const J = 75
const Ι = 200  # This is greek letter \Iota
const N = 10000
const Mat1 = ones(Int8,Ι,N)
const Mat2 = ones(Int8,J,N)
const Mat3 =  rand(Ι,J,N) .>0.9
NumberParams = Ι 
const τ = rand(Ι,J)

#NumberParams = 2


@views function Log_Likelihood(N,Ι,J,τ,Mat1,Mat2,Mat3,params) 
    llike = 0.0
    θ = params[1]
    γ = params[2:end]
    γ = vcat(γ,1.0) 

    #llike_test = zeros(Real,Ι,J,N)
    vals = zeros(Float64,size(Mat3)[1:2])
    log_vals = zeros(Float64,size(Mat3)[1:2])

    vals .= exp.(-θ.*τ .+ γ)
    log_vals .= log.(vals)

    cond = sum(Mat1,dims=1)'


    @fastmath @inbounds for n = 1:N
        if cond[n] > 1
            for j = 1:J
                denom_index = 0.0
                if Mat2[j,n] == 1
                    denom_index = Mat1[:,n]'*vals[:,j]
                    for i = 1:Ι
                        llike += Mat3[i,j,n]*Mat2[j,n]*Mat1[i,n]*(log_vals[i,j] - log(denom_index))
                        #llike_test[i,j,n] = Mat3[i,j,n]*Mat2[j,n]*Mat1[i,n]*(log_vals[i,j] - log(denom_index))
                    end
                end
            end
        end
    end

    llike = -llike

    return  llike

end


XXX = @time Log_Likelihood(N,Ι,J,τ,Mat1,Mat2,Mat3,vcat(7.0,ones(NumberParams-1)))

func = TwiceDifferentiable(vars -> Log_Likelihood(N,Ι,J,τ,Mat1,Mat2,Mat3,vars[1:NumberParams]),
                           vcat(7.0,ones(NumberParams-1)); autodiff=:forward);

opt1 = @time Optim.optimize(func, vcat(7.0,ones(NumberParams-1)),show_trace = true)


P.S.: You are right that in this MWE Mat1 and Mat2 are irrelevant, but in my real code they are not, so I have them here as placeholders.

You shouldn’t handle this by forcing your containers to have an abstract type (this should never be necessary), but by allowing your containers to adapt to the input types. For example, you could write

vals = zeros(typeof(θ),size(Mat3)[1:2])

or something like that. This is not the best way, however. Instead, don’t preallocate at all; you can just write

log_vals = -θ.*τ .+ γ
vals = exp.(log_vals)

(no need to first call exp and then take the log of the exponentiated values.)
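A minimal, self-contained sketch of that suggestion (toy sizes and a hypothetical build_vals helper, not the thread's full function): the broadcasts create the arrays directly, so their eltype follows θ and γ and works for both Float64 and ForwardDiff.Dual inputs.

τ_demo = rand(5, 3)   # stands in for the Ι×J matrix τ

function build_vals(τ, params)
    θ = params[1]
    γ = vcat(params[2:end], 1.0)
    log_vals = -θ .* τ .+ γ    # eltype inferred from θ, γ, and τ
    vals = exp.(log_vals)      # log_vals already holds the logs, no log.(vals) needed
    return vals, log_vals
end

vals, log_vals = build_vals(τ_demo, vcat(7.0, ones(size(τ_demo, 1) - 1)))
@show eltype(vals)             # Float64 here; a Dual type when called through ForwardDiff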


BTW, it’s not generally considered good style to provide the sizes of your arrays as input parameters (N,I,J), unless you plan for these parameters to take values other than the actual array sizes. The sizes are available either by calling size or by using axes, or eachindex when indexing. Relying on sizes as input parameters makes the code less robust and less readable.
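As a small illustration of that advice (a hypothetical helper, not code from this thread), the sizes can be read off the arrays with size and axes instead of being passed as N, Ι, J:

function count_active(Mat1)                 # no N or Ι arguments
    counts = zeros(Int, size(Mat1, 2))      # one entry per column
    for n in axes(Mat1, 2), i in axes(Mat1, 1)
        counts[n] += Mat1[i, n]
    end
    return counts
end

count_active(ones(Int8, 200, 10))           # equivalent to vec(sum(Mat1, dims=1)) here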


Perfect. I’ll change this. Thanks for the advice

This works. Thanks so much!