Speed Up For Loop for Bounds Checking

Hi, I was wondering if I could get some feedback on how I can increase the performance of this section of code, I am relatively new to Julia and I am not to sure if there is a better way to do this:

x = [-88.58, -86.31, -83.89, -83.36]
y = [58.58, 58.49, 58.24, 57.54]
bndxmin = [-95,-95,-95,-90,-90,-90,-85,-85,-85,-80,-80,-95,-95,-90,-85,-80,-85,-90,-95,-95,-95,-90,-85]
bndxmax = [-95,-95,-90,-90,-90,-85,-85,-85,-80,-80,-80,-80,-90,-85,-80,-80,-80,-85,-90,-95,-90,-85,-80]
bndymin = [57.5,52.5,52.5,52.5,57.5,62.5,57.5,52.5,52.5,52.5,57.5,52.5,52.5,52.5,52.5,52.5,57.5,57.5,57.5,57.5,62.5,62.5,62.5]
bndymax = [62.5,57.5,52.5,57.5,62.5,62.5,62.5,57.5,52.5,57.5,62.5,62.5,52.5,52.5,52.5,57.5,57.5,57.5,57.5,62.5,62.5,62.5,62.5]

function getids(x, y, bndxmin, bndxmax, bndymin, bndymax)
    ijc = Vector{Union{Array{Float64,2}, Nothing}}(nothing, length(x)-1)
    logiArr = zeros(length(bndxmin))
    for ii = 1:(length(x)-1)
        for jj = 1:length(bndxmin)
            @inbounds logiArr[jj] = (bndxmin[jj] <= max(x[ii], x[ii+1])) &
                                    (bndxmax[jj] >= min(x[ii], x[ii+1])) &
                                    (bndymin[jj] <= max(y[ii], y[ii+1])) &
                                    (bndymax[jj] >= min(y[ii], y[ii+1]))
        end
        idxs = findall(cl -> (cl == 1), logiArr)
        if !isempty(idxs)
            @inbounds ijc[ii] = [idxs ones(length(idxs))*ii]
        end
    end
    return ijc
end

Profiling the code tells me it is the main body of the For Loop which fills the logiArr and the findall statement that really bogs down the code. Are there any suggestions on how I can make this loop more efficient? Typically x, y, and the bnd variables are on the order 10,000 - 50,000 elements. This section of code is part of larger function that checks where a curve intersects a grid. This particular section of code provides the indices where the x and y cross the bounds of the grid.

Have you read the “Performance tips” section of the docs? Certainly one tip which you should apply to you minimal working example is to move its code into a function. I think you do this in your production code, but still provide a “good” minimal example. Once people have a copy-pasteable code, they may well be more inclined to play around with it.

1 Like

If your code really is this way, and is not inside a function, you probably is taking a performance hit from the fact your variables ijc and logiArr are global.

Everything is in a function in the production code already but I updated the minimal example such that it is also in a function.

1 Like

Sometimes Julia can have trouble seeing that it doesn’t need to load x[ii] fresh inside each jj iteration. It can sometimes be helpful to manually “hoist” those loads (and their computation!) outside of the inner loop:

    @inbounds for ii = 1:(length(x)-1)
        max_x = max(x[ii], x[ii+1])
        # etc
        for jj = 1:length(bndxmin)
            @inbounds logiArr[jj] = (bndxmin[jj] <= max_x) &
                #etc...

Edit: Ah, that doesn’t seem to be a huge thing in this case; it just shaves off ~10%:

REPL session
julia> function getids(x, y, bndxmin, bndxmax, bndymin, bndymax)
           ijc = Vector{Union{Array{Float64,2}, Nothing}}(nothing, length(x)-1)
           logiArr = zeros(length(bndxmin))
           for ii = 1:(length(x)-1)
               for jj = 1:length(bndxmin)
                   @inbounds logiArr[jj] = (bndxmin[jj] <= max(x[ii], x[ii+1])) &
                                           (bndxmax[jj] >= min(x[ii], x[ii+1])) &
                                           (bndymin[jj] <= max(y[ii], y[ii+1])) &
                                           (bndymax[jj] >= min(y[ii], y[ii+1]))
               end
               idxs = findall(cl -> (cl == 1), logiArr)
               if !isempty(idxs)
                   @inbounds ijc[ii] = [idxs ones(length(idxs))*ii]
               end
           end
           return ijc
       end
getids (generic function with 1 method)

julia> @btime getids(x, y, bndxmin, bndxmax, bndymin, bndymax);
  1.004 μs (20 allocations: 1.84 KiB)

julia> function getids2(x, y, bndxmin, bndxmax, bndymin, bndymax)
           ijc = Vector{Union{Array{Float64,2}, Nothing}}(nothing, length(x)-1)
           logiArr = zeros(length(bndxmin))
           for ii = 1:(length(x)-1)
               max_x = max(x[ii], x[ii+1])
               min_x = min(x[ii], x[ii+1])
               max_y = max(y[ii], y[ii+1])
               min_y = min(y[ii], y[ii+1])
               for jj = 1:length(bndxmin)
                   @inbounds logiArr[jj] = (bndxmin[jj] <= max_x) &
                                           (bndxmax[jj] >= min_x) &
                                           (bndymin[jj] <= max_y) &
                                           (bndymax[jj] >= min_y)
               end
               idxs = findall(cl -> (cl == 1), logiArr)
               if !isempty(idxs)
                   @inbounds ijc[ii] = [idxs ones(length(idxs))*ii]
               end
           end
           return ijc
       end
getids2 (generic function with 1 method)

julia> @btime getids2(x, y, bndxmin, bndxmax, bndymin, bndymax);
  911.452 ns (20 allocations: 1.84 KiB)

I also had very small improvements:

x = [-88.58, -86.31, -83.89, -83.36]
y = [58.58, 58.49, 58.24, 57.54]
bndxmin = [-95,-95,-95,-90,-90,-90,-85,-85,-85,-80,-80,-95,-95,-90,-85,-80,-85,-90,-95,-95,-95,-90,-85]
bndxmax = [-95,-95,-90,-90,-90,-85,-85,-85,-80,-80,-80,-80,-90,-85,-80,-80,-80,-85,-90,-95,-90,-85,-80]
bndymin = [57.5,52.5,52.5,52.5,57.5,62.5,57.5,52.5,52.5,52.5,57.5,52.5,52.5,52.5,52.5,52.5,57.5,57.5,57.5,57.5,62.5,62.5,62.5]
bndymax = [62.5,57.5,52.5,57.5,62.5,62.5,62.5,57.5,52.5,57.5,62.5,62.5,52.5,52.5,52.5,57.5,57.5,57.5,57.5,62.5,62.5,62.5,62.5]

function getids(x, y, bndxmin, bndxmax, bndymin, bndymax)
    len_xy_minus_one = length(x) - 1
    ijc = Vector{Array{Float64,2}}(undef, len_xy_minus_one)
    logiArr = falses(length(bndxmin))
    @inbounds for ii = 1:len_xy_minus_one
        iii = ii + 1
        xmax = max(x[ii], x[iii])
        xmin = min(x[ii], x[iii])
        ymax = max(y[ii], y[iii])
        ymin = min(y[ii], y[iii])
        for jj = eachindex(bndxmin)
            logiArr[jj] = (bndxmin[jj] <= xmax) & (bndxmax[jj] >= xmin) &
                          (bndymin[jj] <= ymax) & (bndymax[jj] >= ymin)
        end
        idxs = keys(bndxmin)[logiArr]
        if !isempty(idxs)
            ijc[ii] = [idxs ones(length(idxs))*ii]
        end
    end
    return ijc
end

For me, with BenchmarkTools, the time go from 1.031 μs to 883.188 ns.