Improving performance on nested for loops (SparseArrays, LibGEOS)

I have two arrays of polygons, one with 9,000,000 polygons and the other with 60,000 polygons. I would like to find the intersecting area of each of these. In the MWE example below where I have reduced the example to 201 polygons x 201 polygons, I can only achieve this in 35.522 ms. I have tried using Threads.@threads in the outer loop of poly_int, but this does not work with either sparse arrays (which I need to form a 9,000,000 by 60,000 array) or LibGEOS. Does anyone have any suggestions for improving the performance (e.g., should I focus on finding a way for Threads to work)?

Note: I deleted a previous post about this, as I believe I have asked the question more directly now.

using LibGEOS, SparseArrays, BenchmarkTools

function poly_int(grid_1,grid_2)
    polygon_intersection = spzeros(length(grid_1["polygon"]),length(grid_2["polygon"]));
    for j = 1:length(grid_2["polygon"]) # accessing in column order
        for i = 1:length(grid_1["polygon"])
            polygon_intersection[i,j] = area(intersection(grid_1["polygon"][i],grid_2["polygon"][j]));
        end
    end
    return polygon_intersection
end

@btime poly_int(grid_1,grid_2);

The full code for the MWE example (that defines grid_1 and grid_2) is here.

using LibGEOS, SparseArrays, BenchmarkTools

grid_1 = Dict();
# c = center, ll = lower left corner, ul = upper left corner
# ur = upper right corner, lr = lower right corner
grid_1["x_c"] = collect(0:0.5:100);
grid_1["y_c"] = collect(0:0.5:100);
grid_1["x_ll"] = grid_1["x_c"] .- (0.5/2);
grid_1["x_ul"] = grid_1["x_c"] .- (0.5/2);
grid_1["x_ur"] = grid_1["x_c"] .+ (0.5/2);
grid_1["x_lr"] = grid_1["x_c"] .+ (0.5/2);
grid_1["y_ll"] = grid_1["y_c"] .- (0.5/2);
grid_1["y_ul"] = grid_1["y_c"] .+ (0.5/2);
grid_1["y_ur"] = grid_1["y_c"] .+ (0.5/2);
grid_1["y_lr"] = grid_1["y_c"] .- (0.5/2);
grid_1["value"] = rand(length(grid_1["x_c"]));
grid_1["uncertainty"] = rand(length(grid_1["x_c"]));

grid_2 = Dict();
grid_2["x_c"] = collect(1:0.5:101);
grid_2["y_c"] = collect(1:0.5:101);
grid_2["x_ll"] = grid_2["x_c"] .- (0.5/2);
grid_2["x_ul"] = grid_2["x_c"] .- (0.5/2);
grid_2["x_ur"] = grid_2["x_c"] .+ (0.5/2);
grid_2["x_lr"] = grid_2["x_c"] .+ (0.5/2);
grid_2["y_ll"] = grid_2["y_c"] .- (0.5/2);
grid_2["y_ul"] = grid_2["y_c"] .+ (0.5/2);
grid_2["y_ur"] = grid_2["y_c"] .+ (0.5/2);
grid_2["y_lr"] = grid_2["y_c"] .- (0.5/2);
grid_2["value"] = rand(length(grid_2["x_c"]));
grid_2["uncertainty"] = rand(length(grid_2["x_c"]));

for grid in (grid_1,grid_2)
    grid["polygon"] = Vector{Polygon}(undef,length(grid["x_c"]));

    # makes polygons from the pixcorners for both grids
    for idx = 1:length(grid["polygon"])
        grid["polygon"][idx] = Polygon([[[grid["x_ll"][idx],grid["y_ll"][idx]],[grid["x_ul"][idx],grid["y_ul"][idx]],[grid["x_ur"][idx],grid["y_ur"][idx]],[grid["x_lr"][idx],grid["y_lr"][idx]],[grid["x_ll"][idx],grid["y_ll"][idx]]]]);
    end

    grid["polygon_area"] = area.(grid["polygon"]);
end

function poly_int(grid_1,grid_2)
    polygon_intersection = spzeros(length(grid_1["polygon"]),length(grid_2["polygon"]));
    for j = 1:length(grid_2["polygon"]) # accessing in column order
        for i = 1:length(grid_1["polygon"])
            polygon_intersection[i,j] = area(intersection(grid_1["polygon"][i],grid_2["polygon"][j]));
        end
    end
    return polygon_intersection
end

@btime poly_int(grid_1,grid_2);

Nice problem! First of all you might be able to improve the serial program like so

function poly_int1(grid_1,grid_2)
    polygon_intersection = spzeros(length(grid_1["polygon"]),length(grid_2["polygon"]));
    for j = 1:length(grid_2["polygon"]) # accessing in column order
        for i = 1:length(grid_1["polygon"])
            polygon_intersection[i,j] = area(intersection(grid_1["polygon"][i],grid_2["polygon"][j]));
        end
    end
    return polygon_intersection
end

function poly_int2(grid_1,grid_2)
    polygon_intersection = spzeros(length(grid_1["polygon"]),length(grid_2["polygon"]));
    for j = 1:length(grid_2["polygon"]) # accessing in column order
        for i = 1:length(grid_1["polygon"])
            if intersects(grid_1["polygon"][i],grid_2["polygon"][j])
                polygon_intersection[i,j] = area(intersection(grid_1["polygon"][i],grid_2["polygon"][j]));
            end
        end
    end
    return polygon_intersection
end

function poly_int3(polygons_1,polygons_2)
    is = Int[]
    js = Int[]
    vs = Float64[]
    for j = 1:length(polygons_2) # accessing in column order
        for i = 1:length(polygons_1)
            if intersects(polygons_1[i],polygons_2[j])
                push!(is,i);
                push!(js,j);
                push!(vs,area(intersection(polygons_1[i],polygons_2[j])));
            end
        end
    end
    return sparse(is,js,vs,length(polygons_1),length(polygons_2));
end

res_1 = poly_int1(grid_1,grid_2);
res_2 = poly_int2(grid_1,grid_2);
@assert res_2 ≈ res_1
res_3 = poly_int3(grid_1["polygon"],grid_2["polygon"]);
@assert res_3 ≈ res_2

@btime poly_int1(grid_1,grid_2);
@btime poly_int2(grid_1,grid_2);
@btime poly_int3(grid_1["polygon"],grid_2["polygon"]);

yielding

  91.991 ms (162019 allocations: 4.34 MiB)
  16.197 ms (42607 allocations: 1.31 MiB)
  9.008 ms (1220 allocations: 65.94 KiB)

And I don’t see a reason why these solutions would not be parallelizable?

2 Likes

Thank you so much, @goerch! This is really helpful. It appears I was running quite a few unnecessary iterations of area(intersection())…

I am surprised that poly_int3 performs so much better given that it doesn’t preallocate memory. Maybe this is because the sparse array is not a lot of memory to preallocate?

On the parallelizable front, it appears the LibGEOS functions are not thread-safe, as the following crashes (which came up here!).

function poly_int3(polygons_1,polygons_2)
    is = Int[]
    js = Int[]
    vs = Float64[]
    Threads.@threads for j = 1:length(polygons_2) # accessing in column order
        for i = 1:length(polygons_1)
            if intersects(polygons_1[i],polygons_2[j])
                push!(is,i);
                push!(js,j);
                push!(vs,area(intersection(polygons_1[i],polygons_2[j])));
            end
        end
    end
    return sparse(is,js,vs,length(polygons_1),length(polygons_2));
end

res_3 = poly_int3(grid_1["polygon"],grid_2["polygon"]);

You most certainly want to lock write accesses to shared data structures. With

function poly_int4(polygons_1,polygons_2)
    is = Int[]
    js = Int[]
    vs = Float64[]
    lk = ReentrantLock()
    Threads.@threads for j = 1:length(polygons_2) # accessing in column order
        for i = 1:length(polygons_1)
            if intersects(polygons_1[i],polygons_2[j])
                _area = area(intersection(polygons_1[i],polygons_2[j]))
                lock(lk) do
                    push!(is,i);
                    push!(js,j);
                    push!(vs,_area);
                end
            end
        end
    end
    return sparse(is,js,vs,length(polygons_1),length(polygons_2));
end

res_1 = poly_int1(grid_1,grid_2);
res_2 = poly_int2(grid_1,grid_2);
@assert res_2 ≈ res_1
res_3 = poly_int3(grid_1["polygon"],grid_2["polygon"]);
@assert res_3 ≈ res_2
res_4 = poly_int4(grid_1["polygon"],grid_2["polygon"]);
@assert res_4 ≈ res_3

@btime poly_int1(grid_1,grid_2);
@btime poly_int2(grid_1,grid_2);
@btime poly_int3(grid_1["polygon"],grid_2["polygon"]);
@btime poly_int4(grid_1["polygon"],grid_2["polygon"]);

I see

  96.046 ms (162019 allocations: 4.34 MiB)
  15.763 ms (42607 allocations: 1.31 MiB)
  8.723 ms (1220 allocations: 65.94 KiB)
  2.245 ms (1852 allocations: 107.09 KiB)

Of course I don’t know if the internal implementation of LibGEOS is thread safe.

1 Like

One more remark: I didn’t check which of the sparse matrix accesses/constructors is more appropriate for your real problem sizes.

poly_int4 unfortunately still crashes Julia for me, but I assume is because of LibGEOS (though I’m not sure why it works for you and not for me!)

I’m with 1.8.0-DEV on Windows. Would you like to show us the beginning of a crash dump?

Sorry - forgot the message for when it crashes. This is running on macOS with 1.7.0

julia(42219,0x16e783000) malloc: *** error for object 0x600000a803c0: pointer being freed was not allocated
julia(42219,0x16e783000) malloc: *** set a breakpoint in malloc_error_break to debug

signal (6): Abort trap: 6
in expression starting at /Users/nicholasbalasus/Julia_Satellite/workspace.jl:67
__pthread_kill at /usr/lib/system/libsystem_kernel.dylib (unknown line)
Allocations: 14480892 (Pool: 14474265; Big: 6627); GC: 17

The internal implementation of LibGEOS.jl functions seems to support optional contexts for reentrancy

function intersects(g1::GEOSGeom, g2::GEOSGeom, context::GEOSContext = _context)
    result = GEOSIntersects_r(context.ptr, g1, g2)
    if result == 0x02
        error("LibGEOS: Error in GEOSIntersects")
    end
    result != 0x00
end

But unfortunately these don’t seem available to users. This might be an issue?

Edit: maybe one could use something like this to generate the necessary methods until the problem is fixed in LibGEOS.

This seems to work for me:

for g1 in (:Point, :MultiPoint, :LineString, :MultiLineString, :LinearRing, :Polygon, :MultiPolygon, :GeometryCollection)
    for g2 in (:Point, :MultiPoint, :LineString, :MultiLineString, :LinearRing, :Polygon, :MultiPolygon, :GeometryCollection)
        @eval intersection_r(obj1::$g1, obj2::$g2, context) = LibGEOS.geomFromGEOS(intersection(obj1.ptr, obj2.ptr, context))
        @eval difference_r(obj1::$g1, obj2::$g2, context) = LibGEOS.geomFromGEOS(difference(obj1.ptr, obj2.ptr, context))
        @eval symmetricDifference_r(obj1::$g1, obj2::$g2, context) = LibGEOS.geomFromGEOS(symmetricDifference(obj1.ptr, obj2.ptr, context))
        @eval union_r(obj1::$g1, obj2::$g2, context) = LibGEOS.geomFromGEOS(union(obj1.ptr, obj2.ptr, context))
    end
end

for g1 in (:Point, :MultiPoint, :LineString, :MultiLineString, :LinearRing, :Polygon, :MultiPolygon, :GeometryCollection)
    for g2 in (:Point, :MultiPoint, :LineString, :MultiLineString, :LinearRing, :Polygon, :MultiPolygon, :GeometryCollection)
        @eval disjoint_r(obj1::$g1, obj2::$g2, context) = disjoint(obj1.ptr, obj2.ptr, context)
        @eval touches_r(obj1::$g1, obj2::$g2, context) = touches(obj1.ptr, obj2.ptr, context)
        @eval intersects_r(obj1::$g1, obj2::$g2, context) = intersects(obj1.ptr, obj2.ptr, context)
        @eval crosses_r(obj1::$g1, obj2::$g2, context) = crosses(obj1.ptr, obj2.ptr, context)
        @eval within_r(obj1::$g1, obj2::$g2, context) = within(obj1.ptr, obj2.ptr, context)
        @eval contains_r(obj1::$g1, obj2::$g2, context) = contains(obj1.ptr, obj2.ptr, context)
        @eval overlaps_r(obj1::$g1, obj2::$g2, context) = overlaps(obj1.ptr, obj2.ptr, context)
        @eval equals_r(obj1::$g1, obj2::$g2, context) = equals(obj1.ptr, obj2.ptr, context)
        @eval equalsexact_r(obj1::$g1, obj2::$g2, tol::Real, context) = equalsexact(obj1.ptr, obj2.ptr, tol, context)
        @eval covers_r(obj1::$g1, obj2::$g2, context) = covers(obj1.ptr, obj2.ptr, context)
        @eval coveredby_r(obj1::$g1, obj2::$g2, context) = coveredby(obj1.ptr, obj2.ptr, context)
    end
end

for geom in (:Point, :MultiPoint, :LineString, :MultiLineString, :LinearRing, :Polygon, :MultiPolygon, :GeometryCollection)
    @eval area_r(obj::$geom, context) = LibGEOS.geomArea(obj.ptr, context)
    @eval geomLength_r(obj::$geom, context) = LibGEOS.geomLength(obj.ptr, context)
end

function poly_int4(polygons_1,polygons_2)
    is = Int[]
    js = Int[]
    vs = Float64[]
    lk = ReentrantLock()
    Threads.@threads for j = 1:length(polygons_2) # accessing in column order
        context = LibGEOS.GEOSContext()
        for i = 1:length(polygons_1)
            if intersects_r(polygons_1[i],polygons_2[j],context)
                _intersection = intersection_r(polygons_1[i],polygons_2[j],context)
                _area = area_r(_intersection,context)
                lock(lk) do
                    push!(is,i);
                    push!(js,j);
                    push!(vs,_area);
                end
            end
        end
    end
    return sparse(is,js,vs,length(polygons_1),length(polygons_2));
end

Feel free to open a PR for making these available to users. I don’t think it was an intentional design for it to remain internal (see e.g. Switch to re-entrant methods by yeesian · Pull Request #46 · JuliaGeo/LibGEOS.jl · GitHub), just that we didn’t implement it all the way through back then.