Improving performance on nested for loops (SparseArrays, LibGEOS)

You most certainly want to lock write accesses to shared data structures. With

function poly_int4(polygons_1,polygons_2)
    is = Int[]
    js = Int[]
    vs = Float64[]
    lk = ReentrantLock()
    Threads.@threads for j = 1:length(polygons_2) # accessing in column order
        for i = 1:length(polygons_1)
            if intersects(polygons_1[i],polygons_2[j])
                _area = area(intersection(polygons_1[i],polygons_2[j]))
                lock(lk) do
                    push!(is,i);
                    push!(js,j);
                    push!(vs,_area);
                end
            end
        end
    end
    return sparse(is,js,vs,length(polygons_1),length(polygons_2));
end

res_1 = poly_int1(grid_1,grid_2);
res_2 = poly_int2(grid_1,grid_2);
@assert res_2 ≈ res_1
res_3 = poly_int3(grid_1["polygon"],grid_2["polygon"]);
@assert res_3 ≈ res_2
res_4 = poly_int4(grid_1["polygon"],grid_2["polygon"]);
@assert res_4 ≈ res_3

@btime poly_int1(grid_1,grid_2);
@btime poly_int2(grid_1,grid_2);
@btime poly_int3(grid_1["polygon"],grid_2["polygon"]);
@btime poly_int4(grid_1["polygon"],grid_2["polygon"]);

I see

  96.046 ms (162019 allocations: 4.34 MiB)
  15.763 ms (42607 allocations: 1.31 MiB)
  8.723 ms (1220 allocations: 65.94 KiB)
  2.245 ms (1852 allocations: 107.09 KiB)

Of course I don’t know if the internal implementation of LibGEOS is thread safe.

1 Like