Fixing @threads
with an auxiliary and adding Polyester
to the mix
using LinearAlgebra
using Random
using BenchmarkTools
using Polyester
function distance(x, y, p::Real = 2)
norm((x[1] - y[1], x[2] - y[2]), p)
end
function jfa_serial!(grid, sites, p::Real=2)
for (color, (x, y)) in enumerate(sites)
grid[x, y] = convert(eltype(grid), color)
end
gridp = copy(grid)
k = max(size(grid)...)
while k > 1
k = k ÷ 2 + k % 2
for y in 1:size(grid, 1), x in 1:size(grid, 2)
for j in (-k, 0, k), i in (-k, 0, k)
checkbounds(Bool, grid, x + i, y + j) || continue
colorq = grid[x + i, y + j]
colorq !== 0 || continue
colorp = grid[x, y]
if colorp === 0
gridp[x, y] = colorq
elseif distance(sites[colorp], (x, y), p) > distance(sites[colorq], (x, y), p)
gridp[x, y] = colorq
end
end
end
grid .= gridp
end
return grid
end
function jfa_parallel_aux!(grid, gridp, sites, k, x, y, p)
for j in (-k, 0, k), i in (-k, 0, k)
checkbounds(Bool, grid, x + i, y + j) || continue
colorq = grid[x + i, y + j]
colorq !== 0 || continue
colorp = grid[x, y]
if colorp === 0
gridp[x, y] = colorq
elseif distance(sites[colorp], (x, y), p) > distance(sites[colorq], (x, y), p)
gridp[x, y] = colorq
end
end
end
function jfa_parallel!(grid, sites, p::Real=2)
for (color, (x, y)) in enumerate(sites)
grid[x, y] = convert(eltype(grid), color)
end
gridp = copy(grid)
k = max(size(grid)...)
while k > 1
k = k ÷ 2 + k % 2
Threads.@threads for (x, y) in collect(Iterators.product(1:size(grid, 1), 1:size(grid, 2)))
jfa_parallel_aux!(grid, gridp, sites, k, x, y, p)
end
grid .= gridp
end
return grid
end
function jfa_polyester!(grid, sites, p::Real=2)
for (color, (x, y)) in enumerate(sites)
grid[x, y] = convert(eltype(grid), color)
end
gridp = copy(grid)
k = max(size(grid)...)
while k > 1
k = k ÷ 2 + k % 2
@batch for y in 1:size(grid, 1), x in 1:size(grid, 2)
for j in (-k, 0, k), i in (-k, 0, k)
checkbounds(Bool, grid, x + i, y + j) || continue
colorq = grid[x + i, y + j]
colorq !== 0 || continue
colorp = grid[x, y]
if colorp === 0
gridp[x, y] = colorq
elseif distance(sites[colorp], (x, y), p) > distance(sites[colorq], (x, y), p)
gridp[x, y] = colorq
end
end
end
grid .= gridp
end
return grid
end
function rand_sites(M, N, K)
idx = [(m, n) for m in 1:M, n in 1:N]
shuffle!(idx)
idx[1:K]
end
@btime jfa_serial!(grid, sites) setup = (grid = zeros(Int, 100, 100); sites = collect(rand_sites(size(grid)..., 100)))
@btime jfa_parallel!(grid, sites) setup = (grid = zeros(Int, 100, 100); sites = collect(rand_sites(size(grid)..., 100)))
@btime jfa_polyester!(grid, sites) setup = (grid = zeros(Int, 100, 100); sites = collect(rand_sites(size(grid)..., 100)))
yields
6.647 ms (2 allocations: 78.17 KiB)
2.253 ms (234 allocations: 1.17 MiB)
1.434 ms (9 allocations: 78.83 KiB)
and Polyester
doesn’t even suffer from the (inference?) problems of Threads
. How can that be? What are the tradeoffs between Polyester
and Threads
?