I’m writing a function that takes a matrix X
and returns a matrix Y
of the same dimensions such that Y[i, j]
is the sum of the values in the eight cells surrounding X[i, j]
. This function is the “number of neighbors” function from Conway’s Game of Life. I seem to recall that this kind of function is also used in image processing, where they call it a “stencil” function.
For the edge cases, I will use “torus topology,” i.e. you wrap indices around to the opposite end of the grid. For example, the cell immediately “above” X[1, 5]
is X[end, 5]
.
I have written two implementations of this function:

neighborhood_density_loop(X)
loops manually through all the indices in a way that tries to improve memory localization 
neighborhood_density_offset(X)
uses array slicing in a way that tries to lean on Julia’s optimized builtin array summation
In both cases, the code is pretty verbose because you have to manually code the edge and corner cases (see below). I prefer the neighborhood_density_offset
implementation because the code is much more legible and selfexplanatory. However, neighborhood_density_loop
demonstrated much better performance in my benchmark:
VERSION = v"1.8.1"
[ Info: Test OK
neighborhood_density_loop 234.064 ns (1 allocation: 448 bytes)
neighborhood_density_offset 3.876 μs (41 allocations: 8.75 KiB)
Is it possible to improve the code of neighborhood_density_offset
to reduce the number of allocations and achieve similar performance to the for loop? Following a common performance tip, I’ve used @view
on all the righthand array slices, but there is still a lot of allocation going on.
Here is my code:
using BenchmarkTools
"""
Compute the neighborhood density for each cell using a for loop.
"""
function neighborhood_density_loop(X::Matrix{T})::Matrix{Float64} where T<:Number
m, n = size(X)
Y = zeros(Float64, m, n)
for j in 2:n1
# Inner part
for i in 2:m1
Y[i, j] = begin
X[i1, j1] +
X[i , j1] +
X[i+1, j1] +
X[i1, j ] +
X[i+1, j ] +
X[i1, j+1] +
X[i , j+1] +
X[i+1, j+1]
end
end
# Top edge
Y[1, j] = begin
X[m, j1] +
X[1, j1] +
X[2, j1] +
X[m, j ] +
X[2, j ] +
X[m, j+1] +
X[1, j+1] +
X[2, j+1]
end
# Bottom edge
Y[m, j] = begin
X[m1, j1] +
X[m , j1] +
X[1 , j1] +
X[m1, j ] +
X[1 , j ] +
X[m1, j+1] +
X[m , j+1] +
X[1 , j+1]
end
end
for i in 2:m1
# Left edge
Y[i, 1] = begin
X[i1, n] +
X[i , n] +
X[i+1, n] +
X[i1, 1] +
X[i+1, 1] +
X[i1, 2] +
X[i , 2] +
X[i+1, 2]
end
# Right edge
Y[i, n] = begin
X[i1, n1] +
X[i , n1] +
X[i+1, n1] +
X[i1, n ] +
X[i+1, n ] +
X[i1, 1 ] +
X[i , 1 ] +
X[i+1, 1 ]
end
end
# Corners
Y[1, 1] = begin
X[m, n] +
X[1, n] +
X[2, n] +
X[m, 1] +
X[2, 1] +
X[m, 2] +
X[1, 2] +
X[2, 2]
end
Y[1, n] = begin
X[m, n1] +
X[1, n1] +
X[2, n1] +
X[m, n ] +
X[2, n ] +
X[m, 1 ] +
X[1, 1 ] +
X[2, 1 ]
end
Y[m, 1] = begin
X[m1, n] +
X[m , n] +
X[1 , n] +
X[m1, 1] +
X[1 , 1] +
X[m1, 2] +
X[m , 2] +
X[1 , 2]
end
Y[m, n] = begin
X[m1, n1] +
X[m , n1] +
X[1 , n1] +
X[m1, n] +
X[1 , n] +
X[m1, 1] +
X[m , 1] +
X[1 , 1]
end
return Y
end
"""
Compute the neighborhood density for each cell using offset array slices.
"""
function neighborhood_density_offset(X::Matrix{T})::Matrix{Float64} where T<:Number
m, n = size(X)
Y = zeros(Float64, m, n)
# Right neighbors
Y[:, 1:n1] += @view X[:, 2:n]
# Left neighbors
Y[:, 2:n] += @view X[:, 1:n1]
# Lower neighbors
Y[1:m1, :] += @view X[2:m, :]
# Upper neighbors
Y[2:m, :] += @view X[1:m1, :]
# Diagonal neighbors
Y[1:m1, 1:n1] += @view X[2:m, 2:n]
Y[1:m1, 2:n] += @view X[2:m, 1:n1]
Y[2:m, 1:n1] += @view X[1:m1, 2:n]
Y[2:m, 2:n] += @view X[1:m1, 1:n1]
# Right, left neighbors of right and left edges
Y[:, n] += @view X[:, 1]
Y[:, 1] += @view X[:, n]
# Lower, upper neighbors of bottom and top edges
Y[m, :] += @view X[1, :]
Y[1, :] += @view X[m, :]
# Diagonal neighbors along right edge
Y[1:m1, n] += @view X[2:m, 1]
Y[2:m, n] += @view X[1:m1, 1]
# Diagonal neighbors along left edge
Y[1:m1, 1] += @view X[2:m, n]
Y[2:m, 1] += @view X[1:m1, n]
# Diagonal neighbors along bottom edge
Y[m, 1:n1] += @view X[1, 2:n]
Y[m, 2:n] += @view X[1, 1:n1]
# Diagonal neighbors along top edge
Y[1, 1:n1] += @view X[m, 2:n]
Y[1, 2:n] += @view X[m, 1:n1]
# Diagonals pointing outward from each corner
Y[1, 1] += X[m, n]
Y[1, n] += X[m, 1]
Y[m, 1] += X[1, n]
Y[m, n] += X[1, 1]
return Y
end
function test(samples=500, m=6, n=8)
for _ in 1:samples
X = rand(m, n)
@assert all(
neighborhood_density_loop(X) .≈ neighborhood_density_offset(X)
)
end
@info "Test OK"
end
function benchmark(m=6, n=8)
print("neighborhood_density_loop")
@btime neighborhood_density_loop(X) setup=(X=rand($m, $n))
print("neighborhood_density_offset")
@btime neighborhood_density_offset(X) setup=(X=rand($m, $n))
nothing
end
function main()
@show VERSION
test()
benchmark()
end
main()
Nitpicky comments and general review are also welcome.