Speed up multidimensional code without Base.Cartesian

Note that this is a perfect case for using ghost cells. Just make your array Λ bigger by one element in each direction, and copy the periodic boundaries before doing any “stencil” calculations.

Not only is this much more flexible and tends to lead to cleaner code (it separates your boundary conditions from your other looping code), but it is also significantly faster because you eliminate all of the ? conditionals in your inner loops.

In particular, the following code gives the same answer as energy_explicit but uses ghost cells:

Λghost = similar(Λ, size(Λ) .+ 1);
Λghost[CartesianIndices(Λ)] = Λ;
function periodicboundaries!(Λghost::AbstractArray{<:Any,N}) where {N}
    sz = size(Λghost)
    for d = 1:N
        from = CartesianIndices(ntuple(i -> i == d ? (1:1) : (1:sz[i]), N))
        to = CartesianIndices(ntuple(i -> i == d ? (sz[i]:sz[i]) : (1:sz[i]), N))
        for (i,j) in zip(from,to)
            Λghost[j] = Λghost[i]
        end
    end
    return Λghost
end

function energy_arbitrary_faster_ghost(Λ::AbstractArray{T,N}) where {T,N}
	E = 0 + zero(T) # promote to at least Int
	@inbounds @simd for r ∈ CartesianIndices(ntuple(i -> 1:size(Λ,i)-1, N))
		E += Λ[r] * sum(ntuple(d -> @inbounds(Λ[ntuple(i -> i == d ? r[i]+1 : r[i], N)...]), N))
	end
	return -E
end

@btime periodicboundaries!($Λghost)
@btime energy_arbitrary_faster_ghost($Λghost)

giving

  57.329 μs (0 allocations: 0 bytes)
  91.855 μs (0 allocations: 0 bytes)

vs 337µm for energy_explicit on my machine. So, even if you count the cost of copying over the periodic boundaries (which only needs to be done once whenever you update the array) it is more than 2× faster. If you don’t count the time for periodicboundaries!, then it is almost 4× faster.

PS. Note that I eliminated the generator completely, compared to @N5N3’s solution.

5 Likes