Cell list algorithm is slower than double for loop

I want to solve the fixed radius near neighbor problem, i.e., for a given point set in 2D, find all the point pairs whose length is less than a given radius r.
I use the cell list algorithm, see Cell List algorithm and Cell list algorithm for a reference.
However, my program runs much slower than a double for loop.
I wanna know why and how to modify my program to run faster. Following are my codes.

# fied-radius nearest neighbor
#cell list algorithm
function FRNNMain(NumNod::Int64,Dim::Int64,A::Array{Float64,2},s::Float64)
    PoiPair = Array{Tuple{Int64,Int64},1}(undef,1)
    popfirst!(PoiPair)
    r = s/sqrt(Dim)
    Cell = PoiCell(A,NumNod,r)
    for key in keys(Cell)
        vaule = Cell[key]
        InnerCellPair!(vaule,PoiPair)
        LoopNeighbor!(key,Cell,PoiPair,A,s)
    end
    return PoiPair
end
function PoiCell(A::Matrix{Float64},NumNod::Int64,r::Float64)
    Cell = Dict{Tuple{Int64,Int64},Array{Int64,1}}()
    for i = 1:1:NumNod
        x = fld(A[1,i],r)
        y = fld(A[2,i],r)
        z = (x,y)
        if z ∉ keys(Cell)
            Cell[z] = [i]
        else
            push!(Cell[z],i)
        end
    end
    return Cell
end
function GetNeighborCell(key::Tuple{Int64,Int64})
    NeiCell = Array{Tuple{Int64,Int64}}(undef,20,1)
    NeiCell[1] = (key[1]+1,key[2]+1)
    NeiCell[2] = (key[1]+1,key[2])
    NeiCell[3] = (key[1]+1,key[2]-1)
    NeiCell[4] = (key[1],key[2]+1)
    NeiCell[5] = (key[1],key[2]-1)
    NeiCell[6] = (key[1]-1,key[2])
    NeiCell[7] = (key[1]-1,key[2]+1)
    NeiCell[8] = (key[1]-1,key[2]-1)
    NeiCell[9] = (key[1]-1,key[2]+2)
    NeiCell[10] = (key[1],key[2]+2)
    NeiCell[11] = (key[1]+1,key[2]+2)
    NeiCell[12] = (key[1]+2,key[2]-1)
    NeiCell[13] = (key[1]+2,key[2])
    NeiCell[14] = (key[1]+2,key[2]+1)
    NeiCell[15] = (key[1]-1,key[2]-2)
    NeiCell[16] = (key[1],key[2]-2)
    NeiCell[17] = (key[1]+1,key[2]-2)
    NeiCell[18] = (key[1]-2,key[2]-1)
    NeiCell[19] = (key[1]-2,key[2])
    NeiCell[20] = (key[1]-2,key[2]+1)
    return NeiCell
end
function InnerCellPair!(value::Array{Int64},pairs::Array{Tuple{Int64,Int64},1})
    Len = length(value)
    if Len >= 2
        for i = 1:Len-1
            for j = (i+1):Len
                z = (value[i],value[j])
                push!(pairs,z)
            end
        end
    end
    return pairs
end
function LoopNeighbor!(key::Tuple{Int64,Int64},Cell::Dict{Tuple{Int64,Int64},Array{Int64,1}},PoiPair::Array{Tuple{Int64,Int64},1},A::Array{Float64,2},s::Float64)
    NeiCell = Array{Tuple{Int64,Int64}}(undef,20,1)
    NeiCell = GetNeighborCell(key)
    for i = 1:8
        if NeiCell[i] ∈ keys(Cell)
            FindPair!(Cell[key],Cell[NeiCell[i]],PoiPair,A,s)
        end
    end
    return PoiPair
end
function FindPair!(value::Array{Int64},value1::Array{Int64},PoiPair::Array{Tuple{Int64,Int64},1},A::Array{Float64,2},s::Float64)
    for P1 in value
        for P2 in value1
            #Dist = Distance(A[P1,:],A[P2,:])
            Dist::Float64 = (A[1,P1]-A[1,P2])^2+(A[2,P1]-A[2,P2])^2
            if Dist <= s^2
                AddPair!(P1,P2,PoiPair)
            end
        end
    end
    return PoiPair
end
function Distance(Point1::Array{Float64,1},Point2::Array{Float64,1})
    Dist = (Point1[1]-Point2[1])^2+(Point1[2]-Point2[2])^2::Float64
    return Dist
end
function AddPair!(P1::Int64,P2::Int64,PoiPair::Array{Tuple{Int64,Int64},1})
    pair1 = (P1,P2)
    pair2 = (P2,P1)
    if pair1 ∉ PoiPair
        if pair2 ∉ PoiPair
            push!(PoiPair,pair1)
        end
    end
    return PoiPair
end
NumNod = 1000
Dim = 2
A = rand(Dim,NumNod)
s = 0.1;
a = FRNNMain(NumNod,Dim,A,s)

and the running time

 @btime FRNNMain(NumNod,Dim,A,s)
  182.920 ms (971 allocations: 749.27 KiB)

The doule for loop code

NumNod = 1000
Dim = 2
A = rand(Dim,NumNod)
s = 0.1;
function forloop(A::Matrix{Float64},NumNod::Int64,s::Float64)
    PoiPair = Array{Tuple{Int64,Int64},1}(undef,1)
    popfirst!(PoiPair)
    for i = collect(1:NumNod-1)
        for j = collect(i+1:NumNod)
            Dist = (A[1,i]-A[1,j])^2+(A[2,i]-A[2,j])^2
            if Dist <= s^2
                push!(PoiPair,(i,j))
            end
        end
    end
    return PoiPair
end
b = forloop(A,NumNod,s)

and the running time

 @btime forloop(A,NumNod,s)
  1.623 ms (1014 allocations: 4.46 MiB)

The code warntype result is also provided

 @code_warntype FRNNMain(NumNod,Dim,A,s)
Variables
  #self#::Core.Compiler.Const(FRNNMain, false)
  NumNod::Int64
  Dim::Int64
  A::Array{Float64,2}
  s::Float64
  PoiPair::Array{Tuple{Int64,Int64},1}
  r::Float64
  Cell::Dict{Tuple{Int64,Int64},Array{Int64,1}}
  @_9::Union{Nothing, Tuple{Tuple{Int64,Int64},Int64}}
  key::Tuple{Int64,Int64}
  vaule::Array{Int64,1}

Body::Array{Tuple{Int64,Int64},1}
1 ─ %1  = Core.apply_type(Main.Tuple, Main.Int64, Main.Int64)::Core.Compiler.Const(Tuple{Int64,Int64}, false)
│   %2  = Core.apply_type(Main.Array, %1, 1)::Core.Compiler.Const(Array{Tuple{Int64,Int64},1}, false)
│         (PoiPair = (%2)(Main.undef, 1))
│         Main.popfirst!(PoiPair)
│   %5  = Main.sqrt(Dim)::Float64
│         (r = s / %5)
│         (Cell = Main.PoiCell(A, NumNod, r))
│   %8  = Main.keys(Cell)::Base.KeySet{Tuple{Int64,Int64},Dict{Tuple{Int64,Int64},Array{Int64,1}}}
│         (@_9 = Base.iterate(%8))
│   %10 = (@_9 === nothing)::Bool
│   %11 = Base.not_int(%10)::Bool
└──       goto #4 if not %11
2 ┄ %13 = @_9::Tuple{Tuple{Int64,Int64},Int64}::Tuple{Tuple{Int64,Int64},Int64}
│         (key = Core.getfield(%13, 1))
│   %15 = Core.getfield(%13, 2)::Int64
│         (vaule = Base.getindex(Cell, key))
│         Main.InnerCellPair!(vaule, PoiPair)
│         Main.LoopNeighbor!(key, Cell, PoiPair, A, s)
│         (@_9 = Base.iterate(%8, %15))
│   %20 = (@_9 === nothing)::Bool
│   %21 = Base.not_int(%20)::Bool
└──       goto #4 if not %21
3 ─       goto #2
4 ┄       return PoiPair

Thank you for your kindly help!

Actually, the first thing that stands out to me is that you’re unnecessarily using collect in your double-for-loop implementation. There’s no reason to do that, and it’s slowing down the code quite a bit because it forces Julia to allocate a vector rather than just cheaply iterating over a unit range. The code works fine without collect():

julia> function forloop(A::Matrix{Float64},NumNod::Int64,s::Float64)
           PoiPair = Array{Tuple{Int64,Int64},1}(undef,1)
           popfirst!(PoiPair)
           for i = 1:NumNod-1
               for j = i+1:NumNod
                   Dist = (A[1,i]-A[1,j])^2+(A[2,i]-A[2,j])^2
                   if Dist <= s^2
                       push!(PoiPair,(i,j))
                   end
               end
           end
           return PoiPair
       end
forloop (generic function with 1 method)

julia> @btime forloop(A,NumNod,s)
  1.045 ms (14 allocations: 512.67 KiB)

That’s about 30% faster, for free!

As for why your cell-pair algorithm is slower, there’s a lot we can do at a performance-engineering level, but I think getting the algorithm exactly right is the first step. In particular, your GetNeighborCell function doesn’t look right to me (although I admit I just learned the algorithm from the links you provided). First of all, why are you appending cells which are not neighbors (like (key[1] +2, key[2]) ) ? The algorithm as stated in the links you posted only requires checking adjacent cells, but you’re checking cells that are adjacent also the ones that are two cells away. Second, for every cell you are providing the neighbors in all directions. That wastes a lot of effort, because you end up checking for pairs of points in cells A and B both when you look at cell A’s neighbors and when you look at cell B’s neighbors. That’s why Fixed Radius Near Neighbor Algorithm mentions only searching the forward buckets; i.e. the neighboring cells with equal or higher indices. That’s an easy way to avoid duplicating comparisons between cells.

2 Likes

Thank you for your reply!
Actually I am new to Julia and I don’t know the difference between 1:10 and collect(1:10), I just saw others write codes this way and I repeated. I used to think that 1:10 is also an array as in Matlab, and now I find that 1:10 is just a range rather than an array in Julia. Thank you!
As for the second question, you should notice that I wrote

r = s/sqrt(Dim)

in function FRNNMain and passed it as a parameter to the function PoiCell. So the cell size is smaller than s. Then the distance between the points in one cell is smaller than r automatically. So the number of NeighborCell is 20,as shown in the following figure


In 2-D cases, this will results in finding pairs in 10s^2 area. If we set the cell size equal to s, than we will need to search pairs in 9s^2 area. But in higher dimension space, take r=s/\sqrt(dim) as the cell size is more efficient.
The third part is about the duplicating comparisions between cells. It is because the cells in the Dict is not ordered, and they are not next to each other. I wonder if we just take the forward cells we may miss some point pairs. I’m not quite sure about that.
Looking forward to your reply!

God Bless You! I succeed!
I took your suggestions and modify the neighbor cell to forward cell. This opeartion avoids duplicating comparisions between cells thus reduce the judgement of repeated pairs which costs a lot of time. Now the program is 10 times faster than the for loop!
Thank you again!
And here is the modified program::

# fixed-radius nearest neighbor
#cell list algorithm
function FRNNMain(NumNod::Int64,Dim::Int64,A::Array{Float64,2},s::Float64)
    PoiPair = Array{Tuple{Int64,Int64},1}(undef,1)
    popfirst!(PoiPair)
    Cell = PoiCell(A,NumNod,s)
    for key in keys(Cell)
        vaule = Cell[key]
        InnerCellPair!(vaule,PoiPair,A,s)
        LoopNeighbor!(key,Cell,PoiPair,A,s)
    end
    return PoiPair
end
function PoiCell(A::Matrix{Float64},NumNod::Int64,s::Float64)
    Cell = Dict{Tuple{Int64,Int64},Array{Int64,1}}()
    for i = 1:1:NumNod
        x = fld(A[1,i],s)
        y = fld(A[2,i],s)
        z = (x,y)
        if z ∉ keys(Cell)
            Cell[z] = [i]
        else
            push!(Cell[z],i)
        end
    end
    return Cell
end
function GetForwardCell(key::Tuple{Int64,Int64})
    NeiCell = Array{Tuple{Int64,Int64}}(undef,4,1)
    NeiCell[1] = (key[1]+1,key[2])
    NeiCell[2] = (key[1],key[2]-1)
    NeiCell[3] = (key[1]+1,key[2]-1)
    NeiCell[4] = (key[1]+1,key[2]+1)
    return NeiCell
end
function InnerCellPair!(value::Array{Int64},PoiPair::Array{Tuple{Int64,Int64},1},A::Matrix{Float64},s::Float64)
    Len = length(value)
    if Len >= 2
        for i = 1:Len-1
            for j = i+1:Len
                AddorNot!(value[i],value[j],A,s,PoiPair)
            end
        end
    end
    return PoiPair
end
function LoopNeighbor!(key::Tuple{Int64,Int64},Cell::Dict{Tuple{Int64,Int64},Array{Int64,1}},PoiPair::Array{Tuple{Int64,Int64},1},A::Array{Float64,2},s::Float64)
    NeiCell = Array{Tuple{Int64,Int64}}(undef,3,1)
    NeiCell = GetForwardCell(key)
    for i = 1:4
        if NeiCell[i] ∈ keys(Cell)
            FindPair!(Cell[key],Cell[NeiCell[i]],PoiPair,A,s)
        end
    end
    return PoiPair
end
function FindPair!(value::Array{Int64},value1::Array{Int64},PoiPair::Array{Tuple{Int64,Int64},1},A::Array{Float64,2},s::Float64)
    for P1 in value
        for P2 in value1
            AddorNot!(P1,P2,A,s,PoiPair)
        end
    end
    return PoiPair
end
function Distance(Point1::Array{Float64,1},Point2::Array{Float64,1})
    Dist = (Point1[1]-Point2[1])^2+(Point1[2]-Point2[2])^2::Float64
    return Dist
end
function AddorNot!(P1::Int64,P2::Int64,A::Matrix{Float64},s::Float64,PoiPair::Array{Tuple{Int64,Int64},1})
    Dist::Float64 = (A[1,P1]-A[1,P2])^2+(A[2,P1]-A[2,P2])^2
    if Dist <= s*s
        push!(PoiPair,(P1,P2))
    end
    return PoiPair
end
NumNod = 1000
Dim = 2
A = rand(Dim,NumNod)
s = 0.1;
a = FRNNMain(NumNod,Dim,A,s)

and the test code

NumNod = 10000
Dim = 2
s = 0.1*100
println("For Loop")
for i = collect(1:10)*NumNod
    A = rand(Dim,i)*100
    @time forloop(A,NumNod,s)
end
println("Cell List")
for i = collect(1:10)*NumNod
    A = rand(Dim,i)*100
    @time FRNNMain(NumNod,Dim,A,s)
end

Here is the results

For Loop
  0.123814 seconds (21 allocations: 33.001 MiB, 3.29% gc time)
  0.122190 seconds (21 allocations: 33.001 MiB)
  0.122234 seconds (21 allocations: 33.001 MiB)
  0.127930 seconds (21 allocations: 33.001 MiB, 2.76% gc time)
  0.122368 seconds (21 allocations: 33.001 MiB)
  0.123387 seconds (21 allocations: 33.001 MiB)
  0.126333 seconds (21 allocations: 33.001 MiB, 3.64% gc time)
  0.124472 seconds (21 allocations: 33.001 MiB)
  0.125733 seconds (21 allocations: 33.001 MiB)
  0.126474 seconds (21 allocations: 33.001 MiB, 2.66% gc time)
Cell List
  0.062398 seconds (932 allocations: 33.254 MiB, 12.83% gc time)
  0.058075 seconds (932 allocations: 33.254 MiB, 7.76% gc time)
  0.051575 seconds (933 allocations: 33.256 MiB)
  0.059760 seconds (931 allocations: 33.252 MiB, 9.79% gc time)
  0.054577 seconds (931 allocations: 33.252 MiB, 5.11% gc time)
  0.052947 seconds (932 allocations: 33.254 MiB)
  0.060222 seconds (931 allocations: 33.252 MiB, 8.65% gc time)
  0.055781 seconds (931 allocations: 33.252 MiB, 5.34% gc time)
  0.051754 seconds (931 allocations: 33.252 MiB)
  0.058636 seconds (932 allocations: 33.254 MiB, 12.72% gc time)

Thank you again!

1 Like

Just a note, it seems

    NeiCell = Array{Tuple{Int64,Int64}}(undef,3,1)

is unused. You could also hoist the creation of

    NeiCell = Array{Tuple{Int64,Int64}}(undef,4,1)

to outside the function and modify it in place instead. Will save some allocations.

I would recommend you to read the style guide (Style Guide · The Julia Language), especially the naming section because the way the code is written right now looks very different from most Julia code and it adds some “friction” to read it.

A small rewrite (with some other small tweaks) could look something like:

this
# fixed-radius nearest neighbor
#cell list algorithm

const Intx2 = Tuple{Int, Int}

function FRNN_main(num_nod::Int, dim::Int, A::Matrix{Float64}, s::Float64)
    poi_pair = Intx2[]
    cell = poi_cell(A,num_nod, s)
    nei_cell = Vector{Intx2}(undef, 4)
    for key in keys(cell)
        value = cell[key]
        inner_cell_pair!(value, poi_pair, A, s)
        loop_neighbor!(key, nei_cell, cell, poi_pair, A, s)
    end
    return poi_pair
end

function poi_cell(A::Matrix{Float64}, num_nod::Int, s::Float64)
    cell = Dict{Intx2,Vector{Int}}()
    for i = 1:1:num_nod
        x = fld(A[1,i],s)
        y = fld(A[2,i],s)
        z = (x,y)
        if z ∉ keys(cell)
            cell[z] = [i]
        else
            push!(cell[z],i)
        end
    end
    return cell
end

function get_forward_cell!(nei_cell::Vector{Intx2}, key::Intx2)
    nei_cell[1] = (key[1]+1,key[2])
    nei_cell[2] = (key[1],  key[2]-1)
    nei_cell[3] = (key[1]+1,key[2]-1)
    nei_cell[4] = (key[1]+1,key[2]+1)
    return nei_cell
end

function inner_cell_pair!(value::Array{Int}, poi_pair::Vector{Intx2}, A::Matrix{Float64}, s::Float64)
    len = length(value)
    if len >= 2
        for i = 1:len-1
            for j = i+1:len
                add_or_not!(value[i], value[j], A, s, poi_pair)
            end
        end
    end
    return poi_pair
end

function loop_neighbor!(key::Intx2, nei_cell::Vector{Intx2}, cell::Dict{Intx2,Vector{Int}}, poi_pair::Vector{Intx2}, A::Matrix{Float64},s ::Float64)
    get_forward_cell!(nei_cell, key)
    for i = 1:4
        if nei_cell[i] ∈ keys(cell)
            find_pair!(cell[key], cell[nei_cell[i]], poi_pair, A, s)
        end
    end
    return poi_pair
end

function find_pair!(value::Array{Int}, value1::Array{Int}, poi_pair::Vector{Intx2}, A::Matrix{Float64}, s::Float64)
    for P1 in value
        for P2 in value1
            add_or_not!(P1, P2, A, s, poi_pair)
        end
    end
    return poi_pair
end

function add_or_not!(P1::Int, P2::Int, A::Matrix{Float64}, s::Float64, poi_pair::Vector{Intx2})
    dist = (A[1,P1]-A[1,P2])^2 + (A[2,P1]-A[2,P2])^2
    if dist <= s^2
        push!(poi_pair, (P1,P2))
    end
    return poi_pair
end

num_nod = 10000
dim = 2
s = 0.1*100
println("For Loop")

println("cell List")
for i = collect(1:10)*num_nod
    A = rand(dim,i)*100
    @time FRNN_main(num_nod,dim,A,s)
end

Just a question, what does it mean when NumNod is not the same as the number of points in A?

GitHub - KristofferC/NearestNeighbors.jl: High performance nearest neighbor data structures and algorithms for Julia. might be interesting to look at. You could do your query with

tree = KDTree(a)
inrange(tree, A, S)
1 Like

This won’t impact performance much, but it just struck me as strange. You apparently create an array with one undef element, pop it and never use it? Why not just create an empty array in the first place?

Vector{Tuple{Int64,Int64}}()
# or
Tuple{Int64,Int64}[] 

Turns out, a dumb algorithm with r = s is faster in this case.
Instead of searching “forward cells”, we can exploit the fact that the indices in each cell list are in the ascending order, so that we can take P_1 from cell C_1 in the “forward” order and iterate over P_2 in cell C_2 in the “backwards” order, and break the iteration as soon as P_2 \le P_1. That also means that we don’t need a separate function to search for neighbor pairs in the same cell.

Code
# fixed-radius nearest neighbor
#cell list algorithm

function frnn_main(num_nod::Integer, dim::Integer, A::Matrix{<:Real}, s::Real)
    poi_pair = Tuple{Int,Int}[]
    cell = poi_cell(A, num_nod, s)
    for key in keys(cell)
        val = cell[key]
        loop_neigh!(key, cell, poi_pair, A, s)
    end
    return poi_pair
end

function neighbors(key::Tuple{Int, Int})
    ix, iy = key
    return (
        (ix+1, iy-1),
        (ix+1, iy),
        (ix+1, iy+1),
        (ix, iy-1),
        (ix, iy),
        (ix, iy+1),
        (ix-1, iy-1),
        (ix-1, iy),
        (ix-1, iy+1)
    )
end

function poi_cell(A::Matrix{<:Number}, num_nod::Integer, r::Real)
    cell = Dict{Tuple{Int,Int}, Vector{Int}}()
    for i = 1:num_nod
        x = floor(Int, A[1,i] / r)
        y = floor(Int, A[2,i] / r)
        z = (x,y)
        if haskey(cell, z)
            push!(cell[z], i)
        else
            cell[z] = [i]
        end
    end
    return cell
end

function loop_neigh!(key::Tuple{Int,Int}, cell::Dict{Tuple{Int,Int},Vector{Int}}, poi_pair::Vector{Tuple{Int,Int}}, A::Matrix{Float64}, s::Real)
    nei_cell = neighbors(key)
    c₁ = cell[key]
    for ineigh in nei_cell
        c₂ = get(cell, ineigh, nothing)
        if !isnothing(c₂)
            find_pair!(c₁, c₂, poi_pair, A, s)
        end
    end
    return poi_pair
end

function find_pair!(c₁::Array{Int}, c₂::Array{Int}, poi_pair::Vector{Tuple{Int64,Int64}}, A::Matrix{Float64}, s::Real)
    for p₁ in c₁
        for p₂ in reverse(c₂)
            p₂ > p₁ || break
            d² = (A[1, p₁]-A[1, p₂])^2+(A[2, p₁]-A[2, p₂])^2
            d² <= s^2 && push!(poi_pair, (p₁, p₂))
        end
    end
    return poi_pair
end

And I don’t know what is the actual problem that you want to solve, but when the cell list method is applied in molecular simulation, an additional optimization is that the space allocated for the cell lists can be re-used between simulation steps. If your problem allows for such an optimization, then it’s definitely worth adding.

Another related package: NeighbourLists.jl

(Depending on the use case this can be more efficient than nearestneighbours)

Actually I didn’t know how to initialize an array before, so I created an undef array. But julia will fill the undef array with tuple consists of big random number and that is undesired, so I pop the first random tuple to create a 0-element array of tuple.
I just tried your suggestion, and it works well! Thank you for let me know more about julia.

Thank you for your sincerely suggestions. I create the NeiCell in the main function and pass it to the functions as a parameter. It really reduces allocations! That’s amazing to me!
As for the code style, I am new to programming and I can just write simple Matlab code before. Last year I used a program written by my teacher’s elder student, which uses camel case in the variable names and function names. I read his program so many many times that I am influenced by his style deeply. I will try to form my own code style in Julia in the future. Your code looks really precise, and I have really gotten something from it. Thank you again.
When NumNod is bigger that the points in A, there will be a BoundsError; when NumNod is smaller than the the points in A, the program will miss some points. Actually, NumNod is not necessary and it can be replaced by

length(A)

To be honest, before I wrote this program, I looked at your github program and decided to use your package. However, I know nothing about tree and data structures. So I write the cell list algorithm firstly and I will look at your program after I learn the tree and data structures.
Thank you again!

I think your idea is really interesting. You search all the neighbors but avoid duplicating points by taking advantage of its original order. And your program runs faster than mine! That is really amazing.

Look like this package uses the python package?

No - it is pure Julia cell list implementation. Some tests are performed against a python package.

I am using this algorithm in damage mechanics programming. Actucally, for a given point set, the return point pairs will be reused in each calculate step. You mean that the return dict of poi_cell can be re-used again?

Really? I will get a look. Thank you. Maybe I will make my program run faster after reading it!

1 Like

In that case, re-using the list instead of re-allocating it looks like a good idea.

To do that, you need to pass additionally “output” arguments poi_pair and cell to frnn_main, and pass cell to poi_cell. So, instead of creating a fresh dictionary at each call, poi_cell re-populates cell. First, cell must be cleared via, e.g.,

foreach(cell) do cell_num, cell_points
    empty!(cell_points)
end

Similarly, poi_pair isn’t created afresh on each call.
And, at that point, it makes sense to join point coordinates, cell list and pair list into a struct:

struct PointsCells
    points::Matrix{Float64}
    cell::Dict{Tuple{Int,Int}, Vector{Int}}
    pairs::Vector{Tuple{Int,Int}}
end

(beware, that is not the recommended way to write Julia code, only a starting point; normally, you’d like a parameterized data type to not restrict the field types too much).
Also, if your simulation box doesn’t change size very much, you may want to use a fixed-shape array for cell instead of a dictionary.
Also, if you can get the 2nd edition of Allen&Tildesley’s “Computer Simulation of Liquids”, Chapter 5.3.2 has a nice discussion and references on various methods to organize cell lists.

1 Like

Oh,I understand! Just don’t reallocate the memory for the dict Cell and reuse it. This may save a lot of allocations.
As for the struct, I don’t quite understand ‘a parameterized data type to not restrict filed types too much’. I write Julia code just like this. Maybe I need to clare the type after PointCells?

That’s fine. The topic is a bit complicated, I needed some time to grasp the concept.

Parametric types define structures whose fields can have multiple types, but those types must be declared before use. Vector{Int} is an example, with the type parameter Int.
The above struct may be written as

struct PointsCells{P,C,S}
    points::P
    cell::C
    pairs::S
end

That makes it easier to play with various representations. Say, the points may be in a vector of tuples instead of a matrix, Pairs instead of 2-tuples may be used for pairs, while the structure definition won’t need any change if it’s parametric. I now often find myself writing a parametric type even when I don’t plan using multiple data representations, only because the concrete type names are too long.

Omitting field types hits performance, as the types then become non-inferrable at compile time, so it’s generally not recommended. But then again, you may do it to play with different representations and put concrete types once you’ve made the decision which one best suits your needs.

The manual entry on parametric types: Types · The Julia Language
I’d also recommend the book discussed in topic New book: Hands-on Design Patterns and Best Practices with Julia

1 Like

Thank you for your explanation, I understand a little now. The parametric types can be thought as the input of a function, and thus the output, i.e., the types of the fields inside the structure, are decided. So we can use only just one struct but try not only one types with the it. Thus avoid the repeat declaration of the struct.
Also thank you for your recommendation, I will buy the e-book after soon.