Pairwise computation slower than Python (Cython) code (BallTree very slow!)

Also this is odd, when I run your code (exactly as it is, with 100_000 particles) I still get lots of allocations,

julia> @time test()
  0.876833 seconds (1.54 M allocations: 56.900 MiB, 0.99% gc time)

Here is the exact code,

using CellListMap
using StaticArrays
using LinearAlgebra

export test

function getdx(x,y,period)
    delta = x - y
    return (delta > 0.5*period ? delta - period : delta)*sign(x-y)
end
get_periodic_difference_cell_lists(x,y,period) = 
    SVector{3,Float64}(ntuple(i -> getdx(x[i],y[i],period[i]), 3))

function compute_pairwise_mean_cell_lists!(x,y,i,j,d2,hist,velocities, rbins,sides)
    d = get_periodic_difference_cell_lists(x,y,sides)
    r = LinearAlgebra.norm(d)
    ibin = searchsortedfirst(rbins, r) - 1
    hist[1][ibin] += 1
    hist[2][ibin] += LinearAlgebra.dot(velocities[i] - velocities[j],d)/r
    return hist
end

function get_pairwise_velocity_radial_mean_cell_lists(
        positions, velocities,
        rbins,
        boxsize, 
        lc, box, n
        )
    positions = reshape(reinterpret(SVector{3,Float64},positions),n)
    velocities = reshape(reinterpret(SVector{3,Float64},positions),n)
    initlists!(positions,box,lc)
    hist = (zeros(Int,length(rbins)-1), zeros(Float64,length(rbins)-1))
    hist = map_pairwise!(
            (x,y,i,j,d2,hist) -> compute_pairwise_mean_cell_lists!(x,y,i,j,d2,hist,velocities, rbins, boxsize),
            hist, positions, box, lc,
    )
    n_pairs = hist[1]
    mean_v_r = hist[2]
    mean_v_r[n_pairs .> 0] = mean_v_r[n_pairs .> 0]./n_pairs[n_pairs .> 0]
    return mean_v_r
end

function test()
  n_halos = 100_000
  boxsize = 250.
  Lbox = [boxsize,boxsize,boxsize]
  positions = boxsize .* rand(Float64, 3, n_halos)
  velocities = rand(Float64, 3, n_halos)
  rbins = [0.,2.,4.,6.,8.,10.]
  
  n = size(positions)[2]
  r_max = maximum(rbins)
  lc = LinkedLists(n)
  box = Box(Lbox, r_max)

  get_pairwise_velocity_radial_mean_cell_lists(
    positions,
    velocities,
    rbins,
    Lbox,
    lc, box, n
  )

end