The improvements of that original code I can see are:

set N
inside the getSparse
function, because there it is a global variable.

Use @views
in the function, because the slices allocate new arrays.

add @inbounds
to your assignments.
With those modifications your functions takes almost one third of the original time (shown below).
The most simple implementation of that with CellListMap is shown bellow, and is slightly faster in serial.
A working example is shown bellow, with the following results (I am not running parallel versions, because for 1000 points they do not seem to be worthwhile, and because the second options needs a custom reduction function for the sparse array. It will be more easy and useful if you have property to compute from the pairs directly).
julia> include("./norberg.jl")
Original:
4.944 ms (64724 allocations: 2.09 MiB)
Original  tweaked:
1.860 ms (4336 allocations: 522.50 KiB)
CellListMap with neighbourlist:
1.244 ms (1766 allocations: 671.86 KiB)
CellListMap without neighbourlist:
1.278 ms (1753 allocations: 575.41 KiB)
Code
using NearestNeighbors, SparseArrays, BenchmarkTools
using CellListMap
using StaticArrays
using Test
function getSparse(v,W)
S=spzeros(Float32,N,N)
for j in 1:N
for i in v[j]
if j<i
S[i,j]=S[j,i]=sqrt(sum(abs2.(W[:,i].W[:,j])))
end
end
end
S
end
@views function getSparse2(v,W)
N = size(W,2)
S=spzeros(Float32,N,N)
for j in 1:N
for i in v[j]
if j<i
@inbounds S[i,j]=S[j,i]=sqrt(sum(abs2.(W[:,i].W[:,j])))
end
end
end
S
end
function nn(W,minDistance)
tree=KDTree(W,leafsize=1,reorder=true)
v=inrange(tree,W,minDistance,true)
getSparse(v,W)
end
function nn2(W,minDistance)
tree=KDTree(W,leafsize=1,reorder=true)
v=inrange(tree,W,minDistance,true)
getSparse2(v,W)
end
# Using the CellListMap.neighbourlist function
function cl_nn(W,minDistance)
W_svector = reinterpret(reshape,SVector{3,Float32},W)
pairs = CellListMap.neighbourlist(W_svector,minDistance,parallel=false)
N = size(W,2)
S = spzeros(Float32,N,N)
for (i,j,d) in pairs
@inbounds S[i,j] = d
@inbounds S[j,i] = d
end
S
end
# Now let us populate the sparse matrix without building the list
function cl_nn2(W,minDistance)
N = size(W,2)
S = spzeros(Float32,N,N)
W_svector = reinterpret(reshape,SVector{3,Float32},W)
box = Box(limits(W_svector),minDistance)
cl = CellList(W_svector,box,parallel=false)
map_pairwise!(
(x,y,i,j,d2,S) > begin
d = sqrt(d2)
@inbounds S[i,j] = d
@inbounds S[j,i] = d
return S
end,
S,
box, cl, parallel=false
)
return S
end
N=1000
W=rand(Float32,3,N) #points
minDistance=0.1
@test nn(W,minDistance) ≈ cl_nn(W,minDistance)
@test nn(W,minDistance) ≈ cl_nn2(W,minDistance)
println("Original:")
@btime nn($W,$minDistance)
println("Original  tweaked:")
@btime nn2($W,$minDistance)
println("CellListMap with neighbourlist:")
@btime cl_nn($W,$minDistance)
println("CellListMap without neighbourlist:")
@btime cl_nn2($W,$minDistance)
nothing
(note: that reinterpret(reshape...
is only there because one auxiliary function is not accepting matrices as inputs. I will fix that tomorrow and that will no longer be necessary)