I was tempted to use arrays of static 3-element vectors for x, y, z coordinates that will be used in many geometrical calculations. To test performance, I wrote some code that calculates unit vectors along a curve (from each point to the next) and between every pair of points on the curve.
using StaticArrays, LinearAlgebra, BenchmarkTools
"""
    unitvec!(uv, v)

Overwrite `uv` with `v` divided by its norm and return `uv`.

The norm of `v` is computed once, before anything is written, so `uv` and `v` may be
the very same array (in-place normalisation). A zero-norm `v` is not special-cased.
"""
function unitvec!(uv::AbstractArray, v::AbstractArray)
    len = norm(v)
    # Explicit form of `uv .= v ./ len`
    broadcast!(/, uv, v, len)
    return uv
end
"""
    unitvec(v)

Return a new vector equal to `v` divided by its norm; `v` itself is left untouched.

A zero-norm `v` is not special-cased.
"""
function unitvec(v::AbstractVector)
    len = norm(v)
    # Explicit form of `v ./ len`
    return broadcast(/, v, len)
end
"""
    genCur(n)

Generate a random test curve of `n` three-component points and return the same
coordinates in four storage layouts.

The first point is the origin; every later point is the previous one plus a step whose
components come from `rand`.

# Returns
A tuple `(rrow, rcol, rsv, rmv)`:
- `rrow`: `n × 3` matrix, one point per row
- `rcol`: `3 × n` matrix, one point per column
- `rsv`: vector of `SVector{3}`
- `rmv`: vector of `MVector{3}`

The four containers do not share memory.
"""
function genCur(n)
    steps = rand(n, 3)
    steps[1, :] .= 0    # a zero first step pins the first point to the origin
    # Out-of-place cumsum: avoids passing one array to `cumsum!` as both source and
    # destination.
    rrow = cumsum(steps; dims=1)
    rcol = permutedims(rrow)
    # Broadcasting a constructor already builds a fresh array, so no extra `copy`.
    rsv = SVector{3}.(eachrow(rrow))
    rmv = MVector{3}.(eachrow(rrow))
    return rrow, rcol, rsv, rmv
end
"""
    nextuv_row(rrow)

Return the unit vectors pointing from each point of the curve to the next one, for a
matrix that stores one point per row.

Row `i` of the result is the normalised difference between rows `i + 1` and `i` of
`rrow`, so the result has one row fewer than the input.
"""
function nextuv_row(rrow)
    steps = diff(rrow, dims=1)
    # `eachrow` yields views, so each row is normalised in place inside `steps`
    for step in eachrow(steps)
        unitvec!(step, step)
    end
    return steps
end
"""
    nextuv_col(rcol)

Return the unit vectors pointing from each point of the curve to the next one, for a
matrix that stores one point per column.

Column `j` of the result is the normalised difference between columns `j + 1` and `j`
of `rcol`, so the result has one column fewer than the input.
"""
function nextuv_col(rcol)
    steps = diff(rcol, dims=2)
    # `eachcol` yields views, so each column is normalised in place inside `steps`
    for step in eachcol(steps)
        unitvec!(step, step)
    end
    return steps
end
"""
    nextuv_sv(rsv)

Return the unit vectors pointing from each point of the curve to the next one, for an
array whose elements are the points themselves (e.g. `SVector`s).

Element `i` of the result is `unitvec(rsv[i + 1] - rsv[i])` along the first dimension,
so the result is one element shorter than the input along that dimension.
"""
function nextuv_sv(rsv)
    rows = axes(rsv, 1)
    heads = selectdim(rsv, 1, (first(rows) + 1):last(rows))
    tails = selectdim(rsv, 1, first(rows):(last(rows) - 1))
    # One fused broadcast: subtract and normalise in a single pass, so the un-normalised
    # differences are never stored in a temporary array.
    return unitvec.(heads .- tails)
end
"""
    nextuv_mv(rmv)

Return the unit vectors pointing from each point of the curve to the next one, for an
array whose elements are mutable point vectors (e.g. `MVector`s).

`diff` creates a new element for every difference; each of those is then normalised in
place. The elements of `rmv` itself are not modified.
"""
function nextuv_mv(rmv)
    uvmv = diff(rmv, dims=1)
    # Plain loop rather than `map`: only the in-place side effect is wanted, and `map`
    # would also collect the return values into a second array that is thrown away.
    for v in uvmv
        unitvec!(v, v)
    end
    return uvmv
end
"""
    pairwiseuv_row(rrow)

Return the unit vectors between every pair of points, for a matrix that stores one
point per row.

For an `n × d` input the result is `n × d × n`; the slice `[i, :, k]` is the normalised
difference `rrow[i, :] - rrow[k, :]`. Slices with `i == k` are the normalisation of a
zero difference and are not special-cased.
"""
function pairwiseuv_row(rrow)
    npts, ndim = size(rrow, 1), size(rrow, 2)
    # Lay the points out as 1 × ndim × npts so that broadcasting against the
    # npts × ndim input forms every pairwise difference at once.
    others = reshape(copy(rrow'), 1, ndim, npts)
    dirs = rrow .- others
    for k in axes(dirs, 3)
        for i in axes(dirs, 1)
            dir = view(dirs, i, :, k)
            unitvec!(dir, dir)
        end
    end
    return dirs
end
"""
    pairwiseuv_col(rcol)

Return the unit vectors between every pair of points, for a matrix that stores one
point per column.

For a `d × n` input the result is `d × n × n`; the slice `[:, j, k]` is the normalised
difference `rcol[:, j] - rcol[:, k]`. Slices with `j == k` are the normalisation of a
zero difference and are not special-cased.
"""
function pairwiseuv_col(rcol)
    ndim, npts = size(rcol, 1), size(rcol, 2)
    # View the points as ndim × 1 × npts so that broadcasting against the
    # ndim × npts input forms every pairwise difference at once.
    others = reshape(rcol, ndim, 1, npts)
    dirs = rcol .- others
    for k in axes(dirs, 3)
        for j in axes(dirs, 2)
            dir = view(dirs, :, j, k)
            unitvec!(dir, dir)
        end
    end
    return dirs
end
"""
    pairwiseuv_sv(rsv)

Return the matrix of unit vectors between every pair of points, for a vector whose
elements are the points themselves (e.g. `SVector`s).

Entry `[i, j]` of the result is `unitvec(rsv[i] - rsv[j])`. Entries with `i == j` are
the normalisation of a zero difference and are not special-cased.
"""
function pairwiseuv_sv(rsv)
    # Fully dotted expression: subtraction and normalisation fuse into one broadcast
    # kernel, so the matrix of raw differences is never materialised and only the
    # result matrix is allocated.
    return unitvec.(rsv .- permutedims(rsv))
end
"""
    pairwiseuv_mv(rmv)

Return the matrix of unit vectors between every pair of points, for a vector whose
elements are mutable point vectors (e.g. `MVector`s).

Entry `[i, j]` starts as `rmv[i] - rmv[j]` (a new element created by the subtraction)
and is then normalised in place. The elements of `rmv` itself are not modified. Entries
with `i == j` are the normalisation of a zero difference and are not special-cased.
"""
function pairwiseuv_mv(rmv)
    uvpmv = rmv .- permutedims(rmv)
    # Plain loop rather than `map`: only the in-place side effect is wanted, and `map`
    # would also collect the return values into a second matrix that is thrown away.
    for v in uvpmv
        unitvec!(v, v)
    end
    return uvpmv
end
#----------------------------------------------------------------
# Driver: build one 1000-point test curve in all four storage layouts, then run every
# kernel once on its matching layout.
rrow, rcol, rsv, rmv = genCur(1000);
# Unit vectors between consecutive points, one call per storage layout.
uvrow = nextuv_row(rrow);
uvcol = nextuv_col(rcol);
uvsv = nextuv_sv(rsv);
uvmv = nextuv_mv(rmv);
# Unit vectors between every pair of points, one call per storage layout.
uvprow = pairwiseuv_row(rrow);
uvpcol = pairwiseuv_col(rcol);
uvpsv = pairwiseuv_sv(rsv);
uvpmv = pairwiseuv_mv(rmv);
The result is kind of, ehh, underwhelming?
julia> @benchmark $uvrow = nextuv_row($rrow)
BenchmarkTools.Trial: 10000 samples with 1 evaluation.
Range (min … max): 22.200 μs … 5.561 ms ┊ GC (min … max): 0.00% … 99.37%
Time (median): 30.300 μs ┊ GC (median): 0.00%
Time (mean ± σ): 30.967 μs ± 92.985 μs ┊ GC (mean ± σ): 5.19% ± 1.72%
[histogram bars lost to character-encoding corruption]
22.2 μs Histogram: log(frequency) by time 51.4 μs <
Memory estimate: 23.52 KiB, allocs estimate: 2.
julia> @benchmark $uvcol = nextuv_col($rcol)
BenchmarkTools.Trial: 10000 samples with 1 evaluation.
Range (min … max): 23.200 μs … 6.258 ms ┊ GC (min … max): 0.00% … 99.46%
Time (median): 31.300 μs ┊ GC (median): 0.00%
Time (mean ± σ): 31.198 μs ± 88.420 μs ┊ GC (mean ± σ): 4.80% ± 1.72%
[histogram bars lost to character-encoding corruption]
23.2 μs Histogram: log(frequency) by time 39.6 μs <
Memory estimate: 23.52 KiB, allocs estimate: 2.
julia> @benchmark $uvsv = nextuv_sv($rsv)
BenchmarkTools.Trial: 10000 samples with 5 evaluations.
Range (min … max): 5.660 μs … 1.598 ms ┊ GC (min … max): 0.00% … 98.71%
Time (median): 21.520 μs ┊ GC (median): 0.00%
Time (mean ± σ): 19.479 μs ± 49.566 μs ┊ GC (mean ± σ): 12.76% ± 5.09%
[histogram bars lost to character-encoding corruption]
5.66 μs Histogram: log(frequency) by time 36.4 μs <
Memory estimate: 47.03 KiB, allocs estimate: 4.
julia> @benchmark $uvmv = nextuv_mv($rmv)
BenchmarkTools.Trial: 10000 samples with 1 evaluation.
Range (min … max): 11.700 μs … 2.052 ms ┊ GC (min … max): 0.00% … 97.22%
Time (median): 12.400 μs ┊ GC (median): 0.00%
Time (mean ± σ): 16.273 μs ± 44.583 μs ┊ GC (mean ± σ): 6.03% ± 2.20%
[histogram bars lost to character-encoding corruption]
11.7 μs Histogram: log(frequency) by time 32.7 μs <
Memory estimate: 47.09 KiB, allocs estimate: 1001.
julia> @benchmark $uvprow = pairwiseuv_row($rrow)
BenchmarkTools.Trial: 181 samples with 1 evaluation.
Range (min … max): 25.059 ms … 37.212 ms ┊ GC (min … max): 0.00% … 25.63%
Time (median): 26.288 ms ┊ GC (median): 0.00%
Time (mean ± σ): 27.631 ms ± 3.108 ms ┊ GC (mean ± σ): 5.19% ± 8.71%
[histogram bars lost to character-encoding corruption]
25.1 ms Histogram: frequency by time 35.1 ms <
Memory estimate: 22.91 MiB, allocs estimate: 5.
julia> @benchmark $uvpcol = pairwiseuv_col($rcol)
BenchmarkTools.Trial: 171 samples with 1 evaluation.
Range (min … max): 26.860 ms … 39.529 ms ┊ GC (min … max): 0.00% … 0.00%
Time (median): 27.741 ms ┊ GC (median): 0.00%
Time (mean ± σ): 29.267 ms ± 3.223 ms ┊ GC (mean ± σ): 4.95% ± 8.35%
[histogram bars lost to character-encoding corruption]
26.9 ms Histogram: frequency by time 38 ms <
Memory estimate: 22.91 MiB, allocs estimate: 5.
julia> @benchmark $uvpsv = pairwiseuv_sv($rsv)
BenchmarkTools.Trial: 348 samples with 1 evaluation.
Range (min … max): 9.165 ms … 27.065 ms ┊ GC (min … max): 0.00% … 25.63%
Time (median): 11.888 ms ┊ GC (median): 0.00%
Time (mean ± σ): 14.368 ms ± 3.707 ms ┊ GC (mean ± σ): 20.11% ± 19.27%
[histogram bars lost to character-encoding corruption]
9.17 ms Histogram: log(frequency) by time 21.3 ms <
Memory estimate: 45.78 MiB, allocs estimate: 6.
julia> @benchmark $uvpmv = pairwiseuv_mv($rmv)
BenchmarkTools.Trial: 191 samples with 1 evaluation.
Range (min … max): 13.722 ms … 49.025 ms ┊ GC (min … max): 0.00% … 34.56%
Time (median): 21.945 ms ┊ GC (median): 0.00%
Time (mean ± σ): 26.188 ms ± 11.302 ms ┊ GC (mean ± σ): 26.16% ± 21.42%
[histogram bars lost to character-encoding corruption]
13.7 ms Histogram: frequency by time 47.4 ms <
Memory estimate: 45.78 MiB, allocs estimate: 1000006.
Looking at the number of allocations, it appears that the member SVectors/MVectors were not mutated but rather replaced by new vectors, so could this be the reason for the lousy performance? I have tried using `@views`,
but this doesn't seem to change the behavior.
Any pro tips?