I found a ~10x faster alternative to partialsortperm
using StaticArrays.jl and an insertion sort. This is still the top answer on Google so thought I’d add to this in case people are looking for a super fast maxk
.
using StaticArrays: MVector
const MAXK = 10_000 # Just to preallocate Val(i); increase as needed
const PREALLOC_VALS = ntuple(Val, MAXK)
bottomk(x, k) = _bottomk_dispatch(x, PREALLOC_VALS[k])
function _bottomk_dispatch(x::AbstractVector{T}, ::Val{k}) where {T,k}
@assert k >= 2
indmin = MVector{k}(ntuple(_ -> 0, k))
minval = MVector{k}(ntuple(_ -> typemax(T), k))
_bottomk!(x, minval, indmin)
return [minval...], [indmin...]
end
function _bottomk!(x, minval, indmin)
@inbounds @fastmath for i in eachindex(x)
new_min = x[i] < minval[end]
if new_min
minval[end] = x[i]
indmin[end] = i
for ki in length(minval):-1:2
need_swap = minval[ki] < minval[ki - 1]
if need_swap
minval[ki], minval[ki - 1] = minval[ki - 1], minval[ki]
indmin[ki], indmin[ki - 1] = indmin[ki - 1], indmin[ki]
end
end
end
end
return nothing
end
we can see that it is much faster than the partialsortperm
strategy:
julia> @btime bottomk(x, 5) setup=(x=randn(1000));
779.506 ns (5 allocations: 320 bytes)
julia> @btime partialsortperm(x, 1:5) setup=(x=randn(1000));
8.122 μs (4 allocations: 12.05 KiB)
even if I use preallocation:
julia> idx = collect(eachindex(x));
julia> @btime partialsortperm!(idx, x, 1:5) setup=(x=randn(1000));
7.365 μs (4 allocations: 4.16 KiB)
and gives the same outputs:
julia> bottomk(x, 5)[2]
5-element Vector{Int64}:
185
603
21
386
105
julia> partialsortperm(x, 1:5)
5-element view(::Vector{Int64}, 1:5) with eltype Int64:
185
603
21
386
105
This is an extension of @Elrod’s solution in Why is minimum so much faster than argmin? - #11 by Elrod. I couldn’t seem to get @turbo
working for topk
here (but please share if you find one!)