Maintaining a fixed size "top N" values list

Updated: I found a bug in my memmove option and in the previously posted bubble sort option (it needed an initial v[N] = x in the loop). I have fixed them here now. Also, I set all fills to Inf because the data includes negative numbers and so zeros would change the test.

After these changes, all the full sorting approaches are very near each other.

  35.825 ms (0 allocations: 0 bytes)
  638.915 ms (0 allocations: 0 bytes)
  135.472 ms (0 allocations: 0 bytes)
  32.901 ms (0 allocations: 0 bytes)
  125.850 ms (0 allocations: 0 bytes)
  32.692 ms (0 allocations: 0 bytes)

And oddly enough, shrinking the dataset back to the original size, the original plain sorting approach appears to be the fastest, but just barely.

  605.300 μs (0 allocations: 0 bytes)
  9.986 ms (0 allocations: 0 bytes)
  2.600 ms (0 allocations: 0 bytes)
  614.800 μs (0 allocations: 0 bytes)
  2.280 ms (0 allocations: 0 bytes)
  608.400 μs (0 allocations: 0 bytes)

---- Original Post ----
This community is great. Thank you all for so much help. I added two more options: SortedSet and finding the index + memmove (unsafe_copyto!). I used both searchsortedfirst and looping to find the index, but both resulted in about the same time. I separated out the initialization code from each approach to isolate timing just the usage. I also increased the test size (50m/10k) to match closer what I might be working with.

The bubble sort option still shows as the fastest for me. Curiously, the plain sort approach comes in a close second for me which is different than what you have been seeing, even when changing it back to the smaller data size. Is there some heuristic that got switched perhaps?

6.469 ms (0 allocations: 0 bytes) # plain sort
70.680 ms (0 allocations: 0 bytes) # partial sort
25.371 ms (0 allocations: 0 bytes) # modified heap
6.458 ms (0 allocations: 0 bytes) # bubble sort
26.923 ms (0 allocations: 0 bytes) # SortedSet
98.929 ms (0 allocations: 0 bytes) # unsafe_copyto!
Summary
init1(N) = zeros(N)
function min_values1(N, collection, v)
    @inbounds for x in collection
        if x < last(v)
            v[N] = x
            sort!(v)
        end
    end
    v
end
vr = min_values1(3, [4], [1,3,5])
# @info "check" vr
@assert vr == [1,3,4]
vr = min_values1(3, [6], [1,3,5])
# @info "check2" vr
@assert vr == [1,3,5]
vr = min_values1(3, [1], [1,3,5])
# @info "check3" vr
@assert vr == [1,1,3]
vr = min_values1(3, [0], [1,3,5])
# @info "check4" vr
@assert vr == [0,1,3]
vr = min_values1(3, [2], [1,3,5])
# @info "check5" vr
@assert vr == [1,2,3]

init2(N) = zeros(2N)
function min_values2(N, collection, v)
    i = 1
    @inbounds for x in collection
        v[i] = x

        i += 1
        if i > lastindex(v)
            partialsort!(v, 1:N)
            i = N+1
        end
    end
    partialsort!(v, 1:N)
end

using DataStructures

struct BoundedBinaryHeap{T, O <: Base.Ordering} <: DataStructures.AbstractHeap{T}
    ordering::O
    valtree::Vector{T}
    n::Int # maximum length

    function BoundedBinaryHeap{T}(n::Integer, ordering::Base.Ordering) where T
        n ≥ 1 || throw(ArgumentError("max heap size $n must be ≥ 1"))
        new{T, typeof(ordering)}(ordering, sizehint!(Vector{T}(), n), n)
    end

    function BoundedBinaryHeap{T}(n::Integer, ordering::Base.Ordering, xs::AbstractVector) where T
        n ≥ length(xs) || throw(ArgumentError("initial array is larger than max heap size $n"))
        valtree = sizehint!(DataStructures.heapify(xs, ordering), n)
        new{T, typeof(ordering)}(ordering, valtree, n)
    end
end

BoundedBinaryHeap(n::Integer, ordering::Base.Ordering, xs::AbstractVector{T}) where T = BoundedBinaryHeap{T}(n, ordering, xs)

BoundedBinaryHeap{T, O}(n::Integer) where {T, O<:Base.Ordering} = BoundedBinaryHeap{T}(n, O())
BoundedBinaryHeap{T, O}(n::Integer, xs::AbstractVector) where {T, O<:Base.Ordering} = BoundedBinaryHeap{T}(n, O(), xs)

Base.length(h::BoundedBinaryHeap) = length(h.valtree)
Base.isempty(h::BoundedBinaryHeap) = isempty(h.valtree)
@inline Base.first(h::BoundedBinaryHeap) = h.valtree[1]

@inline Base.pop!(h::BoundedBinaryHeap) = DataStructures.heappop!(h.valtree, h.ordering)

function Base.push!(h::BoundedBinaryHeap, v)
    if length(h) < h.n
        DataStructures.heappush!(h.valtree, v, h.ordering)
    elseif Base.Order.lt(h.ordering, @inbounds(h.valtree[1]), v)
        DataStructures.percolate_down!(h.valtree, 1, v, h.ordering)
    end
    return h
end

init3(N) = BoundedBinaryHeap{Float64}(N, Base.Order.Reverse)
function min_values3(collection, h)
    @inbounds for x in collection
        push!(h, x)
    end
    h
end

init4(N) = fill(Inf, N)
function min_values4(N, collection, v)
    @inbounds for x in collection
        x < v[N] || continue
        v[N] = x
        for i in N-1:-1:1
            x < v[i] || break
            (v[i], v[i+1]) = (x, v[i])
        end
    end
    v
end
vr = min_values4(3, [4], [1,3,5])
# @info "check" vr
@assert vr == [1,3,4]
vr = min_values4(3, [6], [1,3,5])
# @info "check2" vr
@assert vr == [1,3,5]
vr = min_values4(3, [1], [1,3,5])
# @info "check3" vr
@assert vr == [1,1,3]
vr = min_values4(3, [0], [1,3,5])
# @info "check4" vr
@assert vr == [0,1,3]
vr = min_values4(3, [2], [1,3,5])
# @info "check5" vr
@assert vr == [1,2,3]

init5(N) = SortedSet{Float64}(Base.Order.ReverseOrdering(), 1000000:(1000000+N-1))
function min_values5(collection, v)
    @inbounds for x in collection
        if x < first(v)
            delete!(v.bt, DataStructures.beginloc(v.bt)) # pop!(v)
            DataStructures.insert!(v.bt, convert(DataStructures.keytype(v),x), nothing, false) # push!(v, x)
        end
    end
    v
end

init6(N) = fill(Inf, N)
function min_values6(N, collection, v)
    # ptr = pointer(v)
    @inbounds for x in collection
        x < v[N] || continue
        # i = N-1
        # while (i >= 1 && x < v[i])
        #     i -= 1
        # end
        # unsafe_copyto!(pointer(v, i+2), pointer(v, i+1), N-i)
        # v[i+1] = x

        i = searchsortedfirst(v, x)
        if i < N
            # unsafe_copyto!(ptr + (i << 3), ptr + ((i-1) << 3), N-i)
            unsafe_copyto!(pointer(v, i+1), pointer(v, i), N-i)
        end
        v[i] = x
    end
    v
end
vr = min_values6(3, [4], [1,3,5])
# @info "check" vr
@assert vr == [1,3,4]
vr = min_values6(3, [6], [1,3,5])
# @info "check2" vr
@assert vr == [1,3,5]
vr = min_values6(3, [1], [1,3,5])
# @info "check3" vr
@assert vr == [1,1,3]
vr = min_values6(3, [0], [1,3,5])
# @info "check4" vr
@assert vr == [0,1,3]
vr = min_values6(3, [2], [1,3,5])
# @info "check5" vr
@assert vr == [1,2,3]

using BenchmarkTools
x = randn(10_000_000)
N = 1000

v1 = init1(N)
v1 = min_values1(N, x, v1);
v11 = init1(N)
@btime min_values1($N, $x, $v11);

v2 = init2(N)
v2 = min_values2(N, x, v2);
@assert last(v1) == last(v2)
v22 = init2(N)
@btime min_values2($N, $x, $v22);

v3 = init3(N)
v3 = min_values3(x, v3);
@assert last(v1) == pop!(v3)
v33 = init3(N)
@btime min_values3($x, $v33);

v4 = init4(N)
v4 = min_values4(N, x, v4);
@assert last(v1) == last(v4)
v44 = init4(N)
@btime min_values4($N, $x, $v44);

v5 = init5(N);
v5 = min_values5(x, v5);
# @info "check" first(v1) last(v1) first(v5) last(v5) length(v1) length(v5)
@assert last(v1) == first(v5);
v55 = init5(N);
@btime min_values5($x, $v55);

v6 = init6(N);
v6 = min_values6(N, x, v6);
# @info "check" first(v1) last(v1) first(v6) last(v6) length(v1) length(v6)
@assert last(v1) == last(v6);
v66 = init6(N);
@btime min_values6($N, $x, $v66);