Actualy I was able to find the answer to my own question: it appears that I just need to tell Julia that the heap size will not change by defining it as a struct and not a mutable struct ! Doing so the gain of speed is consequent. I post here the code:
Few changes on the heap type definition:
struct PreallocatedBinaryHeap{N, T, O <: Base.Ordering}
ordering::O
tree::MVector{N, T}
n::MVector{1, Int}
buf::MVector{1, T}
#n::Integer # index of the next entry
#buf::T # buffer to avoid allocation
function PreallocatedBinaryHeap{N, T}(o::Base.Ordering) where {N, T}
@assert N > 0 "Preallocated length should be > 0"
new{N, T, typeof(o)}(o, MVector{N, T}(undef), ones(MVector{1, Int}), MVector{1, T}(undef))
#new{N, T, typeof(o)}(o, MVector{N, T}(undef), 1, Vector{T}(undef, 1)[1])
end
function PreallocatedBinaryHeap{N, T}(o::Base.Ordering, v::AbstractVector) where {N, T}
n_el::Integer = length(v)
@assert N >= n_el "Preallocated length should be greater than vector length"
tree = MVector{N, T}(undef)
tree[1:n_el] .= heapify(v, o)
new{N, T, typeof(o)}(o, tree, MVector{1, Int}(n_el + 1), MVector{1, T}(undef))
#new{N, T, typeof(o)}(o, tree, n_el + 1, Vector{T}(undef, 1)[1])
end
end
PreallocatedBinaryHeap(o::Base.Ordering, v::AbstractVector{T}, N::Integer) where T = PreallocatedBinaryHeap{N, T}(o, v)
Base.length(h::PreallocatedBinaryHeap) = h.n[1] - 1 #h.n - 1
Base.isempty(h::PreallocatedBinaryHeap) = h.n[1] == 1 #h.n == 1
function isfull(h::PreallocatedBinaryHeap{N, T, O}) where {N, T, O}
return h.n[1] > N #h.n > N
end
Base.getindex(h::PreallocatedBinaryHeap, i) = begin
@assert (i <= length(h)) && (i > 0) "Index out of bounds"
getindex(h.tree, i)
end
Base.iterate(h::PreallocatedBinaryHeap) = (h[1], 2)
Base.iterate(h::PreallocatedBinaryHeap, state) = (state <= length(h) ? (h[state], state + 1) : nothing)
Base.lastindex(h::PreallocatedBinaryHeap) = length(h)
Base.push!(h::PreallocatedBinaryHeap{N, T, O}, v::T) where {N, T, O} = begin
@assert !isfull(h) "The heap is full"
h.tree[h.n[1]] = v #h.tree[h.n] = v
h.n[1] += 1 #h.n += 1
percUp!(h, length(h))
end
Base.pop!(h::PreallocatedBinaryHeap) = begin
@assert !isempty(h) "The heap is empty"
h.buf[1] = h.tree[1] #h.buf = h.tree[1]
h.tree[1] = h.tree[length(h)]
h.n[1] -= 1 #h.n -= 1
percDown!(h, 1)
return h.buf[1] #h.buf
end
function percUp!(h::PreallocatedBinaryHeap, i::Integer)
parent = i >>> 1
@inbounds while parent > 0
# Compare to the parent and swap if needed
if Base.lt(h.ordering, h.tree[i], h.tree[parent])
h.buf[1] = h.tree[parent] #h.buf = h.tree[parent]
#tmp = h.tree[parent]
h.tree[parent] = h.tree[i]
h.tree[i] = h.buf[1] #h.buf #tmp
end
i = parent
parent >>>= 1
end
end
function percUp!(v::AbstractVector, i::Integer, o::Base.Ordering)
parent = i >>> 1
@inbounds while parent > 0
# Compare to the parent and swap if needed
if Base.lt(o, v[i], v[parent])
tmp = v[parent]
v[parent] = v[i]
v[i] = tmp
end
i = parent
parent >>>= 1
end
end
function percDown!(h::PreallocatedBinaryHeap, i::Integer)
heapsize = length(h)
@inbounds while (2 * i) <= heapsize
# find the smallest child according to the ordering
if (2 * i + 1) > heapsize
sc = 2 * i
elseif Base.lt(h.ordering, h.tree[2 * i], h.tree[2 * i + 1])
sc = 2 * i
else
sc = 2 * i + 1
end
# swap if needed
if Base.lt(h.ordering, h.tree[sc], h.tree[i])
h.buf[1] = h.tree[i] #h.buf = h.tree[i]
#tmp = h.tree[i]
h.tree[i] = h.tree[sc]
h.tree[sc] = h.buf[1] #h.buf #tmp
end
i = sc
end
end
function percDown!(v::AbstractVector, i::Integer, o::Base.Ordering)
vectsize = length(v)
@inbounds while (2 * i) <= vectsize
# find the smallest child according to the ordering
if (2 * i + 1) > vectsize
sc = 2 * i
elseif Base.lt(o, v[2 * i], v[2 * i + 1])
sc = 2 * i
else
sc = 2 * i + 1
end
# swap if needed
if Base.lt(o, v[sc], v[i])
tmp = v[i]
v[i] = v[sc]
v[sc] = tmp
end
i = sc
end
end
function heapify!(v::AbstractVector, o::Base.Ordering)
for i in div(length(v), 2):-1:1
percDown!(v, i, o)
end
end
heapify(v::AbstractVector, o::Base.Ordering) = begin
v_copy = copyto!(similar(v), v)
heapify!(v_copy, o)
return v_copy
end
And the benchmark:
@time begin
h = PreallocatedBinaryHeap(Base.Forward, rand(UInt8, 10), 200)
while !isfull(h)
push!(h, rand(UInt8))
end
while !isempty(h)
pop!(h)
end
end
# 0.000147 seconds (13 allocations: 848 bytes)
@time begin
h = PreallocatedBinaryHeap(Base.Forward, rand(UInt8, 10), 1_000_000)
while !isfull(h)
push!(h, rand(UInt8))
end
while !isempty(h)
pop!(h)
end
end
# 0.222954 seconds (14 allocations: 977.266 KiB)