I implemented Hoare and Lomuto’s partitions with iterators. They behave as expected in terms of numbers of comparisons (N-1
) and swaps (Hoare’s doing fewer than Lomuto’s).
However the benchmarks are giving me same times or even a bit better for Lomuto, even with the edge case of an all zeros vector (in which I have confirmed Hoare does N/2
swaps and Lomuto does N-1
swaps).
I wonder if the fact that I’m creating two iterators and zipping them (one of them reversed) in Hoare offsets the advantage of fewer swaps?
Here are the two functions:
function lomutopartitioniter!(arr::AbstractVector, pivot_index::Integer)
@inbounds arr[pivot_index], arr[end] = arr[end], arr[pivot_index]
pivot_value = arr[end]
it_lr = Iterators.filter(p -> p[2] <= pivot_value, pairs(@view(arr[begin:end-1])))
final_pivot_index = firstindex(arr)
@inbounds for (j, _) in it_lr
arr[final_pivot_index], arr[j] = arr[j], arr[final_pivot_index]
final_pivot_index += 1
end
@inbounds arr[end], arr[final_pivot_index] = arr[final_pivot_index], arr[end]
return final_pivot_index
end
function hoarepartitioniter!(arr::AbstractVector, pivot_index::Integer)
@inbounds arr[pivot_index], arr[begin] = arr[begin], arr[pivot_index]
pivot_value = arr[begin]
# We take equal elements on both iterators to guarantee final pivot will be in the middle.
it_lr = Iterators.filter(p -> p[2] >= pivot_value, pairs(@view(arr[begin+1:end])))
it_rl = Iterators.filter(p -> p[2] <= pivot_value, Iterators.reverse(pairs(@view(arr[begin+1:end]))))
final_pivot_index = firstindex(arr)
@inbounds for ((i1, _), (i2, _)) in Iterators.zip(it_lr, it_rl)
final_pivot_index = firstindex(arr) + i2
i1 >= i2 && break
arr[begin+i1], arr[begin+i2] = arr[begin+i2], arr[begin+i1]
final_pivot_index = firstindex(arr) + i1
end
if final_pivot_index == firstindex(arr)
if isempty(it_lr)
arr[begin], arr[end] = arr[end], arr[begin]
return lastindex(arr)
else
return firstindex(arr)
end
end
@inbounds arr[begin], arr[final_pivot_index] = arr[final_pivot_index], arr[begin]
return final_pivot_index
end
I tried using the profiler but honestly I’m not sure how to use it well.
Is there some optimization I can do here? Or is this a matter of how I am benchmarking?
Thank you!