Thanks! I will look into that. My current algorithm does not do the naive thing of checking all neighbours every step though. So I don’t think there is a huge win in performance in improving the algorithm (I can be wrong) but I know that memory handling is often at least as important as the algorithm in practice.
Having looked at the pseudo code from the article I could improve the runtime of my code by about 10-15%. I already used most of the concepts from that article, why the improvement wasn’t larger.
The main improvement came from realising that the algorithm either finds a larger or a smaller element than the previous one in the same step, and never none of them and never both, so there could be an if - else instead of two checks. I.e:
if vect[i] > vect[i-1] # ...
If anyone is interested, I post the improved code below. I am still interested in more Julia specific improvements which I suspect would further improve the performance.
Improved code:
function rolling_optima(vect::Any, win_size::Integer; debug=false)
roll_max = zeros(Float64, length(vect))
roll_min = zeros(Float64, length(vect))
opt_index = zeros(Int16, length(vect))
opt_type = zeros(Int8, length(vect))
d_max = Deque{Float64}()
d_min = Deque{Float64}()
win_forward = win_size
win_backward = win_forward*2
opt_ind = 0
#comp = 10
pushfirst!(d_max,vect[1])
pushfirst!(d_min,vect[1])
@inbounds for i in 2:length(vect)
if vect[i] > vect[i-1]
@inbounds for k in 1:length(d_max)
if vect[i] >= front(d_max) popfirst!(d_max)
else break end
end
else
@inbounds for k in 1:length(d_min)
if vect[i] <= front(d_min) popfirst!(d_min)
else break end
end
end
pushfirst!(d_max,vect[i])
pushfirst!(d_min,vect[i])
j = i + 1 - win_forward
if j >= 1
roll_max[i] = back(d_max)
roll_min[i] = back(d_min)
if vect[j] == back(d_max)
if roll_max[i] == roll_max[j]
if j-win_backward > 0
if roll_max[j] > maximum(vect[j-win_backward:j-1])
opt_ind +=1
opt_index[opt_ind] = j
opt_type[opt_ind] = 1
end
end
end
pop!(d_max)
elseif vect[j] == back(d_min)
if roll_min[i] == roll_min[j]
if j-win_backward > 0
if roll_min[j] < minimum(vect[j-win_backward:j-1])
opt_ind +=1
opt_index[opt_ind] = j
opt_type[opt_ind] = -1
end
end
end
pop!(d_min)
end
end
end
opt_index = opt_index[1:opt_ind]
opt_type = opt_type[1:opt_ind]
if debug
return opt_index, opt_type, roll_max, roll_min
else
return opt_index, opt_type
end
end
I’m curious to know happens if you replace Deque{Float64}
with Vector{Float64}
? (The native Julia
array type also supports O(1) amortized push/pop from both ends, and I’m curious to know how it compares.)
Note that maximum(vect[j-win_backward:j-1])
allocates a copy of the slice; you might want to use @views
or just replace it with a loop. (And it seems like you could do even better, e.g. with a heap-like data structure, so that you don’t have to loop over the entire window for every j
.) Similarly for the minimum
call.
Using Vector{Float64}
seems to increase the runtime by almost 40% compared to using the Deque{Float64}
Yes! This is the part of the code that is the newest functional addition and yes it’s probably far from optimal, however I don’t think the code usually spends a lot of time here anyway so the benefits of optimizing this part are probably small.
Using views here gave about 1.5% of speedup. Nothing to write home about but certainly welcome from such a simple change.
As other have already said the key question is " you don’t need to loop over the entire window for every j
"
I changed that part of the code to
if j-win_backward > 0
failed = false
for v in @view vect[j-win_backward:j-1]
if roll_max[j] < v
failed = true
break end
end
if !failed
opt_ind +=1
opt_index[opt_ind] = j
opt_type[opt_ind] = 1
end
it added a slight improvement in speed (again a few %). Is that what you were thinking about or were you thinking about something more sophisticated?
I think your algorithm is O(N), so there is nothing more to improve here. I checked that your code allocates a lot, despite of using views. Maybe I have no your latest code, so please veryfy.
julia> x = rand(10000);
julia> K = 90;
julia> @time result1 = rolling_optima(x, K, debug=true)[3];
3.253689 seconds (4.66 M allocations: 724.375 MiB, 2.50% gc time)
julia> @time result2 = movingmax(x, K);
0.000350 seconds (10.01 k allocations: 546.469 KiB)
julia> result1[K:end] == result2
true
And my implementation of window max:
using LinkedLists
function movingmax(array::AbstractArray{T}, K::Integer) where T <: Real
@assert K > 0
@assert K < length(array)
result = zeros(T, length(array)-K+1)
list = LinkedLists.LinkedList{Tuple{Int, T}}()
windowindex = 0
resultindex = 1
isready = false
for item in array
if !isempty(list) && first(list)[1] == windowindex
popfirst!(list)
end
while !isempty(list) && last(list)[2] < item
pop!(list)
end
push!(list, (windowindex, item))
windowindex = (windowindex + 1) % K
if windowindex == 0
isready = true
end
if isready
result[resultindex] = first(list)[2]
resultindex = resultindex + 1
end
end
return result
end
My latest code below: (note that I have changed the name of a few functions since they were changed in the updated DataStructures.jl package which I now use.
function rolling_optima(vect::Any, win_size::Integer; debug=false)
roll_max = zeros(Float64, length(vect))
roll_min = zeros(Float64, length(vect))
opt_index = zeros(Int16, length(vect))
opt_type = zeros(Int8, length(vect))
d_max = Deque{Float64}()
d_min = Deque{Float64}()
win_forward = win_size
win_backward = win_forward*2
opt_ind = 0
#comp = 10
pushfirst!(d_max,vect[1])
pushfirst!(d_min,vect[1])
@inbounds for i in 2:length(vect)
if vect[i] > vect[i-1]
@inbounds for k in 1:length(d_max)
if vect[i] >= first(d_max) popfirst!(d_max)
else break end
end
else
@inbounds for k in 1:length(d_min)
if vect[i] <= first(d_min) popfirst!(d_min)
else break end
end
end
pushfirst!(d_max,vect[i])
pushfirst!(d_min,vect[i])
j = i + 1 - win_forward
if j >= 1
roll_max[i] = last(d_max)
roll_min[i] = last(d_min)
if vect[j] == last(d_max)
if roll_max[i] == roll_max[j]
if j-win_backward > 0
failed = false
for v in @view vect[j-win_backward:j-1]
if roll_max[j] < v
failed = true
break end
end
if !failed
opt_ind +=1
opt_index[opt_ind] = j
opt_type[opt_ind] = 1
end
end
end
pop!(d_max)
elseif vect[j] == last(d_min)
if roll_min[i] == roll_min[j]
if j-win_backward > 0
failed = false
for v in @view vect[j-win_backward:j-1]
if roll_min[j] > v
failed = true
break end
end
if !failed
opt_ind +=1
opt_index[opt_ind] = j
opt_type[opt_ind] = -1
end
end
end
pop!(d_min)
end
end
end
opt_index = opt_index[1:opt_ind]
opt_type = opt_type[1:opt_ind]
if debug
return opt_index, opt_type, roll_max, roll_min
else
return opt_index, opt_type
end
end
Now to your results. It seems that something was strange with running my function. Here are my current results.
julia> x = rand(10000)
@time rr = rolling_optima(x, 90, debug=true);
0.000927 seconds (26 allocations: 203.266 KiB)
julia> @time rr = rolling_optima(x, 90, debug=false);
0.000916 seconds (20 allocations: 203.031 KiB)
julia> @btime rr = rolling_optima($x, 90, debug=false);
501.711 μs (16 allocations: 202.88 KiB)
While not as good as the results for your function movingmax not as bad as what you had. (I of course did “warm up” runs first when using @time) Could you verify my results?
Confirm, I must have been using code before all optimizations
julia> @time result1 = rolling_optima(x, K, debug=true)[3];
0.000625 seconds (26 allocations: 203.266 KiB)
julia> @time result2 = movingmax(x, K);
0.000341 seconds (10.01 k allocations: 546.469 KiB)
So I think you are pretty fast now, cause your code is doing much more things.
This is very nice to hear! And you are right, my function is doing more. Do you have a feeling that your function is close to what is possible in terms of speed? It’s actually very satisfying to se how few allocations my code is currently performing!
In terms of complexity my code (your also) is optimal - O(N). Anyway, I think much more could be improved in my function, for instance when I changed LinkedList to Deque I am 15% faster.
Very nice! Did you get rid of some of the allocations? Maybe the part of my code that does the simple rolling maximum is very similar to your code after that…?
Yes, alocations are much lower
julia> @btime result1 = rolling_optima($x, $K, debug=true)[3];
439.911 μs (16 allocations: 202.89 KiB)
julia> @btime result2 = movingmax($x, $K);
243.229 μs (10003 allocations: 546.31 KiB)
# Now using Deque
julia> @btime result2 = movingmax($x, $K);
208.768 μs (4 allocations: 93.70 KiB)
Just for comparison I reduced my function to a rolling_maxmin()
and a rolling_max()
one. The last one of course loses some tricks but should be very close to yours?
function rolling_maxmin(vect::Any, win_size::Integer)
roll_max = zeros(Float64, length(vect))
roll_min = zeros(Float64, length(vect))
d_max = Deque{Float64}()
d_min = Deque{Float64}()
pushfirst!(d_max,vect[1])
pushfirst!(d_min,vect[1])
@inbounds for i in 2:length(vect)
if vect[i] > vect[i-1]
@inbounds for k in 1:length(d_max)
if vect[i] >= first(d_max) popfirst!(d_max)
else break end
end
else
@inbounds for k in 1:length(d_min)
if vect[i] <= first(d_min) popfirst!(d_min)
else break end
end
end
pushfirst!(d_max,vect[i])
pushfirst!(d_min,vect[i])
j = i + 1 - win_size
if j >= 1
roll_max[i] = last(d_max)
roll_min[i] = last(d_min)
if vect[j] == last(d_max)
pop!(d_max)
elseif vect[j] == last(d_min)
pop!(d_min)
end
end
end
return roll_max, roll_min
end
function rolling_max(vect::Any, win_size::Integer)
roll_max = zeros(Float64, length(vect))
d_max = Deque{Float64}()
pushfirst!(d_max,vect[1])
@inbounds for i in 2:length(vect)
if vect[i] > vect[i-1]
@inbounds for k in 1:length(d_max)
if vect[i] >= first(d_max) popfirst!(d_max)
else break end
end
end
pushfirst!(d_max,vect[i])
j = i + 1 - win_size
if j >= 1
roll_max[i] = last(d_max)
if vect[j] == last(d_max)
pop!(d_max)
end
end
end
return roll_max
end
julia> x = rand(1000000);
julia> K = 9000;
julia> result2 = @btime movingmax($x, $K);
21.238 ms (4 allocations: 7.58 MiB)
julia> result3 = @btime rolling_max($x, $K);
24.017 ms (5 allocations: 7.64 MiB)
Yes they are close even for bigger N and K, I think the trick my algorithm is doing is no looking backward to the vector in each iteration, cause my Deque is keeping more information in a tuple. We have also a bit different memory access pattern (last/first in Deque). I have no idea why I did it this way. It is a rewrite from my old code in C#, but I recall I spent a lot of time on tuning this thing.
UPDATE: This package implements similar algorithm using custom ring buffers, it’s 2x faster than our approach.
Alright. With some help from @Jakub_Wronowski I fixed a bug in the code. I also switched to using a CircularDeque (this gives a big improvement in speed - more than 2x at least for some parameters). By doing that I could get close to the speed of the best libraries that are using a custom ring buffer. I will be satisfied with that for now!
Below is the current code:
function rolling_optima(vect::Any, win_size::Integer; debug=false)
roll_max = zeros(Float64, length(vect))
roll_min = zeros(Float64, length(vect))
opt_type = zeros(Int8, length(vect))
opt_index = zeros(Int64, length(vect))
d_max = CircularDeque{Int64}(win_size)
d_min = CircularDeque{Int64}(win_size)
win_forward = win_size
win_backward = win_forward*2
opt_ind = 0
#comp = 10
pushfirst!(d_max,1)
pushfirst!(d_min,1)
@inbounds for i in 2:length(vect)
if vect[i] > vect[i-1]
@inbounds for k in 1:length(d_max)
if vect[i] >= vect[first(d_max)]
popfirst!(d_max)
else break end
end
pushfirst!(d_max,i)
else
@inbounds for k in 1:length(d_min)
if vect[i] <= vect[first(d_min)]
popfirst!(d_min)
else break end
end
pushfirst!(d_min,i)
end
j = i + 1 - win_forward
if j >= 1
roll_max[i] = vect[last(d_max)]
roll_min[i] = vect[last(d_min)]
if j == last(d_max)
if roll_max[i] == roll_max[j]
if j-win_backward > 0
failed = false
for v in @view vect[j-win_backward:j-1]
if roll_max[j] < v
failed = true
break end
end
if !failed
opt_ind +=1
opt_index[opt_ind] = j
opt_type[opt_ind] = 1
end
end
end
pop!(d_max)
elseif j == last(d_min)
if roll_min[i] == roll_min[j]
if j-win_backward > 0
failed = false
for v in @view vect[j-win_backward:j-1]
if roll_min[j] > v
failed = true
break end
end
if !failed
opt_ind +=1
opt_index[opt_ind] = j
opt_type[opt_ind] = -1
end
end
end
pop!(d_min)
end
end
end
opt_index = opt_index[1:opt_ind]
opt_type = opt_type[1:opt_ind]
if debug
return opt_index, opt_type, roll_max, roll_min
else
return opt_index, opt_type
end
end
Hi everyone!
I attach the latest version of my code below. For many cases it almost matches the movmaxmin()
from MaxMinFilters.jl which was the fastest implementation I could find. My code is surely not as good but adds the functionality of finding optima with the use of rolling maximum/minimum.
function rolling_optima(vect::Any, win_size::Integer; debug=false)
roll_max = Vector{Float64}(undef, length(vect))
roll_min = Vector{Float64}(undef, length(vect))
opt_type = Vector{Int8}(undef, length(vect))
opt_index = Vector{Int64}(undef, length(vect))
d_max = CircularDeque{Int64}(win_size)
d_min = CircularDeque{Int64}(win_size)
win_forward = win_size
win_backward = win_forward*2
opt_ind = 0
pushfirst!(d_max,1)
pushfirst!(d_min,1)
@inbounds for i in 2:length(vect)
if vect[i] > vect[i-1]
@inbounds for k in 1:length(d_max)
if vect[i] >= vect[first(d_max)]
popfirst!(d_max)
else break end
end
pushfirst!(d_max,i)
else
@inbounds for k in 1:length(d_min)
if vect[i] <= vect[first(d_min)]
popfirst!(d_min)
else break end
end
pushfirst!(d_min,i)
end
j = i + 1 - win_forward
if j >= 1
roll_max[i] = vect[last(d_max)]
roll_min[i] = vect[last(d_min)]
if j == last(d_max)
if roll_max[i] == roll_max[j]
if j-win_backward > 0
failed = false
# @inbounds for v in @view vect[j-win_backward:j-1]
check_back = @view vect[j-1:-1:j-win_backward]
@inbounds for k in 1:(win_backward)
if roll_max[j] < check_back[k]
failed = true
break end
end
if !failed
opt_ind +=1
opt_index[opt_ind] = j
opt_type[opt_ind] = 1
end
end
end
pop!(d_max)
elseif j == last(d_min)
if roll_min[i] == roll_min[j]
if j-win_backward > 0
failed = false
# @inbounds for v in @view vect[j-win_backward:j-1]
check_back = @view vect[j-1:-1:j-win_backward]
@inbounds for k in 1:(win_backward)
if roll_min[j] > check_back[k]
failed = true
break end
end
if !failed
opt_ind +=1
opt_index[opt_ind] = j
opt_type[opt_ind] = -1
end
end
end
pop!(d_min)
end
else
roll_max[i] = 0
roll_min[i] = 0
end
end
opt_index = opt_index[1:opt_ind]
opt_type = opt_type[1:opt_ind]
if debug
return opt_index, opt_type, roll_max, roll_min
else
return opt_index, opt_type
end
end
Nice! Are you sure you pointed correct package? Aren’t it in MaxMinFilters.jl?
Sorry, my fault!