I’m not sure the script does exactly what you asked for, but it does it quickly:
from 100x to 600x faster than the original comprehension (with that caveat in mind).
julia> using DataFrames
julia> using Statistics
julia> using BenchmarkTools
julia> LEN = 10^4 # 10^7
10000
julia> data = DataFrame(
timestamp=cumsum(rand(1:3,LEN)),
values = cumsum(rand(1:0.1:2,LEN))
);
julia> function coalesceNothing(x, default)
           # Substitute `default` only when `x` is exactly `nothing`;
           # every other value is handed back untouched.
           if x === nothing
               return default
           end
           return x
       end
coalesceNothing (generic function with 1 method)
julia> getFirstIndex(x,days,i) = coalesceNothing(findfirst((days .+ x .- x[i]).>0),1)  # first j with x[j] > x[i] - days (1 if findfirst finds none); builds a mask over all of x on every call
getFirstIndex (generic function with 1 method)
julia> function findls(t, ls, le; tr=10)
           # Scan forward from `ls` up to `le` and return the first index whose
           # timestamp lies strictly less than `tr` before `t[le]`.
           # `tr` is a keyword (default 10, the width that used to be hard-coded)
           # so existing three-argument calls behave exactly as before.
           for idx in ls:le
               t[le] - t[idx] < tr && return idx
           end
           # Reached only when no index qualifies (e.g. `ls > le` or `tr <= 0`).
           return nothing
       end
findls (generic function with 1 method)
julia> function windows(t,v,len,tr)
           # Trailing-window mean: r[i] is the mean of v[ls:i], where ls is the
           # first index (it only ever moves forward) with t[i] - t[ls] < tr.
           # The left edge is advanced here using `tr` itself; delegating to a
           # helper with a fixed width of 10 gave wrong windows for tr != 10.
           r = Vector{Float64}(undef, len)
           ls = 1
           for i in 1:len
               # `ls < i` keeps the current sample in the window even if tr <= 0.
               while ls < i && t[i] - t[ls] >= tr
                   ls += 1
               end
               # The slice copies on purpose: the benchmark recorded right after
               # this definition measures this allocating form.
               r[i] = mean(v[ls:i])
           end
           return r
       end
windows (generic function with 1 method)
julia> @benchmark data.mean10 = [mean(data.values[i:-1:getFirstIndex(data.timestamp,10,i)]) for i in 1:LEN]
BenchmarkTools.Trial: 97 samples with 1 evaluation.
Range (min … max): 32.966 ms … 83.545 ms ┊ GC (min … max): 5.18% … 14.00%
Time (median): 47.397 ms ┊ GC (median): 10.13%
Time (mean ± σ): 50.598 ms ± 10.511 ms ┊ GC (mean ± σ): 11.83% ± 5.08%
█▇▄ ▂▂
▅▃▁▁▁▃▁▃▆▅▅█▅███▅██▅▁▅█▆▅▃▃▆▁▆▁▅▁▁▅▃▃▁▃▃▃▁▁▃▃▃▁▃▃▁▁▁▁▃▁▁▃▁▃ ▁
33 ms Histogram: frequency by time 81.7 ms <
Memory estimate: 55.97 MiB, allocs estimate: 78980.
julia> @benchmark data.w = windows(data.timestamp,data.values,LEN, 10)
BenchmarkTools.Trial: 10000 samples with 1 evaluation.
Range (min … max): 322.300 μs … 8.207 ms ┊ GC (min … max): 0.00% … 0.00%
Time (median): 398.100 μs ┊ GC (median): 0.00%
Time (mean ± σ): 488.760 μs ± 273.640 μs ┊ GC (mean ± σ): 5.37% ± 10.45%
▃▆█▅▄▃▄▄▃▃▂▁ ▁
████████████████▇▇▇▇█▇██████▇▇▇▇▇▇▆▇▇▇▇▅▅▅▆▃▆▆▇▆▆▆▅▆▆▆▆▅▅▅▅▅▅ █
322 μs Histogram: log(frequency) by time 1.55 ms <
Memory estimate: 1.06 MiB, allocs estimate: 10003.
julia> LEN = 10^5
100000
julia> data = DataFrame(
timestamp=cumsum(rand(1:3,LEN)),
values = cumsum(rand(1:0.1:2,LEN))
);
julia> @benchmark data.mean10 = [mean(data.values[i:-1:getFirstIndex(data.timestamp,10,i)]) for i in 1:LEN]
BenchmarkTools.Trial: 2 samples with 1 evaluation.
Range (min … max): 2.504 s … 3.208 s ┊ GC (min … max): 2.56% … 2.65%
Time (median): 2.856 s ┊ GC (median): 2.61%
Time (mean ± σ): 2.856 s ± 497.907 ms ┊ GC (mean ± σ): 2.61% ± 0.07%
█ █
█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█ ▁
2.5 s Histogram: frequency by time 3.21 s <
Memory estimate: 1.60 GiB, allocs estimate: 798980.
julia> @benchmark data.w = windows(data.timestamp,data.values,LEN, 10)
BenchmarkTools.Trial: 1004 samples with 1 evaluation.
Range (min … max): 3.795 ms … 26.344 ms ┊ GC (min … max): 0.00% … 0.00%
Time (median): 4.354 ms ┊ GC (median): 0.00%
Time (mean ± σ): 4.967 ms ± 1.828 ms ┊ GC (mean ± σ): 5.41% ± 8.36%
▇█▅▃▁ ▁▄▆▅▄▃▂ ▁▁▁
█████████████▇███████▆▇▆▆▄▁▁▄▆▁▁▄▆▁▅▄▅▄▁▄▆▆▆▁▆▇▄▁▁▄▁▁▁▁▄▁▄ █
3.8 ms Histogram: log(frequency) by time 12 ms <
Memory estimate: 10.53 MiB, allocs estimate: 100003.
julia> 2856/4.354
655.9485530546624
julia> LEN = 10^6
1000000
julia> data = DataFrame(
timestamp=cumsum(rand(1:3,LEN)),
values = cumsum(rand(1:0.1:2,LEN))
);
julia> @benchmark data.w = windows(data.timestamp,data.values,LEN, 10)
BenchmarkTools.Trial: 104 samples with 1 evaluation.
Range (min … max): 41.656 ms … 117.542 ms ┊ GC (min … max): 5.18% … 4.44%
Time (median): 45.485 ms ┊ GC (median): 6.18%
Time (mean ± σ): 48.892 ms ± 10.045 ms ┊ GC (mean ± σ): 5.93% ± 1.55%
█▆▅▁
▃█████▆▃▃▃▃▃▃▃▃▃▃▅▃▁▃▃▃▁▁▃▃▁▁▁▁▁▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃ ▃
41.7 ms Histogram: frequency by time 93.9 ms <
Memory estimate: 105.48 MiB, allocs estimate: 1000003.
julia> LEN = 10^7
10000000
julia> data = DataFrame(
timestamp=cumsum(rand(1:3,LEN)),
values = cumsum(rand(1:0.1:2,LEN))
);
julia> @benchmark data.w = windows(data.timestamp,data.values,LEN, 10)
BenchmarkTools.Trial: 11 samples with 1 evaluation.
Range (min … max): 446.053 ms … 520.803 ms ┊ GC (min … max): 5.41% … 4.92%
Time (median): 455.523 ms ┊ GC (median): 5.65%
Time (mean ± σ): 462.309 ms ± 20.360 ms ┊ GC (mean ± σ): 5.63% ± 0.37%
█ ▃
▇▁▁▁▁█▁█▇▁▁▇▁▁▇▁▁▁▇▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▇ ▁
446 ms Histogram: frequency by time 521 ms <
Memory estimate: 1.03 GiB, allocs estimate: 10000003.
With LEN = 10^5 and `@view` inside `windows` (redefined below), the time ratio getFirstIndex / windows is about 1500x:
julia> LEN = 10^5
100000
"""
    windows(t, v, len, tr)

Compute a trailing-window mean of `v` keyed by the timestamps `t`.

For each `i` in `1:len`, entry `i` of the result is the mean of `v[ls:i]`, where
`ls` is the first index with `t[i] - t[ls] < tr`. The left edge `ls` only moves
forward, so this equals a true "last `tr` time units" window only when `t` is
non-decreasing (the demo data is a `cumsum` of positive steps).

# Arguments
- `t`: timestamps, indexable from 1 to `len`.
- `v`: values, indexable from 1 to `len`.
- `len`: number of leading samples to process.
- `tr`: window width; a sample `j` is kept while `t[i] - t[j] < tr`.

# Returns
A `Vector{Float64}` of length `len`.
"""
function windows(t, v, len, tr)
    r = Vector{Float64}(undef, len)
    ls = 1
    for i in 1:len
        # Advance the left edge using `tr` itself; the previous helper compared
        # against a literal 10 and so ignored `tr` whenever it differed from 10.
        # `ls < i` keeps the current sample in the window even if `tr <= 0`.
        while ls < i && t[i] - t[ls] >= tr
            ls += 1
        end
        # A view avoids copying the window on every iteration.
        r[i] = mean(@view v[ls:i])
    end
    return r
end
julia> data = DataFrame(
timestamp=cumsum(rand(1:3,LEN)),
values = cumsum(rand(1:0.1:2,LEN))
);
julia> @benchmark data.w = windows(data.timestamp,data.values,LEN, 10)
BenchmarkTools.Trial: 2582 samples with 1 evaluation.
Range (min … max): 1.367 ms … 20.850 ms ┊ GC (min … max): 0.00% … 0.00%
Time (median): 1.782 ms ┊ GC (median): 0.00%
Time (mean ± σ): 1.932 ms ± 829.656 μs ┊ GC (mean ± σ): 1.62% ± 6.28%
▃▆▆▆█▇██▇▆▆▆▅▄▃▃▂▁▂▁▁ ▁ ▂
███████████████████████████▇▆▇▇▇▇▅▆▅▆▅▆▇▇▅▇▇▆▇▆▇▆█▆▅▆▅▆▅▅▁▅ █
1.37 ms Histogram: log(frequency) by time 4.01 ms <
Memory estimate: 781.31 KiB, allocs estimate: 3.
julia> @benchmark data.mean10 = [mean(data.values[i:-1:getFirstIndex(data.timestamp,10,i)]) for i in 1:LEN]
BenchmarkTools.Trial: 2 samples with 1 evaluation.
Range (min … max): 2.654 s … 3.031 s ┊ GC (min … max): 3.06% … 2.96%
Time (median): 2.843 s ┊ GC (median): 3.00%
Time (mean ± σ): 2.843 s ± 266.208 ms ┊ GC (mean ± σ): 3.00% ± 0.07%
█ █
█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█ ▁
2.65 s Histogram: frequency by time 3.03 s <
Memory estimate: 1.60 GiB, allocs estimate: 798982.
julia> 2843/1.782
1595.3984287317621