VERSION = v"0.6.2"
I have a triangle/jagged array of a data structure that contains a vector d
and a coefficient. I want to find the elementwise minimum and maximum of all the d
s of the datastructures in the jagged array. I am comparing three methods. I realized that the more verbose the code, the faster it runs. Also the most succint one uses all the memory on my machine (16GB) even for a small array.
# minmax.jl
mutable struct Data{F}
d::Vector{F}
coef::F
end
function mmcat(triarr)
dds = [[d.d for d in dd] for dd in triarr]
lo = min.(vcat(dds...)...)
hi = max.(vcat(dds...)...)
lo, hi
end
function mmfor(triarr)
itr = Iterators.flatten(triarr)
lo = first(itr).d
hi = first(itr).d
for i in itr
lo = min.(lo, i.d)
hi = max.(hi, i.d)
end
lo, hi
end
function mmitr(triarr)
itr = Iterators.flatten(triarr)
lo = mapreduce(d->d.d, (d1, d2)->min.(d1, d2), first(itr).d, itr)
hi = mapreduce(d->d.d, (d1, d2)->max.(d1, d2), first(itr).d, itr)
lo, hi
end
tridata(n) = [[Data(rand(0:9999, 3), 1) for i in 1:j] for j in 1:n]
td3 = tridata(3)
@show mmfor(td3) mmitr(td3) mmcat(td3)
N = 100
tdN = tridata(N)
@show sizeof(tdN)
@show (N*(N+1)/2) * 4 * 8
@show @time mmfor(tdN)
@show @time mmitr(tdN)
@show @time mmcat(tdN)
When I run I get the following output.
$ julia minmax.jl
mmfor(td3) = ([3028, 148, 657], [9595, 8595, 9345])
mmitr(td3) = ([3028, 148, 657], [9595, 8595, 9345])
mmcat(td3) = ([3028, 148, 657], [9595, 8595, 9345])
sizeof(tdN) = 800
((N * (N + 1)) / 2) * 4 * 8 = 161600.0
0.000699 seconds (15.24 k allocations: 1.239 MiB)
@time(mmfor(tdN)) = ([2, 4, 1], [9999, 9998, 9993])
0.000742 seconds (20.21 k allocations: 1.387 MiB)
@time(mmitr(tdN)) = ([2, 4, 1], [9999, 9998, 9993])
^C
I have to cancel because all the 16GB are taken up by mmcat
, not to mention the time it takes even for N=30
!
What’s happening with mmcat
? What is the julian way of doing this?