Hi,
I think you are making this a little bit too complicated. In particular, the inner loops over the columns `m` and `n` aren’t necessary.
Using Query.jl, the following solution is much faster:
"""
    looptest7(data, loopsize)

For every pair `(i, j)` with `i, j in 1:loopsize`, bin the rows of `data` by
`fld(x, i)` and `fld(y, j)`, keep only the bins that contain exactly `i * j` rows,
and take the minimum of `z` within each of those bins.

Returns a `DataFrame` with one row per `(i, j)` pair and the columns

- `cfg`: the product `i * j`,
- `count`: the number of bins that were kept,
- `ave`: the mean of the per-bin minima.

Note that `data` is modified: its `m` and `n` columns are overwritten with the bin
indices of the current iteration.
"""
function looptest7(data, loopsize)
    # Typed empty columns via the keyword constructor; the positional
    # `DataFrame(types, names, nrows)` form is not available in current DataFrames.jl.
    summary = DataFrame(cfg = Int64[], count = Int64[], ave = Float64[])
    for i in 1:loopsize
        # The x-binning depends only on `i`, so it is computed once per outer iteration.
        data.m .= fld.(data.x, i)
        for j in 1:loopsize
            cfg = i * j
            data.n .= fld.(data.y, j)
            # `row` (not `i`) as the range variable, so the loop index is not shadowed.
            minima = @from row in data begin
                @group row.z by {row.m, row.n} into g
                @where length(g) == cfg
                @select minimum(g)
                @collect
            end
            # A tuple keeps each value's own type; `[a b c]` would build a 1×3 matrix
            # and promote the two integers to Float64 before they are converted back.
            push!(summary, (cfg, length(minima), mean(minima)))
        end
    end
    return summary
end
julia> @btime looptest7(data,4)
42.480 ms (184858 allocations: 13.03 MiB)
The `looptest6` that I did not share was similar to `looptest5`, but without using Query.jl and only using `groupby`. `looptest7` is twice as fast, because `groupby` on DataFrames isn’t type-stable… (see: Type of groupby(df,id) elements are Any)