I understand that to get type-stable code using DataFrames.jl, you need to use `Tables.columntable` to convert the DataFrame into a NamedTuple. I was working on a Pluto notebook that tried to show the memory allocation behavior, but eventually I got pretty lost, as I cannot build a consistent understanding of how Julia compilation works.
The test script I used and its output under Julia 1.10.4 are shown below:
using DataFrames, InteractiveUtils
# Sum every cell of `df` by looking up each (row, column) position individually.
# The accumulator `res` starts as the integer literal 0 and is then combined with
# whatever `df[i, j]` returns; the `@code_warntype` report in this post shows both
# `res` and the `getindex` result inferred as `Any`.
# Returns the accumulated total (the initial 0 if no cell is visited).
function f_nested(df::DataFrame)
res = 0
# Outer loop over row positions, inner loop over column positions.
for i in 1:nrow(df)
for j in 1:ncol(df)
# Scalar lookup of a single cell by row and column position.
res += df[i, j]
end
end
res
end
# Sum every cell of `df` one column at a time: iterate over `eachcol(df)` and add
# `sum(col)` of each column to the accumulator.
# The accumulator `res` starts as the integer literal 0.
# Returns the accumulated total (the initial 0 if there are no columns).
function f_bycol(df::DataFrame)
res = 0
for col in eachcol(df)
# One reduction per column instead of one lookup per cell.
res += sum(col)
end
res
end
# Sum every cell of `df` one row at a time: iterate over `eachrow(df)` and add
# `sum(row)` of each row to the accumulator.
# The accumulator `res` starts as the integer literal 0.
# Returns the accumulated total (the initial 0 if there are no rows).
function f_byrow(df::DataFrame)
res = 0
for row in eachrow(df)
# One reduction per row.
res += sum(row)
end
res
end
# Driver for the allocation experiment.
# Builds four single-column DataFrames (Int and Float32 elements, 30 and 100 rows),
# prints the `@code_warntype` report for `f_nested` on the Float32 frame, calls each
# of the three summing functions once on the 100-row frames as warm-up, and returns
# a multi-line string with the `@allocated` result for every function/frame pair.
function run()
# 30-row frames ("small") and 100-row frames, each with a single column `a`.
dfi_small = DataFrame(:a => Int.(collect(1:30)))
dff_small = DataFrame(:a => Float32.(collect(1:30)))
dfi = DataFrame(:a => Int.(collect(1:100)))
dff = DataFrame(:a => Float32.(collect(1:100)))
@code_warntype f_nested(dff)
# Warm-up calls on the Int frame so the measurements below are taken after a first call.
f_nested(dfi)
f_bycol(dfi)
f_byrow(dfi)
f_nested(dff) # warm up again with the Float32 frame; otherwise the measurements below include memory allocated during compilation
f_bycol(dff)
f_byrow(dff)
# The interpolated `@allocated` expressions are evaluated in order when the string is built:
# small Int, small Float32, 100-row Int, then 100-row Float32.
"""
f_nested(dfi_small) allocated $(@allocated f_nested(dfi_small))
f_bycol(dfi_small) allocated $(@allocated f_bycol(dfi_small))
f_byrow(dfi_small) allocated $(@allocated f_byrow(dfi_small))
f_nested(dff_small) allocated $(@allocated f_nested(dff_small))
f_bycol(dff_small) allocated $(@allocated f_bycol(dff_small))
f_byrow(dff_small) allocated $(@allocated f_byrow(dff_small))
f_nested(dfi) allocated $(@allocated f_nested(dfi))
f_bycol(dfi) allocated $(@allocated f_bycol(dfi))
f_byrow(dfi) allocated $(@allocated f_byrow(dfi))
f_nested(dff) allocated $(@allocated f_nested(dff))
f_bycol(dff) allocated $(@allocated f_bycol(dff))
f_byrow(dff) allocated $(@allocated f_byrow(dff))
"""
end
# Run the experiment and print the allocation report string it returns.
print(run())
MethodInstance for f_nested(::DataFrame)
from f_nested(df::DataFrame) @ Main C:\Users\kirby\MyDrive\Documents\fourthwave\scripts\misc\test_allocated.jl:3
Arguments
#self#::Core.Const(f_nested)
df::DataFrame
Locals
@_3::Union{Nothing, Tuple{Int64, Int64}}
res::Any
@_5::Union{Nothing, Tuple{Int64, Int64}}
i::Int64
j::Int64
Body::Any
1 ─ (res = 0)
│ %2 = Main.nrow(df)::Int64
│ %3 = (1:%2)::Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])
│ (@_3 = Base.iterate(%3))
│ %5 = (@_3 === nothing)::Bool
│ %6 = Base.not_int(%5)::Bool
└── goto #7 if not %6
2 ┄ %8 = @_3::Tuple{Int64, Int64}
│ (i = Core.getfield(%8, 1))
│ %10 = Core.getfield(%8, 2)::Int64
│ %11 = Main.ncol(df)::Int64
│ %12 = (1:%11)::Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])
│ (@_5 = Base.iterate(%12))
│ %14 = (@_5 === nothing)::Bool
│ %15 = Base.not_int(%14)::Bool
└── goto #5 if not %15
3 ┄ %17 = @_5::Tuple{Int64, Int64}
│ (j = Core.getfield(%17, 1))
│ %19 = Core.getfield(%17, 2)::Int64
│ %20 = res::Any
│ %21 = Base.getindex(df, i, j)::Any
│ (res = %20 + %21)
│ (@_5 = Base.iterate(%12, %19))
│ %24 = (@_5 === nothing)::Bool
│ %25 = Base.not_int(%24)::Bool
└── goto #5 if not %25
4 ─ goto #3
5 ┄ (@_3 = Base.iterate(%3, %10))
│ %29 = (@_3 === nothing)::Bool
│ %30 = Base.not_int(%29)::Bool
└── goto #7 if not %30
6 ─ goto #2
7 ┄ return res
f_nested(dfi_small) allocated 0
f_bycol(dfi_small) allocated 0
f_byrow(dfi_small) allocated 960
f_nested(dff_small) allocated 960
f_bycol(dff_small) allocated 32
f_byrow(dff_small) allocated 2400
f_nested(dfi) allocated 1104
f_bycol(dfi) allocated 32
f_byrow(dfi) allocated 4304
f_nested(dff) allocated 3200
f_bycol(dff) allocated 32
f_byrow(dff) allocated 8000
There are two observations I don't understand:
- Even though `@code_warntype` shows type-unstable code, Julia 1.10 is able to optimize for small integer arrays (but not small float arrays) and eliminate memory allocations.
- Even though only one method instance is created for each of the 3 functions, calling it with either an integer or a float DataFrame results in two separate compilations, as suggested by the amount of memory allocated. You can verify this by commenting out the second set of warm-up calls.
- EDIT: I believe this could be caused by compilation of the integer and float versions of `sum`.
- EDIT: I believe this could be caused by compilation of int and float versions of