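Can someone shed some light on this? I just don’t understand what is going wrong here: iterating over the result of `groupby` inside a function is not inferred, so both the loop variable and the accumulator end up typed as `Any`. This is on DataFrames master, on both Julia v0.7 and v0.6.2. I don’t think this was an issue before.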
using DataFrames
"""
    sumall(df, id)

Return the total of the `vals` column of `df`, accumulated one group at a time
over `groupby(df, id)`. The accumulator starts at the literal `0.0`, and that
value is returned unchanged when the loop body never runs.
"""
function sumall(df, id)
    s = 0.0
    for subdf in groupby(df, id)
        s = s + sum(subdf[:vals])  # add this group's `vals` total to the running sum
    end
    return s
end
# Example table with 10 rows: `id` is five 0s followed by five 1s, `vals` is random.
df = DataFrame(id = [zeros(Int, 5); ones(Int, 5)], vals = rand(10))
# Print the inferred types for a call that groups on the `id` column.
@code_warntype sumall(df, :id)
The `@code_warntype` output is:
julia> @code_warntype sumall(df,:id)
Variables:
df::DataFrame
id::Symbol
s::Any
#temp#@_5::Int64
subdf::Any
sort<optimized out>
skipmissing<optimized out>
Body:
begin
s::Any = 0.0
#= line 3 =#
# meta: location C:\Users\tbeason\.julia\v0.7\DataFrames\src\groupeddataframe/grouping.jl groupby 94
# meta: location C:\Users\tbeason\.julia\v0.7\DataFrames\src\groupeddataframe/grouping.jl #groupby#86 94
Core.SSAValue(9) = $(Expr(:invoke, MethodInstance for vect(::Symbol, ::Vararg{Symbol,N} where N), :(Base.vect), :(id)))::Array{Symbol,1}
# meta: location boot.jl Type 498
# meta: location boot.jl Type 507
# meta: location boot.jl @generated body
#= line 507 =#
goto 12
# meta: pop location
12:
# meta: pop locations (2)
# meta: location #groupby 0
goto 16
16:
goto 18
18:
goto 20
20:
Core.SSAValue(32) = $(Expr(:invoke, MethodInstance for #groupby#85(::Bool, ::Bool, ::Function, ::DataFrame, ::Array{Symbol,1}), :(DataFrames.:(#groupby#85)), false, false, :(DataFrames.groupby), :(df), Core.SSAValue(9)))::GroupedDataFrame
# meta: pop locations (3)
#temp#@_5::Int64 = 1
24:
# meta: location C:\Users\tbeason\.julia\v0.7\DataFrames\src\groupeddataframe/grouping.jl done 101
# meta: location sysimg.jl getproperty 8
Core.SSAValue(38) = (Base.getfield)(Core.SSAValue(32), :starts)::Array{Int64,1}
# meta: pop location
# meta: location array.jl length 138
Core.SSAValue(39) = (Base.arraylen)(Core.SSAValue(38))::Int64
# meta: pop location
# meta: location operators.jl > 250
# meta: location int.jl < 49
Core.SSAValue(41) = (Base.slt_int)(Core.SSAValue(39), #temp#@_5::Int64)::Bool
# meta: pop locations (3)
Core.SSAValue(3) = (Base.not_int)(Core.SSAValue(41))::Bool
unless Core.SSAValue(3) goto 48
Core.SSAValue(4) = $(Expr(:invoke, MethodInstance for next(::GroupedDataFrame, ::Int64), :(Base.next), Core.SSAValue(32), :(#temp#@_5)))::Tuple{Any,Int64}
subdf::Any = (Core.getfield)(Core.SSAValue(4), 1)::Any
#temp#@_5::Int64 = (Core.getfield)(Core.SSAValue(4), 2)::Int64
#= line 4 =#
Core.SSAValue(5) = s::Any
Core.SSAValue(6) = (Main.getindex)(subdf::Any, :vals)::Any
Core.SSAValue(7) = (Main.sum)(Core.SSAValue(6))::Any
s::Any = (Core.SSAValue(5) + Core.SSAValue(7))::Any
46:
goto 24
48:
#= line 6 =#
return s::Any
end::Any
Additional evidence pointing to the issue: indexing into the result of `groupby` is also inferred as `Any`.
julia> gb1(df,id) = groupby(df,id)[1]
gb1 (generic function with 1 method)
julia> gb1(df,:id)
5×2 SubDataFrame{Array{Int64,1}}
│ Row │ id │ vals │
├─────┼────┼──────────┤
│ 1 │ 0 │ 0.64403 │
│ 2 │ 0 │ 0.310913 │
│ 3 │ 0 │ 0.208627 │
│ 4 │ 0 │ 0.261357 │
│ 5 │ 0 │ 0.820113 │
julia> @code_warntype gb1(df,:id)
Variables:
df::DataFrame
id::Symbol
sort<optimized out>
skipmissing<optimized out>
Body:
begin
# meta: location C:\Users\tbeason\.julia\v0.7\DataFrames\src\groupeddataframe/grouping.jl groupby 94
# meta: location C:\Users\tbeason\.julia\v0.7\DataFrames\src\groupeddataframe/grouping.jl #groupby#86 94
Core.SSAValue(3) = $(Expr(:invoke, MethodInstance for vect(::Symbol, ::Vararg{Symbol,N} where N), :(Base.vect), :(id)))::Array{Symbol,1}
# meta: location boot.jl Type 498
# meta: location boot.jl Type 507
# meta: location boot.jl @generated body
#= line 507 =#
goto 10
# meta: pop location
10:
# meta: pop locations (2)
# meta: location #groupby 0
goto 14
14:
goto 16
16:
goto 18
18:
Core.SSAValue(26) = $(Expr(:invoke, MethodInstance for #groupby#85(::Bool, ::Bool, ::Function, ::DataFrame, ::Array{Symbol,1}), :(DataFrames.:(#groupby#85)), false, false, :(DataFrames.groupby), :(df), Core.SSAValue(3)))::GroupedDataFrame
# meta: pop locations (3)
Core.SSAValue(1) = $(Expr(:invoke, MethodInstance for getindex(::GroupedDataFrame, ::Int64), :(Main.getindex), Core.SSAValue(26), 1))::Any
return Core.SSAValue(1)
end::Any
The weird thing to me is that `groupby` itself is inferred correctly (it returns a `GroupedDataFrame`):
julia> @code_warntype groupby(df,:id)
Variables:
d::DataFrame
cols::Symbol
sort<optimized out>
skipmissing<optimized out>
Body:
begin
# meta: location C:\Users\tbeason\.julia\v0.7\DataFrames\src\groupeddataframe/grouping.jl #groupby#86 94
Core.SSAValue(1) = $(Expr(:invoke, MethodInstance for vect(::Symbol, ::Vararg{Symbol,N} where N), :(Base.vect), :(cols)))::Array{Symbol,1}
# meta: location boot.jl Type 498
# meta: location boot.jl Type 507
# meta: location boot.jl @generated body
#= line 507 =#
goto 9
# meta: pop location
9:
# meta: pop locations (2)
# meta: location #groupby 0
goto 13
13:
goto 15
15:
goto 17
17:
Core.SSAValue(24) = $(Expr(:invoke, MethodInstance for #groupby#85(::Bool, ::Bool, ::Function, ::DataFrame, ::Array{Symbol,1}), :(DataFrames.:(#groupby#85)), false, false, :(DataFrames.groupby), :(d), Core.SSAValue(1)))::GroupedDataFrame
# meta: pop locations (2)
return Core.SSAValue(24)
end::GroupedDataFrame