# Sample table; each of its rows becomes one initial entry of the history.
df = DataFrame(A=[1, 2, 3], B=[:x, :y, :z], C=[(1, 3), (-2, 1), (4, 4)])

# History table. Columns as used by the functions in this file:
#   s  - step counter of the event
#   id - identifier of the record the event refers to
#   a  - state of the record after the event (missing once removed)
#   r  - state that was removed (filled by `rh`)
#   m  - state before a modification (filled by `mh`)
# It starts with a placeholder row (s = 0, id = 0) whose payloads are all missing.
dfh = DataFrame(
    s=0,
    id=0,
    a=missings(NamedTuple, 1),
    r=missings(NamedTuple, 1),
    m=missings(NamedTuple, 1),
)

# Seed the history: row i of `df` is recorded at step i under id i.
# `copy` of a DataFrameRow yields a NamedTuple, which is what column `a` stores.
for i in 1:3
    push!(dfh, (s=i, id=i, a=copy(df[i, :]), r=missing, m=missing))
end
#modify
"""
    mh(dfh, row, nt)

Record a modification of the record with id `row` in the history table `dfh`.

A new row is appended with the next step number. Its `a` column holds the
record's current state merged with the fields of the NamedTuple `nt`, and its
`m` column holds the state before the modification. Returns `dfh`.

The current state is taken from the last row of `dfh` carrying this id, so a
record can be modified any number of times. This relies on history rows being
appended in chronological order, which is how every function in this file
writes them.

# Throws
- `ArgumentError` if no row with id `row` exists, or if the record's latest
  state is `missing` (e.g. it has been removed).
"""
function mh(dfh, row, nt)
    # Latest history entry for this id; earlier entries are superseded states.
    idx = findlast(==(row), dfh.id)
    idx === nothing && throw(ArgumentError("no history entry with id $row"))
    current = dfh.a[idx]
    ismissing(current) &&
        throw(ArgumentError("id $row has no current state to modify"))
    next_s = maximum(dfh.s) + 1
    return push!(dfh, (s=next_s, id=row, a=merge(current, nt), r=missing, m=current))
end
mh(dfh, 2, (B=:Y,))
#remove
"""
    rh(dfh, row)

Record the removal of the record with id `row` in the history table `dfh`.

A new row is appended with the next step number, `a` set to `missing`, and the
`r` column holding the state the record had just before removal. Returns `dfh`.

The removed state is taken from the last row of `dfh` carrying this id, so
records that were modified earlier (and therefore own several history rows)
can be removed as well. This relies on history rows being appended in
chronological order, which is how every function in this file writes them.

# Throws
- `ArgumentError` if no row with id `row` exists.
"""
function rh(dfh, row)
    # Latest history entry for this id; earlier entries are superseded states.
    idx = findlast(==(row), dfh.id)
    idx === nothing && throw(ArgumentError("no history entry with id $row"))
    current = dfh.a[idx]
    next_s = maximum(dfh.s) + 1
    return push!(dfh, (s=next_s, id=row, a=missing, r=current, m=missing))
end
rh(dfh, 1)
#add
"""
    ah(dfh, nt)

Record the addition of a new record with state `nt` in the history table `dfh`.

The new row gets the next step number and a fresh id (one more than the
largest id present); `r` and `m` are left `missing`. Returns `dfh`.
"""
function ah(dfh, nt)
    next_id = maximum(dfh.id) + 1
    next_s = maximum(dfh.s) + 1
    entry = (s=next_s, id=next_id, a=nt, r=missing, m=missing)
    return push!(dfh, entry)
end
ah(dfh, (A=4, B=:w, C=(9, 9)))
#-----
"""
    step(n)

Return, for every id, the value of column `a` in its last history row among
the first `n + 1` rows of the global table `dfh` (the placeholder row plus
`n` recorded events).

The result has one row per id; the value column is named `a_function`.
"""
function step(n)
    # Row 1 is the placeholder (s = 0), so n events span rows 1 through n + 1.
    history = dfh[1:(n + 1), :]
    # The result is wrapped in a one-element vector so that a NamedTuple is
    # stored as a single value instead of being expanded into several columns;
    # a bare `last` mixes NamedTuple and `missing` results and errors.
    return combine(groupby(history, :id), :a => x -> [last(x)])
end
julia> step(6)
5×2 DataFrame
Row │ id a_function
│ Int64 NamedTupl…?
─────┼─────────────────────────────────────
1 │ 0 missing
2 │ 1 missing
3 │ 2 (A = 2, B = :Y, C = (-2, 1))
4 │ 3 (A = 3, B = :z, C = (4, 4))
5 │ 4 (A = 4, B = :w, C = (9, 9))
julia> step(4)
4×2 DataFrame
Row │ id a_function
│ Int64 NamedTupl…?
─────┼─────────────────────────────────────
1 │ 0 missing
2 │ 1 (A = 1, B = :x, C = (1, 3))
3 │ 2 (A = 2, B = :Y, C = (-2, 1))
4 │ 3 (A = 3, B = :z, C = (4, 4))
PS
Could someone explain the following error, which I got on my first attempt at defining the step() function?
julia> dfh
7×5 DataFrame
Row │ s id a r ⋯
│ Int64 Int64 NamedTup…? NamedTup…? ⋯
─────┼───────────────────────────────────────────────────────────────────────────
1 │ 0 0 missing missing ⋯
2 │ 1 1 (A = 1, B = :x, C = (1, 3)) missing
3 │ 2 2 (A = 2, B = :y, C = (-2, 1)) missing
4 │ 3 3 (A = 3, B = :z, C = (4, 4)) missing
5 │ 4 2 (A = 2, B = :Y, C = (-2, 1)) missing ⋯
6 │ 5 1 missing (A = 1, B = :x, C = (1, 3))
7 │ 6 4 (A = 4, B = :w, C = (9, 9)) missing
1 column omitted
julia> combine(groupby(dfh,:id),:a=>last)
ERROR: ArgumentError: function must return only single-column values, or only multiple-column values