I don’t know whether this is a realistic case, but in theory we could have a row of data like a0=Dict("a"=>0, "b"=>0), where the value of "b" is not a dictionary at all.
In that case, I wonder how it should be treated:
how do you unnest a value that is not nested?
using DataFrames, TidierData
"""
    expanDict_df(ndf, col; suff='_')

Spread the dictionaries stored in column `col` of `ndf` into one column per key.

Each new column is named `string(col, suff, key)`. Keys absent from a given
dictionary produce `missing` in that row. Rows whose `col` value is not a
dictionary keep their other columns and get `missing` in every new column.
The column `col` itself is dropped from the result.

# Arguments
- `ndf`: the data frame to expand.
- `col`: the column holding the dictionaries.

# Keywords
- `suff`: separator placed between `col` and each key (default `'_'`).

# Returns
A new `DataFrame`. Rows without a dictionary come first, followed by the
dictionary rows in their original order. If no row holds a dictionary, the
result is simply `ndf` without `col`.

# Notes
A value that is itself a dictionary with more than one entry (a second level of
nesting) is passed to `flatten`, so its row is repeated once per entry.
NOTE(review): the flattened cells then hold the dictionary's entries rather than
separate columns; confirm this is the intended treatment of deeper nesting.
"""
function expanDict_df(ndf, col; suff='_')
    isdictrow(row) = row[col] isa AbstractDict
    # Turn one dictionary into a NamedTuple whose names carry the `col` prefix.
    prefixed(d) = (; (Symbol(col, suff, k) => v for (k, v) in d)...)
    # Positions of the cells in `row` that hold a dictionary with several entries.
    nestedcols(row) = Int[
        i for (i, v) in enumerate(values(row)) if v isa AbstractDict && length(v) > 1
    ]

    plain = select(filter(!isdictrow, ndf), Not(col))
    dictrows = findall(isdictrow, eachrow(ndf))
    # With no dictionary rows there is nothing to expand, and reducing an empty
    # collection with `vcat` further down would throw; return early instead.
    isempty(dictrows) && return plain

    # `dictrowtable` takes the union of all keys, filling the gaps with `missing`.
    widened = DataFrame(Tables.dictrowtable(prefixed.(ndf[dictrows, col])))
    expanded = hcat(ndf[dictrows, Not(col)], widened)

    # Flatten row by row, because each row may nest in different columns.
    pieces = [
        flatten(expanded[r:r, :], nestedcols(row)) for
        (r, row) in enumerate(eachrow(expanded))
    ]
    flattened = reduce(vcat, pieces)

    return vcat(plain, flattened; cols=:union)
end
julia> a0=Dict("a"=>0, "b"=>0)
Dict{String, Int64} with 2 entries:
"b" => 0
"a" => 0
julia> a1=Dict("a"=>1, "b"=>Dict("c"=>1, "d"=>2)); a2=Dict("a"=>2, "b"=>Dict("c"=>2))
Dict{String, Any} with 2 entries:
"b" => Dict("c"=>2)
"a" => 2
julia> a3=Dict("a"=>3, "b"=>Dict("c"=>3))
Dict{String, Any} with 2 entries:
"b" => Dict("c"=>3)
"a" => 3
julia> a=[a0;a1;a2;a3]
4-element Vector{Dict{String}}:
Dict("b" => 0, "a" => 0)
Dict{String, Any}("b" => Dict("c" => 1, "d" => 2), "a" => 1)
Dict{String, Any}("b" => Dict("c" => 2), "a" => 2)
Dict{String, Any}("b" => Dict("c" => 3), "a" => 3)
julia> df=DataFrame(a)
4×2 DataFrame
Row │ b a
│ Any Int64
─────┼─────────────────────────────
1 │ 0 0
2 │ Dict("c"=>1, "d"=>2) 1
3 │ Dict("c"=>2) 2
4 │ Dict("c"=>3) 3
julia> expanDict_df(df, :b)
4×3 DataFrame
Row │ a b_c b_d
│ Int64 Int64? Int64?
─────┼─────────────────────────
1 │ 0 missing missing
2 │ 1 1 2
3 │ 2 2 missing
4 │ 3 3 missing
julia> udfb= @unnest_wider(df, b)
ERROR: Column b contains neither dictionaries nor arrays nor DataFrames
Stacktrace:
[1] error(s::String)
@ Base .\error.jl:35
[2] unnest_wider(df::DataFrame, cols::Vector{Symbol}; names_sep::Nothing)
@ TidierData C:\Users\sprmn\.julia\packages\TidierData\A0yDz\src\nests.jl:66
[3] top-level scope
@ C:\Users\sprmn\.julia\packages\TidierData\A0yDz\src\nests.jl:95
julia> a=[a1;a0;a2;a3]
4-element Vector{Dict{String}}:
Dict{String, Any}("b" => Dict("c" => 1, "d" => 2), "a" => 1)
Dict("b" => 0, "a" => 0)
Dict{String, Any}("b" => Dict("c" => 2), "a" => 2)
Dict{String, Any}("b" => Dict("c" => 3), "a" => 3)
julia> df=DataFrame(a)
4×2 DataFrame
Row │ b a
│ Any Int64
─────┼─────────────────────────────
1 │ Dict("c"=>1, "d"=>2) 1
2 │ 0 0
3 │ Dict("c"=>2) 2
4 │ Dict("c"=>3) 3
julia> expanDict_df(df, :b)
4×3 DataFrame
Row │ a b_c b_d
│ Int64 Int64? Int64?
─────┼─────────────────────────
1 │ 0 missing missing
2 │ 1 1 2
3 │ 2 2 missing
4 │ 3 3 missing
julia> udfb= @unnest_wider(df, b)
ERROR: MethodError: Cannot `convert` an object of type Int64 to an object of type String
Closest candidates are:
convert(::Type{String}, ::StringManipulation.Decoration)
@ StringManipulation C:\Users\sprmn\.julia\packages\StringManipulation\bMZ2A\src\decorations.jl:365
convert(::Type{String}, ::Base.JuliaSyntax.Kind)
@ Base C:\workdir\base\JuliaSyntax\src\kinds.jl:975
convert(::Type{String}, ::String)
@ Base essentials.jl:321
...
Stacktrace:
[1] setindex!(h::Dict{String, Nothing}, v0::Nothing, key0::Int64)
@ Base .\dict.jl:367
[2] push!(s::Set{String}, x::Int64)
@ Base .\set.jl:103
[3] union!(s::Set{String}, itr::Base.OneTo{Int64})
@ Base .\abstractset.jl:106
[4] unnest_wider(df::DataFrame, cols::Vector{Symbol}; names_sep::Nothing)
@ TidierData C:\Users\sprmn\.julia\packages\TidierData\A0yDz\src\nests.jl:50
[5] top-level scope
@ C:\Users\sprmn\.julia\packages\TidierData\A0yDz\src\nests.jl:95
Maybe this case is more realistic.
How should a second (or even deeper) level of nesting be treated?
julia> a1=Dict("a"=>1, "b"=>Dict("c"=>1, "d"=>2)); a2=Dict("a"=>2, "b"=>Dict("c"=>2))
Dict{String, Any} with 2 entries:
"b" => Dict("c"=>2)
"a" => 2
julia> a1=Dict("a"=>1, "b"=>Dict("c"=>1, "d"=>2))
Dict{String, Any} with 2 entries:
"b" => Dict("c"=>1, "d"=>2)
"a" => 1
julia> a2=Dict("a"=>2, "b"=>Dict("c"=>2))
Dict{String, Any} with 2 entries:
"b" => Dict("c"=>2)
"a" => 2
julia> a3=Dict("a"=>3, "b"=>Dict("d"=>3))
Dict{String, Any} with 2 entries:
"b" => Dict("d"=>3)
"a" => 3
julia> a4=Dict("a"=>4, "b"=>Dict("c"=>41, "d"=>Dict("da"=>411, "db"=>Dict("dbc"=>4111))));
julia> a=[a1;a2;a3;a4]
4-element Vector{Dict{String, Any}}:
Dict("b" => Dict("c" => 1, "d" => 2), "a" => 1)
Dict("b" => Dict("c" => 2), "a" => 2)
Dict("b" => Dict("d" => 3), "a" => 3)
Dict("b" => Dict{String, Any}("c" => 41, "d" => Dict{String, Any}("da" => 411, "db" => Dict("dbc" => 4111))), "a" => 4)
julia> df=DataFrame(a)
4×2 DataFrame
Row │ b a
│ Dict… Int64
─────┼──────────────────────────────────────────
1 │ Dict("c"=>1, "d"=>2) 1
2 │ Dict("c"=>2) 2
3 │ Dict("d"=>3) 3
4 │ Dict{String, Any}("c"=>41, "d"=>… 4
julia> udfb= @unnest_wider(df, b)
4×3 DataFrame
Row │ a c d
│ Int64 Int64? Any
─────┼───────────────────────────────────────────────────
1 │ 1 1 2
2 │ 2 2 missing
3 │ 3 missing 3
4 │ 4 41 Dict{String, Any}("da"=>411, "db…
julia> udfbd= @unnest_wider(udfb, d)
ERROR: Column d contains neither dictionaries nor arrays nor DataFrames
Stacktrace:
[1] error(s::String)
@ Base .\error.jl:35
[2] unnest_wider(df::DataFrame, cols::Vector{Symbol}; names_sep::Nothing)
@ TidierData C:\Users\sprmn\.julia\packages\TidierData\A0yDz\src\nests.jl:66
[3] top-level scope
@ C:\Users\sprmn\.julia\packages\TidierData\A0yDz\src\nests.jl:95