here a discussion about a similar problem. You could try some of the suggested ideas.
here one of the proposals
allflatnt(vd::Vector{Dict{Symbol, Any}})= all(allflatnt, vd)
allflatnt(d::Dict{Symbol, Any})=all(e->!(last(e) isa Vector{<:Dict} || last(e) isa Dict),d)
function nestdf(vd::Vector{Dict{Symbol, Any}})
DataFrame(Tables.dictrowtable(nestdict.(vd)))
end
function nestdf(d::Dict{Symbol, Any})
tdf=DataFrame(Tables.dictrowtable(nestdict(d)))
if names(tdf)==["first","second"]
rename!(tdf, [:first,:second].=>[:key,:value])
end
#tdf
end
function nestdict(d::Dict{Symbol, Any})
for (k,v) in d
if v isa Vector{<:Dict} || v isa Dict
if allflatnt(v)
tdf=DataFrame(Tables.dictrowtable(v))
if names(tdf)==["first","second"]
rename!(tdf, [:first,:second].=>[:key,:value])
end
d=merge(d, Dict(k=>tdf))
else
d=merge(d, Dict(k=>nestdf(v)))
end
end
end
d
end
nestdf(jsobj::JSON3.Object)=nestdf(copy(jsobj))
nestdf(jsobj::JSON3.Array)=nestdf(copy(jsobj))
using JSON3, DataFrames
filtering the chunk £0-£500, after few seconds you get
julia> jsobj=JSON3.read("grantnav-20231111145216.json")
JSON3.Object{Vector{UInt8}, Vector{UInt64}} with 2 entries:
:license => "See dataset/license within each grant. This file also contains… :grants => Object[{…"
julia> df=nestdf(jsobj)
2×2 DataFrame
Row │ key value
│ Symbol Union…
─────┼────────────────────────────────────────────
1 │ grants 6243×29 DataFrame
2 │ license See dataset/license within each …
julia> df2=df.value[1]
6243×29 DataFrame
Row │ plannedDates filename currency description tit ⋯
│ DataFrame? String String String Str ⋯
──────┼───────────────────────────────────────────────────────────────────────────────────────────
1 │ 1×5 DataFrame a001p00000tV2XpAAK.json GBP Funding under Sport England's NB… 1 x ⋯
2 │ 1×5 DataFrame a001p00000tV2XpAAK.json GBP Funding under Sport England's NB… 1x
3 │ 1×5 DataFrame a001p00000tV2XpAAK.json GBP Funding under Sport England's NB… Que
4 │ 1×5 DataFrame a001p00000tV2XpAAK.json GBP Funding under Sport England's NB… Que
5 │ 1×5 DataFrame a001p00000tV2XpAAK.json GBP Funding under Sport England's NB… 1x ⋯
6 │ 1×5 DataFrame a001p00000tV2XpAAK.json GBP Funding under Sport England's NB… 1 x
7 │ 1×5 DataFrame a001p00000tV2XpAAK.json GBP Funding under Sport England's NB… Que
8 │ 1×5 DataFrame a001p00000tV2XpAAK.json GBP Funding under Sport England's NB… Que
9 │ 1×5 DataFrame a001p00000tV2XpAAK.json GBP Funding under Sport England's NB… Que ⋯
10 │ 1×5 DataFrame a001p00000tV2XpAAK.json GBP Funding under Sport England's NB… Que
11 │ 1×5 DataFrame a001p00000tV2XpAAK.json GBP Funding under Sport England's NB… Que
12 │ 1×5 DataFrame a001p00000tV2XpAAK.json GBP Funding under Sport England's NB… 1 x
13 │ 1×5 DataFrame a001p00000tV2XpAAK.json GBP Funding under Sport England's NB… Que ⋯
14 │ 1×5 DataFrame a001p00000tV2XpAAK.json GBP Funding under Sport England's NB… 1 x
⋮ │ ⋮ ⋮ ⋮ ⋮ ⋱
6231 │ missing a003W000007E4EqQAK.json GBP A Project To Commemorate Ww2 Thr… Gra
6232 │ missing a003W000007E4EqQAK.json GBP The 7th Branch Of The Normandy V… Gra
6233 │ missing a003W000007E4EqQAK.json GBP This Group Will Mark The Anniver… Gra ⋯
6234 │ missing a003W000007E4EqQAK.json GBP St Theresa'S Over 50s In St Hele… Gra
6235 │ missing a003W000007E4EqQAK.json GBP This Formally Constituted Theatr… Gra
6236 │ missing a003W000007E4EqQAK.json GBP This Group, Part Of The Territor… Gra
6237 │ missing a003W000007E4EqQAK.json GBP This Registered Charity Is Organ… Gra ⋯
6238 │ missing a003W000007E4EqQAK.json GBP This Informal Group Will Use The… Gra
6239 │ missing a003W000007E4EqQAK.json GBP This Informal Group Will Use An … Gra
6240 │ missing a003W000007E4EqQAK.json GBP This School Would Use An Award T… Gra
6241 │ missing a003W000007E4EqQAK.json GBP This Formal Group Would Use An A… Gra ⋯
6242 │ missing a003W000007E4EqQAK.json GBP The Project Is To Fund A Reunion… Gra
6243 │ missing a003W000007E4EqQAK.json GBP An Award Will Be Used To Commemo… Gra
25 columns and 6216 rows omitted
julia> describe(df2)[:,[begin,end]]
29×2 DataFrame
Row │ variable eltype
│ Symbol Type
─────┼───────────────────────────────────────────────────────────────────
1 │ plannedDates Union{Missing, DataFrame}
2 │ filename String
3 │ currency String
4 │ description String
5 │ title_and_description String
6 │ fromOpenCall Union{Missing, String}
7 │ awardDateDateOnly String
8 │ dataset DataFrame
9 │ grantProgramme DataFrame
10 │ beneficiaryLocation Union{Missing, DataFrame}
11 │ fundingOrganization DataFrame
12 │ title String
13 │ additional_data DataFrame
14 │ dateModified Union{Missing, String}
15 │ recipientOrganization DataFrame
16 │ Award Type Union{Missing, String}
17 │ simple_grant_type String
18 │ id String
19 │ dataType String
20 │ amountAwarded Union{Float64, Int64}
21 │ Recipient Org Location Union{Missing, DataFrame}
22 │ dataSource Union{Missing, String}
23 │ awardDate String
24 │ amountAppliedFor Union{Missing, Float64, Int64}
25 │ amountDisbursed Union{Missing, Int64}
26 │ Planned Date Union{Missing, DataFrame}
27 │ National Lottery Heritage Fund A… Union{Missing, String}
28 │ Project reference Union{Missing, String}
29 │ classifications Union{Missing, DataFrame}
julia> df2.dataset[1,1].value[1]
7×2 DataFrame
Row │ key value
│ Symbol Union…
─────┼────────────────────────────────────────────────────────
1 │ file_size 11072508
2 │ valid true
3 │ json /home/datastore/latest_datagette…
4 │ acceptable_license true
5 │ datetime_downloaded 2023-11-11T00:02:10+00:00
6 │ downloads true
7 │ file_type xlsx