function's methods to read json file in nested DataFrame
function allflatnt(vnt::Vector{Dict{Symbol, Any}})
for i in eachindex(vnt)
for k in keys(vnt[i])
v=vnt[i][k]
if v isa Vector{<:Dict} || v isa Dict
return false
end
end
end
true
end
function allflatnt(d::Dict{Symbol, Any})
for k in keys(d)
if d[k] isa Vector{<:Dict} || d[k] isa Dict
return false
end
end
true
end
function nestdf(vd::Vector{Dict{Symbol, Any}})
for i in eachindex(vd)
for (k,v) in vd[i]
if v isa Vector{<:Dict} || v isa Dict
if allflatnt(v)
tdf=DataFrame(Tables.dictrowtable(v))
if names(tdf)==["first","second"]
rename!(tdf, [:first,:second].=>[:key,:value])
end
vd[i]=merge(vd[i], Dict(k=>tdf))
else
vd[i]=merge(vd[i], Dict(k=>nestdf(v)))
end
end
end
end
DataFrame(Tables.dictrowtable(vd))
end
function nestdf(d::Dict{Symbol, Any})
for (k,v) in d
if v isa Vector{<:Dict} || v isa Dict
if allflatnt(v)
tdf=DataFrame(Tables.dictrowtable(v))
if names(tdf)==["first","second"]
rename!(tdf, [:first,:second].=>[:key,:value])
end
d=merge(d, Dict(k=>tdf))
else
d=merge(d, Dict(k=>nestdf(v)))
end
end
end
rename!(DataFrame(Tables.dictrowtable(d)), [:first,:second].=>[:key,:value])
end
nestdf(jsobj::JSON3.Object)=nestdf(copy(jsobj))
nestdf(jsobj::JSON3.Array)=nestdf(copy(jsobj))
I adapted the previous scripts to be able to handle nested structures of JSON.Array and JSON.Object.
I only did some tests on the data corresponding to the “AAPL” ticker
using YFinance, JSON3, DataFrames
aapl_json=get_quoteSummary("AAPL")
Below are some possible “views” with the structure obtained by applying the nestdf()
function
julia> ndf=nestdf(aapl_json)
31×2 DataFrame
Row │ key value
│ Symbol DataFrame
─────┼───────────────────────────────────────────────────
1 │ cashflowStatementHistoryQuarterly 2×2 DataFrame
2 │ industryTrend 3×2 DataFrame
3 │ earningsTrend 2×2 DataFrame
4 │ incomeStatementHistory 2×2 DataFrame
5 │ price 31×2 DataFrame
6 │ upgradeDowngradeHistory 2×2 DataFrame
7 │ institutionOwnership 2×2 DataFrame
8 │ fundOwnership 2×2 DataFrame
9 │ summaryDetail 42×2 DataFrame
10 │ netSharePurchaseActivity 12×2 DataFrame
11 │ earnings 4×2 DataFrame
12 │ insiderHolders 2×2 DataFrame
13 │ calendarEvents 4×2 DataFrame
14 │ summaryProfile 13×2 DataFrame
15 │ majorDirectHolders 2×2 DataFrame
16 │ assetProfile 20×2 DataFrame
17 │ recommendationTrend 2×2 DataFrame
18 │ balanceSheetHistory 2×2 DataFrame
19 │ financialData 30×2 DataFrame
20 │ indexTrend 5×2 DataFrame
21 │ balanceSheetHistoryQuarterly 2×2 DataFrame
22 │ insiderTransactions 2×2 DataFrame
23 │ cashflowStatementHistory 2×2 DataFrame
24 │ earningsHistory 2×2 DataFrame
25 │ quoteType 13×2 DataFrame
26 │ sectorTrend 3×2 DataFrame
27 │ esgScores 36×2 DataFrame
28 │ secFilings 2×2 DataFrame
29 │ defaultKeyStatistics 39×2 DataFrame
30 │ majorHoldersBreakdown 5×2 DataFrame
31 │ incomeStatementHistoryQuarterly 2×2 DataFrame
julia> ndf.value[16]
20×2 DataFrame
Row │ key value
│ Symbol Any
─────┼──────────────────────────────────────────────────────────────
1 │ auditRisk 4
2 │ country United States
3 │ overallRisk 1
4 │ state CA
5 │ phone 408 996 1010
6 │ longBusinessSummary Apple Inc. designs, manufactures…
7 │ city Cupertino
8 │ zip 95014
9 │ industry Consumer Electronics
10 │ address1 One Apple Park Way
11 │ shareHolderRightsRisk 1
12 │ governanceEpochDate 1680307200
13 │ maxAge 86400
14 │ compensationRisk 5
15 │ compensationAsOfEpochDate 1672444800
16 │ boardRisk 1
17 │ sector Technology
18 │ fullTimeEmployees 164000
19 │ website https://www.apple.com
20 │ companyOfficers 10×9 DataFrame
julia> ndf.value[16].value[20]
10×9 DataFrame
Row │ fiscalYear unexercisedValue age name title ⋯
│ Int64? DataFrame Int64? String String ⋯
─────┼──────────────────────────────────────────────────────────────────────────────────────
1 │ 2022 3×2 DataFrame 61 Mr. Timothy D. Cook CEO & Director ⋯
2 │ 2022 3×2 DataFrame 59 Mr. Luca Maestri CFO & Sr. VP
3 │ 2022 3×2 DataFrame 58 Mr. Jeffrey E. Williams Chief Operating Off
4 │ 2022 3×2 DataFrame 58 Ms. Katherine L. Adams Sr. VP, Gen. Counse
5 │ 2022 3×2 DataFrame 55 Ms. Deirdre O'Brien Sr. VP of Retail ⋯
6 │ missing 3×2 DataFrame missing Mr. Chris Kondo Sr. Director of Cor
7 │ missing 3×2 DataFrame missing Mr. James Wilson Chief Technology Of
8 │ missing 3×2 DataFrame missing Ms. Mary Demby Chief Information O
9 │ missing 3×2 DataFrame missing Ms. Nancy Paxton Sr. Director of Inv ⋯
10 │ missing 3×2 DataFrame missing Mr. Greg Joswiak Sr. VP of Worldwide
5 columns omitted
julia> ndf.value[16].value[20].totalPay[1:3]
3-element Vector{Union{Missing, DataFrame}}:
3×2 DataFrame
Row │ key value
│ Symbol Union…
─────┼─────────────────────
1 │ fmt 16.43M
2 │ longFmt 16,425,933
3 │ raw 16425933
3×2 DataFrame
Row │ key value
│ Symbol Union…
─────┼────────────────────
1 │ fmt 5.02M
2 │ longFmt 5,019,783
3 │ raw 5019783
3×2 DataFrame
Row │ key value
│ Symbol Union…
─────┼────────────────────
1 │ fmt 5.02M
2 │ longFmt 5,018,337
3 │ raw 5018337
some refactoring
allflatnt(vd::Vector{Dict{Symbol, Any}})= all(allflatnt, vd)
allflatnt(d::Dict{Symbol, Any})=all(e->!(last(e) isa Vector{<:Dict} || last(e) isa Dict),d)
function nestdf(vd::Vector{Dict{Symbol, Any}})
DataFrame(Tables.dictrowtable(nestdict.(vd)))
end
function nestdf(d::Dict{Symbol, Any})
tdf=DataFrame(Tables.dictrowtable(nestdict(d)))
if names(tdf)==["first","second"]
rename!(tdf, [:first,:second].=>[:key,:value])
end
#tdf
end
function nestdict(d::Dict{Symbol, Any})
for (k,v) in d
if v isa Vector{<:Dict} || v isa Dict
if allflatnt(v)
tdf=DataFrame(Tables.dictrowtable(v))
if names(tdf)==["first","second"]
rename!(tdf, [:first,:second].=>[:key,:value])
end
d=merge(d, Dict(k=>tdf))
else
d=merge(d, Dict(k=>nestdf(v)))
end
end
end
d
end
nestdf(jsobj::JSON3.Object)=nestdf(copy(jsobj))
nestdf(jsobj::JSON3.Array)=nestdf(copy(jsobj))
using YFinance, JSON3, DataFrames
aapl_json=get_quoteSummary("AAPL")
ndf=nestdf(aapl_json)