Parquet2.jl: type Nothing has no field meta_data

I am trying to read some data using Parquet2.jl. The data was generated by pyarrow.parquet and is around 1 MB in size, so very small. Compression is Snappy, which is the default in pyarrow.

I am getting a very long stack trace about type Nothing has no field meta_data:

ERROR: type Nothing has no field meta_data
Stacktrace:
  [1] thriftget(x::Nothing, s::Symbol, d::Nothing)
    @ Parquet2 ~/.julia/packages/Parquet2/Txix3/src/utils.jl:36
  [2] Parquet2.Column(v::Vector{UInt8}, r::Parquet2.SchemaNode{Parquet2.ParqTree}, coldict::Dict{Vector{String}, Parquet2.Metadata.Column}, p::FilePathsBase.PosixPath, schp::Vector{String}, nvals::Int64; read_opts::Parquet2.ReadOptions)
    @ Parquet2 ~/.julia/packages/Parquet2/Txix3/src/schema.jl:982
  [3] _construct_column(nm::String, r::Parquet2.SchemaNode{Parquet2.ParqTree}, coldict::Dict{Vector{String}, Parquet2.Metadata.Column}, fm::Parquet2.FileManager{FilePathsBase.PosixPath}, nvals::Int64, current_file::FilePathsBase.PosixPath; read_opts::Parquet2.ReadOptions)
    @ Parquet2 ~/.julia/packages/Parquet2/Txix3/src/schema.jl:1238
  [4] _construct_column
    @ ~/.julia/packages/Parquet2/Txix3/src/schema.jl:1220 [inlined]
  [5] #99
    @ ~/.julia/packages/Parquet2/Txix3/src/schema.jl:1262 [inlined]
  [6] next
    @ ~/.julia/packages/Transducers/4xWio/src/library.jl:54 [inlined]
  [7] next(rf::Transducers.Reduction{Transducers.Map{typeof(Parquet2.name)}, Transducers.Reduction{Transducers.Map{Parquet2.var"#99#101"{FilePathsBase.PosixPath, Parquet2.FileManager{FilePathsBase.PosixPath}, Parquet2.SchemaNode{Parquet2.ParqTree}, Int64, Dict{Vector{String}, Parquet2.Metadata.Column}}}, Transducers.Reduction{Transducers.Map{Type{BangBang.NoBang.SingletonVector}}, Transducers.BottomRF{Transducers.AdHocRF{typeof(BangBang.collector), typeof(identity), typeof(BangBang.append!!), typeof(identity), typeof(identity), Nothing}}}}}, result::BangBang.SafeCollector{Vector{Parquet2.Column}}, input::Parquet2.SchemaNode{Parquet2.ParqList})
    @ Transducers ~/.julia/packages/Transducers/4xWio/src/library.jl:54
  [8] macro expansion
    @ ~/.julia/packages/Transducers/4xWio/src/core.jl:181 [inlined]
  [9] _foldl_iter(rf::Transducers.Reduction{Transducers.Map{typeof(Parquet2.name)}, Transducers.Reduction{Transducers.Map{Parquet2.var"#99#101"{FilePathsBase.PosixPath, Parquet2.FileManager{FilePathsBase.PosixPath}, Parquet2.SchemaNode{Parquet2.ParqTree}, Int64, Dict{Vector{String}, Parquet2.Metadata.Column}}}, Transducers.Reduction{Transducers.Map{Type{BangBang.NoBang.SingletonVector}}, Transducers.BottomRF{Transducers.AdHocRF{typeof(BangBang.collector), typeof(identity), typeof(BangBang.append!!), typeof(identity), typeof(identity), Nothing}}}}}, val::BangBang.SafeCollector{Vector{Parquet2.Column}}, iter::Base.ValueIterator{OrderedCollections.OrderedDict{String, Parquet2.SchemaNode}}, state::Int64, counter::Val{0})
    @ Transducers ~/.julia/packages/Transducers/4xWio/src/processes.jl:168
 [10] _foldl_iter(rf::Transducers.Reduction{Transducers.Map{typeof(Parquet2.name)}, Transducers.Reduction{Transducers.Map{Parquet2.var"#99#101"{FilePathsBase.PosixPath, Parquet2.FileManager{FilePathsBase.PosixPath}, Parquet2.SchemaNode{Parquet2.ParqTree}, Int64, Dict{Vector{String}, Parquet2.Metadata.Column}}}, Transducers.Reduction{Transducers.Map{Type{BangBang.NoBang.SingletonVector}}, Transducers.BottomRF{Transducers.AdHocRF{typeof(BangBang.collector), typeof(identity), typeof(BangBang.append!!), typeof(identity), typeof(identity), Nothing}}}}}, val::BangBang.SafeCollector{Vector{Parquet2.Column{Parquet2.ParqFloat64, Parquet2.ParqFloat64}}}, iter::Base.ValueIterator{OrderedCollections.OrderedDict{String, Parquet2.SchemaNode}}, state::Int64, counter::Val{1})
    @ Transducers ~/.julia/packages/Transducers/4xWio/src/processes.jl:169
 [11] macro expansion
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:159 [inlined]
 [12] macro expansion
    @ ~/.julia/packages/Transducers/4xWio/src/basics.jl:98 [inlined]
 [13] __foldl__(rf::Transducers.Reduction{Transducers.Map{typeof(Parquet2.name)}, Transducers.Reduction{Transducers.Map{Parquet2.var"#99#101"{FilePathsBase.PosixPath, Parquet2.FileManager{FilePathsBase.PosixPath}, Parquet2.SchemaNode{Parquet2.ParqTree}, Int64, Dict{Vector{String}, Parquet2.Metadata.Column}}}, Transducers.Reduction{Transducers.Map{Type{BangBang.NoBang.SingletonVector}}, Transducers.BottomRF{Transducers.AdHocRF{typeof(BangBang.collector), typeof(identity), typeof(BangBang.append!!), typeof(identity), typeof(identity), Nothing}}}}}, init::BangBang.SafeCollector{BangBang.NoBang.Empty{Vector{Union{}}}}, coll::Base.ValueIterator{OrderedCollections.OrderedDict{String, Parquet2.SchemaNode}})
    @ Transducers ~/.julia/packages/Transducers/4xWio/src/processes.jl:158
 [14] #transduce#141
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:519 [inlined]
 [15] transduce
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:508 [inlined]
 [16] transduce(xform::Transducers.Composition{Transducers.Map{typeof(Parquet2.name)}, Transducers.Composition{Transducers.Map{Parquet2.var"#99#101"{FilePathsBase.PosixPath, Parquet2.FileManager{FilePathsBase.PosixPath}, Parquet2.SchemaNode{Parquet2.ParqTree}, Int64, Dict{Vector{String}, Parquet2.Metadata.Column}}}, Transducers.Map{Type{BangBang.NoBang.SingletonVector}}}}, f::Transducers.AdHocRF{typeof(BangBang.collector), typeof(identity), typeof(BangBang.append!!), typeof(identity), typeof(identity), Nothing}, init::BangBang.SafeCollector{BangBang.NoBang.Empty{Vector{Union{}}}}, coll::Base.ValueIterator{OrderedCollections.OrderedDict{String, Parquet2.SchemaNode}}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Transducers ~/.julia/packages/Transducers/4xWio/src/processes.jl:502
 [17] transduce
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:500 [inlined]
 [18] _collect(xf::Transducers.Composition{Transducers.Map{typeof(Parquet2.name)}, Transducers.Map{Parquet2.var"#99#101"{FilePathsBase.PosixPath, Parquet2.FileManager{FilePathsBase.PosixPath}, Parquet2.SchemaNode{Parquet2.ParqTree}, Int64, Dict{Vector{String}, Parquet2.Metadata.Column}}}}, coll::Base.ValueIterator{OrderedCollections.OrderedDict{String, Parquet2.SchemaNode}}, #unused#::Transducers.SizeStable, #unused#::Base.HasLength)
    @ Transducers ~/.julia/packages/Transducers/4xWio/src/processes.jl:806
 [19] collect
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:802 [inlined]
 [20] collect
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:803 [inlined]
 [21] |>
    @ ./operators.jl:907 [inlined]
 [22] Parquet2.RowGroup(fm::Parquet2.FileManager{FilePathsBase.PosixPath}, r::Parquet2.SchemaNode{Parquet2.ParqTree}, rg::Parquet2.Metadata.RowGroup, ptree::Parquet2.PartitionNode{FilePathsBase.PosixPath}; current_file::FilePathsBase.PosixPath, parallel_column_loading::Nothing)
    @ Parquet2 ~/.julia/packages/Parquet2/Txix3/src/schema.jl:1261
 [23] (::Parquet2.var"#115#116"{Parquet2.Dataset{Parquet2.FileManager{FilePathsBase.PosixPath}}, FilePathsBase.PosixPath})(rg::Parquet2.Metadata.RowGroup)
    @ Parquet2 ~/.julia/packages/Parquet2/Txix3/src/dataset.jl:159
 [24] next
    @ ~/.julia/packages/Transducers/4xWio/src/library.jl:54 [inlined]
 [25] macro expansion
    @ ~/.julia/packages/Transducers/4xWio/src/core.jl:181 [inlined]
 [26] _foldl_array
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:187 [inlined]
 [27] __foldl__
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:182 [inlined]
 [28] #transduce#141
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:519 [inlined]
 [29] transduce
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:508 [inlined]
 [30] transduce(xform::Transducers.Composition{Transducers.Map{Parquet2.var"#115#116"{Parquet2.Dataset{Parquet2.FileManager{FilePathsBase.PosixPath}}, FilePathsBase.PosixPath}}, Transducers.Enumerate{Int64}}, f::Transducers.AdHocRF{Transducers.var"#143#145"{MicroCollections.UndefVector{Union{}, typeof(MicroCollections.default_factory)}}, typeof(identity), Transducers.var"#rf#144", typeof(identity), typeof(identity), Nothing}, init::MicroCollections.UndefVector{Union{}, typeof(MicroCollections.default_factory)}, coll::Vector{Parquet2.Metadata.RowGroup}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Transducers ~/.julia/packages/Transducers/4xWio/src/processes.jl:502
 [31] transduce(xform::Transducers.Composition{Transducers.Map{Parquet2.var"#115#116"{Parquet2.Dataset{Parquet2.FileManager{FilePathsBase.PosixPath}}, FilePathsBase.PosixPath}}, Transducers.Enumerate{Int64}}, f::Transducers.AdHocRF{Transducers.var"#143#145"{MicroCollections.UndefVector{Union{}, typeof(MicroCollections.default_factory)}}, typeof(identity), Transducers.var"#rf#144", typeof(identity), typeof(identity), Nothing}, init::MicroCollections.UndefVector{Union{}, typeof(MicroCollections.default_factory)}, coll::Vector{Parquet2.Metadata.RowGroup})
    @ Transducers ~/.julia/packages/Transducers/4xWio/src/processes.jl:500
 [32] _collect(xf::Transducers.Map{Parquet2.var"#115#116"{Parquet2.Dataset{Parquet2.FileManager{FilePathsBase.PosixPath}}, FilePathsBase.PosixPath}}, arr::Vector{Parquet2.Metadata.RowGroup}, #unused#::Transducers.SizeStable, #unused#::Base.HasShape{1})
    @ Transducers ~/.julia/packages/Transducers/4xWio/src/processes.jl:822
 [33] collect(xf::Transducers.Map{Parquet2.var"#115#116"{Parquet2.Dataset{Parquet2.FileManager{FilePathsBase.PosixPath}}, FilePathsBase.PosixPath}}, coll::Vector{Parquet2.Metadata.RowGroup})
    @ Transducers ~/.julia/packages/Transducers/4xWio/src/processes.jl:802
 [34] collect
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:803 [inlined]
 [35] |>
    @ ./operators.jl:907 [inlined]
 [36] append!(ds::Parquet2.Dataset{Parquet2.FileManager{FilePathsBase.PosixPath}}, p::FilePathsBase.PosixPath; verbose::Bool, check::Bool)
    @ Parquet2 ~/.julia/packages/Parquet2/Txix3/src/dataset.jl:160
 [37] append!
    @ ~/.julia/packages/Parquet2/Txix3/src/dataset.jl:155 [inlined]
 [38] #133
    @ ~/.julia/packages/Parquet2/Txix3/src/dataset.jl:226 [inlined]
 [39] next
    @ ~/.julia/packages/Transducers/4xWio/src/library.jl:54 [inlined]
 [40] next
    @ ~/.julia/packages/Transducers/4xWio/src/library.jl:290 [inlined]
 [41] macro expansion
    @ ~/.julia/packages/Transducers/4xWio/src/core.jl:181 [inlined]
 [42] _foldl_array
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:187 [inlined]
 [43] __foldl__
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:182 [inlined]
 [44] #transduce#141
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:519 [inlined]
 [45] transduce
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:508 [inlined]
 [46] transduce(xform::Transducers.Composition{Transducers.Filter{Parquet2.var"#132#134"{Parquet2.Dataset{Parquet2.FileManager{FilePathsBase.PosixPath}}}}, Transducers.Map{Parquet2.var"#133#135"{Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}, Parquet2.Dataset{Parquet2.FileManager{FilePathsBase.PosixPath}}}}}, f::Transducers.Completing{typeof(Transducers.right)}, init::Vector{Any}, coll::Vector{FilePathsBase.PosixPath}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Transducers ~/.julia/packages/Transducers/4xWio/src/processes.jl:502
 [47] transduce
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:500 [inlined]
 [48] #foldxl#157
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:1015 [inlined]
 [49] foldxl
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:1015 [inlined]
 [50] #foldl#158
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:1018 [inlined]
 [51] foldl
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:1018 [inlined]
 [52] #foldxl#139
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:442 [inlined]
 [53] foldxl
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:442 [inlined]
 [54] #137
    @ ~/.julia/packages/Transducers/4xWio/src/processes.jl:441 [inlined]
 [55] |>
    @ ./operators.jl:907 [inlined]
 [56] appendall!(ds::Parquet2.Dataset{Parquet2.FileManager{FilePathsBase.PosixPath}}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Parquet2 ~/.julia/packages/Parquet2/Txix3/src/dataset.jl:226
 [57] appendall!
    @ ~/.julia/packages/Parquet2/Txix3/src/dataset.jl:225 [inlined]
 [58] Parquet2.Dataset(fm::Parquet2.FileManager{FilePathsBase.PosixPath})
    @ Parquet2 ~/.julia/packages/Parquet2/Txix3/src/dataset.jl:107
 [59] Parquet2.Dataset(p::FilePathsBase.PosixPath; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Parquet2 ~/.julia/packages/Parquet2/Txix3/src/dataset.jl:112
 [60] Parquet2.Dataset(p::FilePathsBase.PosixPath)
    @ Parquet2 ~/.julia/packages/Parquet2/Txix3/src/dataset.jl:110
 [61] Parquet2.Dataset(p::String; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Parquet2 ~/.julia/packages/Parquet2/Txix3/src/dataset.jl:116
 [62] Dataset
    @ ~/.julia/packages/Parquet2/Txix3/src/dataset.jl:116 [inlined]
 [63] #readfile#147
    @ ~/.julia/packages/Parquet2/Txix3/src/dataset.jl:415 [inlined]
 [64] readfile(a::String)
    @ Parquet2 ~/.julia/packages/Parquet2/Txix3/src/dataset.jl:415

Any idea why this is the case?

Just tested: compression doesn’t matter.

[2] in your stack trace routes me to this call here: src/schema.jl · master · Expanding Man / Parquet2.jl · GitLab
which in turn ends up in [1] here src/utils.jl · master · Expanding Man / Parquet2.jl · GitLab.

You could try to monkey patch that with something like

# Override of Parquet2's internal `thriftget`: return field `s` of `x` when the
# type of `x` declares such a field, and the fallback `d` otherwise (so passing
# `nothing` as `x` yields `d` instead of raising a field-access error).
function Parquet2.thriftget(x, s, d)
    if hasfield(typeof(x), s)
        return getfield(x, s)
    else
        return d
    end
end

This does not work as posted, but overriding Parquet2.thriftget instead does. However, I then get a second error:

MethodError: no method matching parqbasetype(::Parquet2.Metadata.BitsType, ::Nothing)

Closest candidates are:

parqbasetype(::Parquet2.Metadata.BitsType)

@ Parquet2 ~/.julia/packages/Parquet2/Txix3/src/schema.jl:315

parqbasetype(::Parquet2.Metadata.BitsType, !Matched::Parquet2.ParquetType)

@ Parquet2 ~/.julia/packages/Parquet2/Txix3/src/schema.jl:314

parqbasetype(::Parquet2.Metadata.BitsType, !Matched::Integer)

@ Parquet2 ~/.julia/packages/Parquet2/Txix3/src/schema.jl:315

Stack trace

Here is what happened, the most recent locations are first:

    parqbasetype(s::Parquet2.Metadata.SchemaElement) @ schema.jl:312
    _legacy_parqtype(s::Parquet2.Metadata.SchemaElement) @ schema.jl:339
    parqtype(s::Parquet2.Metadata.SchemaElement; support_legacy::Bool) @ schema.jl:403
    parqtype @ schema.jl:398
    Parquet2.SchemaNode(s::Parquet2.Metadata.SchemaElement, children::OrderedCollections.OrderedDict{Any, Any}; support_legacy::Bool) @ schema.jl:478
    SchemaNode @ schema.jl:470
    Parquet2.SchemaNode(ss::Vector{Parquet2.Metadata.SchemaElement}, i::Int64; support_legacy::Bool) @ schema.jl:493
    (::Parquet2.var"#78#79"{Bool, Vector{Parquet2.Metadata.SchemaElement}})(j::Int64) @ 
    iterate(::Base.Generator{Vector{Int64}, Parquet2.var"#78#79"{Bool, Vector{Parquet2.Metadata.SchemaElement}}}) @ generator.jl:47
    _all(f::OrderedCollections.var"#2#4", itr::Base.Generator{Vector{Int64}, Parquet2.var"#78#79"{Bool, Vector{Parquet2.Metadata.SchemaElement}}}, #unused#::Colon) @ reduce.jl:1282
    all(f::Function, itr::Base.Generator{Vector{Int64}, Parquet2.var"#78#79"{Bool, Vector{Parquet2.Metadata.SchemaElement}}}) @ reduce.jl:1278
    OrderedCollections.OrderedDict(kv::Base.Generator{Vector{Int64}, Parquet2.var"#78#79"{Bool, Vector{Parquet2.Metadata.SchemaElement}}}) @ ordered_dict.jl:75
    Parquet2.SchemaNode(ss::Vector{Parquet2.Metadata.SchemaElement}, i::Int64; support_legacy::Bool) @ schema.jl:493
    SchemaNode @ schema.jl:489
    Parquet2.Dataset(fm::Parquet2.FileManager{FilePathsBase.PosixPath}) @ dataset.jl:100
    Parquet2.Dataset(p::FilePathsBase.PosixPath; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}) @ dataset.jl:112
    Parquet2.Dataset(p::FilePathsBase.PosixPath) @ dataset.jl:110
    Parquet2.Dataset(p::String; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}) @ dataset.jl:116
    Dataset @ dataset.jl:116
    #readfile#147 @ dataset.jl:415
    readfile(a::String) @ dataset.jl:415
    This cell: line 1

    TIME_DF = Parquet2.readfile("./results/time.parquet")

Sorry, this time the trace is from Pluto so the format is broken

Maybe override this one here src/schema.jl · master · Expanding Man / Parquet2.jl · GitLab
with something like

# Override of Parquet2's two-argument `parqbasetype` method, written so that it can
# be evaluated from outside the package (REPL or notebook cell): every Parquet2 name
# is fully qualified. Outside the package a bare `Meta` is `Base.Meta`, which has no
# `BitsType`, so the unqualified form fails before the method is even defined.
function Parquet2.parqbasetype(t::Parquet2.Metadata.BitsType, pt::Parquet2.ParquetType)
    # NOTE(review): assumes `valuesize` is reachable as `Parquet2.valuesize` — confirm.
    v = Parquet2.valuesize(pt)
    # Substitute 0 when no value size is available, so the call dispatches to the
    # `(::BitsType, ::Integer)` method instead of raising a MethodError on `nothing`.
    return Parquet2.parqbasetype(t, isnothing(v) ? 0 : v)
end

Edit: I am not sure if that’s gonna fix it, because what happens downstream in parqbasetype is that a zero-sized byte array type will be generated, which sounds wrong.

I never used a Parquet file myself before, so I am also just reading the code and guessing. Maybe you would be better served opening an issue.

It doesn’t fix it. Thank you for your help, but I’ll not continue down this path this time.

Also, I’ve not used GitLab before, so I cannot find where I can open an issue.

Here Issues · Expanding Man / Parquet2.jl · GitLab

1 Like