Some questions:
-
The package name DataFrameDBs looks a bit odd; could it be changed to something more informative?
-
What does the reuse_row=true option do for CSV.Rows?
-
It would be better if show_progress=true reported the estimated time remaining instead of the time elapsed.
-
When a column contains missing values, how can it be converted from String to Int64?
-
When I run the following code:
c_best_bid = parse.(Int64, test.best_bid)
materialize(c_best_bid[1:10])
I get the following error message (test.best_bid has no missing values):
ERROR: ArgumentError: invalid base 10 digit '.' in "15.6"
Stacktrace:
[1] parse at ./parse.jl:240 [inlined]
[2] _broadcast_getindex_evalf at ./broadcast.jl:625 [inlined]
[3] _broadcast_getindex at ./broadcast.jl:608 [inlined]
[4] getindex at ./broadcast.jl:558 [inlined]
[5] macro expansion at ./broadcast.jl:888 [inlined]
[6] macro expansion at ./simdloop.jl:77 [inlined]
[7] copyto! at ./broadcast.jl:887 [inlined]
[8] copyto! at ./broadcast.jl:842 [inlined]
[9] materialize! at ./broadcast.jl:801 [inlined]
[10] eval_on_range(::NamedTuple{(:best_bid_raw,),Tuple{DataFrameDBs.FlatStringsVectors.FlatStringsVector{Union{Missing, String}}}}, ::DataFrameDBs.BroadcastExecutor{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(parse),Tuple{Base.RefValue{Type{Int64}},Array{Union{Missing, String},1}}},NamedTuple{(:best_bid_raw,),Tuple{Array{Union{Missing, String},1}}},Array{Int64,1}}, ::Base.LogicalIndex{Int64,Array{Bool,1}}) at /home/yifanliu/.julia/packages/DataFrameDBs/A2bCW/src/tables/broadcast.jl:130
[11] _proj_elem_eval_on_range at /home/yifanliu/.julia/packages/DataFrameDBs/A2bCW/src/tables/projection.jl:128 [inlined]
[12] _proj_eval_on_range at /home/yifanliu/.julia/packages/DataFrameDBs/A2bCW/src/tables/projection.jl:136 [inlined]
[13] eval_on_range at /home/yifanliu/.julia/packages/DataFrameDBs/A2bCW/src/tables/projection.jl:152 [inlined]
[14] iterate(::DataFrameDBs.BlocksIterator{DataFrameDBs.DataReader,NamedTuple{(:best_bid_raw,),Tuple{DataFrameDBs.BlockStream}},NamedTuple{(:best_bid_raw,),Tuple{DataFrameDBs.FlatStringsVectors.FlatStringsVector{Union{Missing, String}}}},DataFrameDBs.ProjectionExecutor{NamedTuple{(:a,),Tuple{DataFrameDBs.BroadcastExecutor{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(parse),Tuple{Base.RefValue{Type{Int64}},Array{Union{Missing, String},1}}},NamedTuple{(:best_bid_raw,),Tuple{Array{Union{Missing, String},1}}},Array{Int64,1}}}}},DataFrameDBs.SelectionExecutor{Tuple{DataFrameDBs.RangeToProcess{UnitRange{Int64}}}},Tuple{},Tuple{Symbol}}, ::Nothing) at /home/yifanliu/.julia/packages/DataFrameDBs/A2bCW/src/io/blocksiterator.jl:117
[15] iterate(::DataFrameDBs.BlocksIterator{DataFrameDBs.DataReader,NamedTuple{(:best_bid_raw,),Tuple{DataFrameDBs.BlockStream}},NamedTuple{(:best_bid_raw,),Tuple{DataFrameDBs.FlatStringsVectors.FlatStringsVector{Union{Missing, String}}}},DataFrameDBs.ProjectionExecutor{NamedTuple{(:a,),Tuple{DataFrameDBs.BroadcastExecutor{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1},Nothing,typeof(parse),Tuple{Base.RefValue{Type{Int64}},Array{Union{Missing, String},1}}},NamedTuple{(:best_bid_raw,),Tuple{Array{Union{Missing, String},1}}},Array{Int64,1}}}}},DataFrameDBs.SelectionExecutor{Tuple{DataFrameDBs.RangeToProcess{UnitRange{Int64}}}},Tuple{},Tuple{Symbol}}) at /home/yifanliu/.julia/packages/DataFrameDBs/A2bCW/src/io/blocksiterator.jl:99
[16] materialize(::DFColumn{Int64}) at /home/yifanliu/.julia/packages/DataFrameDBs/A2bCW/src/tables/materialization.jl:48
[17] top-level scope at REPL[31]:1