JuliaDB loading data

I am trying out JuliaDB, but I am unable to read data. Any ideas what is going wrong here?
loadndsparse is running into an error (the error also happens when I use Float64 for all types).

# Reproduction script: write a random 5000x5 matrix to a CSV file, read it
# back with CSV.jl, then try to ingest it with JuliaDB's `loadndsparse`,
# writing the binary output to `bindir`.
using JuliaDB 
using CSV
#using Tables 
using DelimitedFiles
#using DataFrames 

# Input CSV location and output directory for the binary data.
fileToBeRead="C:\\temp\\test0.csv"
bindir="c:\\temp\\bindata"

mt=rand(5_000,5);
# Columns 3 and 4 are truncated to whole numbers, but they are assigned back
# into the Float64 matrix `mt`, so they are stored as floats again. `writedlm`
# therefore writes them with a trailing ".0", which this thread later
# identifies as the reason they are not parsed as Int.
mt[:,3]=Int.(trunc.(Int,100*mt[:,3]));
mt[:,4]=Int.(trunc.(Int,10000*mt[:,4]));
# 1xN header row: "Column1", ..., "ColumnN".
hdr=reshape(map(i->string("Column",i),1:size(mt,2)),1,size(mt,2));

# Start from a clean file.
isfile(fileToBeRead)&&rm(fileToBeRead)

# Write the header row followed by the data, comma-separated.
open(fileToBeRead, "w") do io
    writedlm(io,hdr,',')    
    writedlm(io,mt,',')
end

#read file with CSV
df=CSV.read(fileToBeRead);
sum(df[1])

#read file with JuliaDB
@time csvfiles = glob(fileToBeRead);

!isdir(bindir) && mkdir(bindir)
# This is the call reported in this thread to fail with
# "UndefRefError: access to undefined reference".
@time loadndsparse(csvfiles, output=bindir,
    header_exists=true,
    chunks=80,
    colparsers=Dict(1=>Float64, 2=>Float32, 3=>Int,4=>Int,5=>Float64),
    colnames=["Column1", "Column2", "Column3", "Column4","Column5"],
    datacols=[1,2,3,4,5])
    


julia>     @time loadndsparse(csvfiles, output=bindir,
               header_exists=true,
                   chunks=80,
                       colparsers=Dict(1=>Float64, 2=>Float64, 3=>Float64,4=>Float64,5=>Float64),
                           colnames=["Column1", "Column2", "Column3", "Column4","Column5"],
                               datacols=[1,2,3,4,5])
ERROR: UndefRefError: access to undefined reference
getproperty(::Any, ::Symbol) at .\sysimg.jl:18
get_wrkrips() at C:\Users\bernhard.konig\.julia\packages\MemPool\tlPqB\src\datastore.jl:65
run_work_thunk(::typeof(MemPool.get_wrkrips), ::Bool) at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\process_messages.jl:56
#remotecall_fetch#148(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Function, ::Distributed.LocalProcess) at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:364
remotecall_fetch(::Function, ::Distributed.LocalProcess) at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:364
#remotecall_fetch#152(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Function, ::Int64) at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:406
remotecall_fetch at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:406 [inlined]
get_workers_at(::Sockets.IPv4) at C:\Users\bernhard.konig\.julia\packages\MemPool\tlPqB\src\datastore.jl:95
affinity(::MemPool.FileRef) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\chunks.jl:84
affinity(::Dagger.Chunk{Any,MemPool.FileRef}) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\chunks.jl:50
affinity(::Dagger.Thunk) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\thunk.jl:52
pop_with_affinity!(::Dagger.Context, ::Array{Dagger.Thunk,1}, ::Dagger.OSProc, ::Bool) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\scheduler.jl:97
compute_dag(::Dagger.Context, ::Dagger.Thunk) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\scheduler.jl:36
compute(::Dagger.Context, ::Dagger.Thunk) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\compute.jl:25
#fromchunks#47(::Nothing, ::Int64, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Array{Dagger.Thunk,1}) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\jDAlJ\src\table.jl:148
fromchunks(::Array{Dagger.Thunk,1}) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\jDAlJ\src\table.jl:129
offset_index!(::JuliaDB.DNDSparse{Tuple{Int64},NamedTuple{(:Column1, :Column2, :Column3, :Column4, :Column5),NTuple{5,Float64}}}, ::Int64) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\jDAlJ\src\io.jl:28
#_loadtable#188(::Int64, ::String, ::Bool, ::Array{Any,1}, ::Bool, ::Bool, ::Base.Iterators.Pairs{Symbol,Any,NTuple{4,Symbol},NamedTuple{(:header_exists, :colparsers, :colnames, :datacols),Tuple{Bool,Dict{Int64,DataType},Array{String,1},Array{Int64,1}}}}, ::Function, ::Type, ::Array{String,1}) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\jDAlJ\src\io.jl:153
#_loadtable at .\none:0 [inlined]
#loadndsparse#187 at C:\Users\bernhard.konig\.julia\packages\JuliaDB\jDAlJ\src\io.jl:82 [inlined]
(::getfield(JuliaDB, Symbol("#kw##loadndsparse")))(::NamedTuple{(:output, :header_exists, :chunks, :colparsers, :colnames, :datacols),Tuple{String,Bool,Int64,Dict{Int64,DataType},Array{String,1},Array{Int64,1}}}, ::typeof(loadndsparse), ::Array{String,1}) at .\none:0
top-level scope at util.jl:156
eval(::Module, ::Any) at .\boot.jl:319
eval_user_input(::Any, ::REPL.REPLBackend) at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\REPL\src\REPL.jl:85
macro expansion at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\REPL\src\REPL.jl:117 [inlined]
(::getfield(REPL, Symbol("##28#29")){REPL.REPLBackend})() at .\task.jl:259
Stacktrace:
 [1] #remotecall_fetch#148(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Function, ::Distributed.LocalProcess) at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:365
 [2] remotecall_fetch(::Function, ::Distributed.LocalProcess) at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:364
 [3] #remotecall_fetch#152(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Function, ::Int64) at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:406
 [4] remotecall_fetch at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:406 [inlined]
 [5] get_workers_at(::Sockets.IPv4) at C:\Users\bernhard.konig\.julia\packages\MemPool\tlPqB\src\datastore.jl:95
 [6] affinity(::MemPool.FileRef) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\chunks.jl:84
 [7] affinity(::Dagger.Chunk{Any,MemPool.FileRef}) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\chunks.jl:50
 [8] affinity(::Dagger.Thunk) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\thunk.jl:52
 [9] pop_with_affinity!(::Dagger.Context, ::Array{Dagger.Thunk,1}, ::Dagger.OSProc, ::Bool) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\scheduler.jl:97
 [10] compute_dag(::Dagger.Context, ::Dagger.Thunk) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\scheduler.jl:36
 [11] compute(::Dagger.Context, ::Dagger.Thunk) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\compute.jl:25
 [12] #fromchunks#47(::Nothing, ::Int64, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Array{Dagger.Thunk,1}) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\jDAlJ\src\table.jl:148
 [13] fromchunks(::Array{Dagger.Thunk,1}) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\jDAlJ\src\table.jl:129
 [14] offset_index!(::JuliaDB.DNDSparse{Tuple{Int64},NamedTuple{(:Column1, :Column2, :Column3, :Column4, :Column5),NTuple{5,Float64}}}, ::Int64) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\jDAlJ\src\io.jl:28
 [15] #_loadtable#188(::Int64, ::String, ::Bool, ::Array{Any,1}, ::Bool, ::Bool, ::Base.Iterators.Pairs{Symbol,Any,NTuple{4,Symbol},NamedTuple{(:header_exists, :colparsers, :colnames, :datacols),Tuple{Bool,Dict{Int64,DataType},Array{String,1},Array{Int64,1}}}}, ::Function, ::Type, ::Array{String,1}) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\jDAlJ\src\io.jl:153
 [16] #_loadtable at .\none:0 [inlined]
 [17] #loadndsparse#187 at C:\Users\bernhard.konig\.julia\packages\JuliaDB\jDAlJ\src\io.jl:82 [inlined]
 [18] (::getfield(JuliaDB, Symbol("#kw##loadndsparse")))(::NamedTuple{(:output, :header_exists, :chunks, :colparsers, :colnames, :datacols),Tuple{String,Bool,Int64,Dict{Int64,DataType},Array{String,1},Array{Int64,1}}}, ::typeof(loadndsparse), ::Array{String,1}) at .\none:0
 [19] top-level scope at util.jl:156

Please post the output of `import Pkg; Pkg.status()`!

See this issue:

and Julian Samaroo’s advice to fix this (for now):

In case anyone runs into this issue, the following Pkg command should help: ] pin IndexedTables@v0.12.1 StructArrays@v0.3.4

Might help?

 versioninfo()
Julia Version 1.0.4
Commit 38e9fb7f80 (2019-05-16 03:38 UTC)
Platform Info:
  OS: Windows (x86_64-w64-mingw32)
  CPU: Intel(R) Core(TM) i7-6600U CPU @ 2.60GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-6.0.0 (ORCJIT, skylake)
Environment:
  JULIA_BINDIR = C:\Julia-1.X\bin\
  JULIA_HOME = C:\Julia-1.X\bin\
  JULIA_EDITOR = "C:\Program Files\Microsoft VS Code\Code.exe"


(X) pkg> st
Project X v0.1.0
    Status `m:\X\Project.toml`
  [336ed68f] CSV v0.5.8        
  [a93c6f00] DataFrames v0.18.4
  [a93385a2] JuliaDB v0.12.0   
  [bd369af6] Tables v0.2.8     

this does not seem to help me with my issue, I now have this status (but the same error)

(X) pkg> st
Project X v0.1.0
    Status `m:\X\Project.toml`
  [336ed68f] CSV v0.5.8
  [a93c6f00] DataFrames v0.18.4
  [6deec6e2] IndexedTables v0.12.1 ⚲
  [a93385a2] JuliaDB v0.12.0
  [09ab397b] StructArrays v0.3.4 ⚲
  [bd369af6] Tables v0.2.8

(X) pkg> 

notably, this is working (which may be sufficient for me for the time being)

# Workaround reported as working: load the CSV with `loadtable` and write the
# result to a separate directory with `save`, instead of using `loadndsparse`
# with `output=`.
x9=loadtable(fileToBeRead)
save(x9,"C:\\temp\\bindir2")

I do have a follow up question though.
I have trouble defining the column type.
Should the code below work?

The first snippet does not yield the intended result, and the last line yields an error for me…

# Attempt 1: request column types by column name.
# Per the resolution later in this thread, column 3 stays Float64 because the
# CSV values were written with a trailing ".0".
dd=Dict("Column1"=>Float64,"Column2"=>Float64,"Column3"=>Int)
x9=loadtable(fileToBeRead,colparsers=dd)
select(x9,3) #is Float64 not Int, Why?

# Attempt 2: pass a vector of TextParse parser types, one per column.
# NOTE(review): the error below suggests parser *types* are not accepted in
# this position — confirm the expected form against the TextParse docs.
elt=[TextParse.Numeric{Float64},TextParse.Numeric{Float64},TextParse.Numeric{Float64},TextParse.Numeric{Int},TextParse.Numeric{Float64}]
x9=loadtable(fileToBeRead,colparsers=elt)
#ERROR: MethodError: no method matching fromtype(::Type{TextParse.Numeric{Float64}})

Since it is a MemPool error, I first want to make sure you are on the master version! There have been some fixes in the recent past specifically on an error message like the one above.

About the column types: try a concrete type like Int64!

Ok, the eltype issue is resolved.
It was a bad idea to write the CSV with Floats ending in “.0”. Apparently these cannot be easily parsed as Ints. If I write a dataframe instead of a matrix, things work out.


# Revised reproduction script: the data is now built as a DataFrame and
# written with CSV.write (instead of writedlm on a Float64 matrix), so the
# truncated columns are not written with a trailing ".0".
using JuliaDB
using CSV
#using Tables
using DelimitedFiles
using TextParse
using DataFrames

#currently we need these pinned to avoid an issue with loadndsparse
#=
    add IndexedTables
    add StructArrays
    pin IndexedTables@v0.12.1 StructArrays@v0.3.4
=#

# Input CSV location and output directory for the binary data.
fileToBeRead="C:\\temp\\test0.csv"
bindir="c:\\temp\\bindata"

mt=rand(5_000,5);
df=DataFrame(mt)
# Assign the truncated integer vectors to columns 3 and 4 of the DataFrame
# (rather than back into the Float64 matrix `mt`).
df[:,3]=Int.(trunc.(Int,100*mt[:,3]));
df[:,4]=Int.(trunc.(Int,10000*mt[:,4]));
#hdr=reshape(map(i->string("Column",i),1:size(mt,2)),1,size(mt,2));

# Start from a clean file, then write the DataFrame (header included).
isfile(fileToBeRead)&&rm(fileToBeRead)
CSV.write(fileToBeRead,df)
#open(fileToBeRead, "w") do io
#    writedlm(io,hdr,',')    
#    writedlm(io,mt,',')
#end

#read file with CSV
df_read=CSV.read(fileToBeRead,types=[Float64,Float64,Int64,Int64,Float64]);
sum(df_read[1])
eltype(df_read[3]) #ok

#read file with JuliaDB
@time csvfiles = glob(fileToBeRead);

!isdir(bindir) && mkdir(bindir)
# This call is still reported in this thread to fail with
# "UndefRefError: access to undefined reference".
@time loadndsparse(csvfiles, output=bindir,
    header_exists=true,
    chunks=80,
    colparsers=Dict(1=>Float64, 2=>Float32, 3=>Int,4=>Int,5=>Float64),
    colnames=["Column1", "Column2", "Column3", "Column4","Column5"],
    datacols=[1,2,3,4,5])
    
#=
    @time loadndsparse(csvfiles, output=bindir,
    header_exists=true,
    chunks=80,
    colparsers=Dict(1=>Float64, 2=>Float64, 3=>Float64,4=>Float64,5=>Float64),
    colnames=["Column1", "Column2", "Column3", "Column4","Column5"],
    datacols=[1,2,3,4,5])
=#    

x9=loadtable(fileToBeRead,colparsers=TextParse.Numeric{Float64})

# NOTE(review): the later stack trace in this thread shows the columns of this
# file named x1..x5, so the "Column…" keys below presumably match nothing and
# column 3 comes out as Int64 through type detection — confirm.
dd=Dict("Column1"=>Float64,"Column2"=>Float64,"Column3"=>Int64)
x9=loadtable(fileToBeRead,colparsers=dd)
JuliaDB.select(x9,3) #ok -> Int64

master is not helping me with loadndsparse…
Thank you for the support.


julia> @time loadndsparse(csvfiles, output=bindir,
           header_exists=true,
               chunks=80,
                   colparsers=Dict(1=>Float64, 2=>Float32, 3=>Int64,4=>Int64,5=>Float64),
                       colnames=["Column1", "Column2", "Column3", "Column4","Column5"],
                           datacols=[1,2,3,4,5])
ERROR: UndefRefError: access to undefined reference

# ]st yields
[336ed68f] CSV v0.5.8
  [a93c6f00] DataFrames v0.18.4
  [a93385a2] JuliaDB v0.12.0+ #master (https://github.com/JuliaComputing/JuliaDB.jl.git)
  [bd369af6] Tables v0.2.8
  [e0df1984] TextParse v0.9.1

Glad to hear about the eltype. Your example of a generated data set that is being read back into JuliaDB may be a good test to be added.

I have to be pedantic here. Are you sure you ran Pkg.@pkg_str("add MemPool#master"), restarted Julia and tried again?

Why do you provide colnames explicitly and set all columns to be data columns explicitly? They do not affect your load…

Ah, I thought you meant master of JuliaDB.
Ok, adding MemPool master now.
status yields:

  [336ed68f] CSV v0.5.8
  [a93c6f00] DataFrames v0.18.4
  [a93385a2] JuliaDB v0.12.0+ #master (https://github.com/JuliaComputing/JuliaDB.jl.git)
  [f9f48841] MemPool v0.2.0+ #master (https://github.com/JuliaComputing/MemPool.jl.git)
  [bd369af6] Tables v0.2.8
  [e0df1984] TextParse v0.9.1

here is what I get (you are right about colnames, that is/was superfluous)

julia> @time csvfiles = glob(fileToBeRead);
  0.638517 seconds (878.43 k allocations: 41.860 MiB, 9.71% gc time)

julia> !isdir(bindir) && mkdir(bindir)
false

julia> @time loadndsparse(csvfiles, output=bindir,
           header_exists=true,
               chunks=80,
                   colparsers=Dict(1=>Float64, 2=>Float32, 3=>Int64,4=>Int64,5=>Float64),
                       datacols=[1,2,3,4,5])
ERROR: UndefRefError: access to undefined reference
getproperty(::Any, ::Symbol) at .\sysimg.jl:18
get_wrkrips() at C:\Users\bernhard.konig\.julia\packages\MemPool\PUncN\src\datastore.jl:65
n64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:364
remotecall_fetch(::Function, ::Distributed.LocalProcess) at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:364
#remotecall_fetch#152(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Function, ::Int64) at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:406
remotecall_fetch at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:406 [inlined]
get_workers_at(::Sockets.IPv4) at C:\Users\bernhard.konig\.julia\packages\MemPool\PUncN\src\datastore.jl:95
affinity(::MemPool.FileRef) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\chunks.jl:84
affinity(::Dagger.Chunk{Any,MemPool.FileRef}) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\chunks.jl:50
affinity(::Dagger.Thunk) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\thunk.jl:52
pop_with_affinity!(::Dagger.Context, ::Array{Dagger.Thunk,1}, ::Dagger.OSProc, ::Bool) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\scheduler.jl:97
compute_dag(::Dagger.Context, ::Dagger.Thunk) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\scheduler.jl:36
compute(::Dagger.Context, ::Dagger.Thunk) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\compute.jl:25
#fromchunks#47(::Nothing, ::Int64, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Array{Dagger.Thunk,1}) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\PRJbx\src\table.jl:148
fromchunks(::Array{Dagger.Thunk,1}) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\PRJbx\src\table.jl:129
offset_index!(::JuliaDB.DNDSparse{Tuple{Int64},NamedTuple{(:x1, :x2, :x3, :x4, :x5),Tuple{Float64,Float32,Int64,Int64,Float64}}}, ::Int64) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\PRJbx\src\io.jl:28
#_loadtable#188(::Int64, ::String, ::Bool, ::Array{Any,1}, ::Bool, ::Bool, ::Base.Iterators.Pairs{Symbol,Any,Tuple{Symbol,Symbol,Symbol},NamedTuple{(:header_exists, :colparsers, :datacols),Tuple{Bool,Dict{Int64,DataType},Array{Int64,1}}}}, ::Function, ::Type, ::Array{String,1}) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\PRJbx\src\io.jl:153
#_loadtable at .\none:0 [inlined]
#loadndsparse#187 at C:\Users\bernhard.konig\.julia\packages\JuliaDB\PRJbx\src\io.jl:82 [inlined]
(::getfield(JuliaDB, Symbol("#kw##loadndsparse")))(::NamedTuple{(:output, :header_exists, :chunks, :colparsers, :datacols),Tuple{String,Bool,Int64,Dict{Int64,DataType},Array{Int64,1}}}, ::typeof(loadndsparse), ::Array{String,1}) at .\none:0
top-level scope at util.jl:156
eval(::Module, ::Any) at .\boot.jl:319
eval_user_input(::Any, ::REPL.REPLBackend) at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\REPL\src\REPL.jl:85
macro expansion at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\REPL\src\REPL.jl:117 [inlined]
(::getfield(REPL, Symbol("##28#29")){REPL.REPLBackend})() at .\task.jl:259
Stacktrace:
 [1] #remotecall_fetch#148(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Function, ::Distributed.LocalProcess) at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:365
 [2] remotecall_fetch(::Function, ::Distributed.LocalProcess) at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:364
 [3] #remotecall_fetch#152(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Function, ::Int64) at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:406
 [4] remotecall_fetch at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.0\Distributed\src\remotecall.jl:406 [inlined]
 [5] get_workers_at(::Sockets.IPv4) at C:\Users\bernhard.konig\.julia\packages\MemPool\PUncN\src\datastore.jl:95
 [6] affinity(::MemPool.FileRef) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\chunks.jl:84
 [7] affinity(::Dagger.Chunk{Any,MemPool.FileRef}) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\chunks.jl:50
 [8] affinity(::Dagger.Thunk) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\thunk.jl:52
 [9] pop_with_affinity!(::Dagger.Context, ::Array{Dagger.Thunk,1}, ::Dagger.OSProc, ::Bool) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\scheduler.jl:97
 [10] compute_dag(::Dagger.Context, ::Dagger.Thunk) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\scheduler.jl:36
 [11] compute(::Dagger.Context, ::Dagger.Thunk) at C:\Users\bernhard.konig\.julia\packages\Dagger\sdZXi\src\compute.jl:25
 [12] #fromchunks#47(::Nothing, ::Int64, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::Function, ::Array{Dagger.Thunk,1}) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\PRJbx\src\table.jl:148
 [13] fromchunks(::Array{Dagger.Thunk,1}) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\PRJbx\src\table.jl:129
 [14] offset_index!(::JuliaDB.DNDSparse{Tuple{Int64},NamedTuple{(:x1, :x2, :x3, :x4, :x5),Tuple{Float64,Float32,Int64,Int64,Float64}}}, ::Int64) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\PRJbx\src\io.jl:28
 [15] #_loadtable#188(::Int64, ::String, ::Bool, ::Array{Any,1}, ::Bool, ::Bool, ::Base.Iterators.Pairs{Symbol,Any,Tuple{Symbol,Symbol,Symbol},NamedTuple{(:header_exists, :colparsers, :datacols),Tuple{Bool,Dict{Int64,DataType},Array{Int64,1}}}}, ::Function, ::Type, ::Array{String,1}) at C:\Users\bernhard.konig\.julia\packages\JuliaDB\PRJbx\src\io.jl:153
 [16] #_loadtable at .\none:0 [inlined]
 [17] #loadndsparse#187 at C:\Users\bernhard.konig\.julia\packages\JuliaDB\PRJbx\src\io.jl:82 [inlined]
 [18] (::getfield(JuliaDB, Symbol("#kw##loadndsparse")))(::NamedTuple{(:output, :header_exists, :chunks, :colparsers, :datacols),Tuple{String,Bool,Int64,Dict{Int64,DataType},Array{Int64,1}}}, ::typeof(loadndsparse), ::Array{String,1}) at .\none:0
 [19] top-level scope at util.jl:156

Okay, please open an issue on MemPool's GitHub page.

I have had bad experiences with non-standard column types in JuliaDB. Can you change Float32 to Float64, please?

Ok.
Float64 type did not help.
I think I initially used Float32 as I wanted to test different types as shown in this tutorial (https://github.com/JuliaDB/JuliaDB_Benchmarks/blob/master/bigdata/JuliaDB%20with%20TrueFX%20dataset.ipynb)

here is a fix
thank you, Julian Samaroo

https://github.com/JuliaComputing/MemPool.jl/pull/34

1 Like