The reason I stay with DataFrames and `readtable` is that I get better speed with `readtable`, even when I don’t specify the eltypes. It is possible, though, that I have done something wrong.
$ julia DataCSV.jl
WARNING: Method definition ==(Base.Nullable{S}, Base.Nullable{T}) in module Base at nullable.jl:238 overwritten in module NullableArrays at /home/fred/.julia/v0.6/NullableArrays/src/operators.jl:99.
Reading... data.csv
Reading... data2.csv
elapsed time: 3.95377051 seconds
$ julia DataFrames.jl
Reading... data.csv
Reading... data2.csv
elapsed time: 1.566749483 seconds
DataCSV.jl:
using CSV
##########################################
# read dataframe
"""
    readTable(file, sep, h)

Announce `file` on standard output, then load it with `CSV.read` and return the result.

# Arguments
- `file`: path of the delimited text file to load.
- `sep`: forwarded to `CSV.read` as the `delim` keyword.
- `h`: forwarded to `CSV.read` as the `header` keyword.

Column 1 is requested as `String` through `types`, and `"NA"` is passed as the `null`
keyword.
"""
function readTable(file, sep, h)
    println("Reading...\t", file)
    return CSV.read(file; delim=sep, types=Dict(1 => String), header=h, null="NA")
end
"""
    main()

Load `data.csv` and then `data2.csv` through `readTable`, passing a tab character as the
separator and `true` as the header flag. The loaded tables are discarded; the function
returns `nothing`.
"""
function main()
    delimiter = '\t'   # table separator
    hasheader = true   # table header
    # process data
    foreach(["data.csv", "data2.csv"]) do path
        readTable(path, delimiter, hasheader)
    end
    return nothing
end
##########################################
# Time a single run of main() and report it in the same "elapsed time: … seconds"
# format that tic()/toc() printed. tic()/toc() were deprecated in Julia 0.7 and removed
# in 1.0, whereas @elapsed is available on both old and new versions.
# NOTE(review): main() is called only once, so the figure presumably also includes
# first-call compilation time — confirm before comparing readers.
elapsed = @elapsed main()
println("elapsed time: ", elapsed, " seconds")
DataFrames.jl:
using DataFrames
# read dataframe
"""
    readTable(file, sep, h)

Announce `file` on standard output, then load it with `readtable` and return the result.

# Arguments
- `file`: path of the delimited text file to load.
- `sep`: forwarded to `readtable` as the `separator` keyword.
- `h`: forwarded to `readtable` as the `header` keyword.
"""
function readTable(file, sep, h)
    println("Reading...\t", file)
    return readtable(file; separator=sep, header=h)
end
"""
    main()

Load `data.csv` and then `data2.csv` through `readTable`, passing a tab character as the
separator and `true` as the header flag. The loaded tables are discarded; the function
returns `nothing`.
"""
function main()
    delimiter = '\t'   # table separator
    hasheader = true   # table header
    # process data
    foreach(["data.csv", "data2.csv"]) do path
        readTable(path, delimiter, hasheader)
    end
    return nothing
end
##########################################
# Time a single run of main() and report it in the same "elapsed time: … seconds"
# format that tic()/toc() printed. tic()/toc() were deprecated in Julia 0.7 and removed
# in 1.0, whereas @elapsed is available on both old and new versions.
# NOTE(review): main() is called only once, so the figure presumably also includes
# first-call compilation time — confirm before comparing readers.
elapsed = @elapsed main()
println("elapsed time: ", elapsed, " seconds")