Store DataFrame or filepath for CSV.read?

If you rarely need the actual contents of the CSV files, why not just store the file path? It looks like you need to reference the column names of each CSV file. You could do a partial read of the file to get just the header, store that along with the file path, and load the full data only when it is actually needed. For example:

"""
    CSVFileWrapper(file; limit = 0, kwargs...)

Lightweight handle for a CSV file: keeps the path and the column names, plus a
`DataFrame` slot that `load!` later overwrites with a full read.

The constructor performs one `CSV.read` with the given `limit` (default `0`, so
the initial read is meant to pick up just the header) and forwards any other
keyword arguments to `CSV.read` unchanged.

# Fields
- `file`: path that was passed to the constructor.
- `header`: column names of the `DataFrame` produced by the initial read.
- `df`: the `DataFrame` from the initial read.
- `is_loaded`: always `false` right after construction, whatever `limit` was.
"""
mutable struct CSVFileWrapper
    file::String
    header::Vector{String}
    df::DataFrame
    is_loaded::Bool

    function CSVFileWrapper(file; limit = 0, kwargs...)
        # Initial (by default row-less) read; its column names become the header.
        preview = CSV.read(file, DataFrame; limit = limit, kwargs...)
        return new(file, names(preview), preview, false)
    end
end

"""
    load!(f::CSVFileWrapper; kwargs...)

Return `f.df`, reading the file into it on the first call.

If `f.is_loaded` is already `true`, the stored `DataFrame` is returned as is and
`kwargs` are ignored. Otherwise `f.file` is read with `CSV.read` (forwarding
`kwargs`), the result replaces `f.df`, and `f.is_loaded` is set to `true`.
`f.header` is not touched.
"""
function load!(f::CSVFileWrapper; kwargs...)
    # Guard clause: nothing to do once the data has been read.
    f.is_loaded && return f.df

    f.df = CSV.read(f.file, DataFrame; kwargs...)
    f.is_loaded = true
    return f.df
end
1 Like