Note that JLD2 files are HDF5-compatible (JLD2 writes a subset of the HDF5 format), so you can append data to an existing file without having to read its contents first. Here is an example setup:
using JLD2: JLD2, jldopen
using FileIO: load
"""
    Result(graph, spectrum)

Immutable record holding one result: `graph` is a vector of `Int => Int` pairs and
`spectrum` is a vector of `Float64` values.
"""
struct Result
    graph::Vector{Pair{Int,Int}}
    spectrum::Vector{Float64}
end
"""
    make_rand_result(len_g=1000, len_s=1000)

Build a `Result` filled with random data: `len_g` pairs whose two entries are each drawn
from `1:len_g`, and a spectrum of `len_s` values produced by `rand(len_s)`.
"""
function make_rand_result(len_g=1000, len_s=1000)
    node_ids = 1:len_g
    # One pair per element of `node_ids`; the left entry is drawn before the right one.
    edges = map(node_ids) do _
        src = rand(node_ids)
        dst = rand(node_ids)
        return src => dst
    end
    return Result(edges, rand(len_s))
end
"""
    append_result_data(fname::AbstractString, gname::AbstractString, result::Result)

Append a `Result` instance to a result file for a particular frequency and pair of scan
parameters.

The file is opened in append mode (`"a"`), a new group named `gname` is created in it, and
`result` is stored in that group under the key `"result"`. Returns `nothing`.

## Arguments
- `fname`: The name of the result file to be appended to.
- `gname`: The unique `JLD2` group name to be used in the file for grouping the data
  associated with this particular `Result`.
- `result`: The `Result` data to be written to the file.
"""
function append_result_data(fname::AbstractString, gname::AbstractString, result::Result)
    jldopen(fname, "a") do fid
        # String(...) lets callers pass any AbstractString (e.g. a SubString) as the name.
        group = JLD2.Group(fid, String(gname))
        group["result"] = result
    end
    return nothing
end
"""
    read_result_file(fname::AbstractString)::Vector{Result}

Read a result file (in JLD2 format) and return a vector of results.

The whole file is loaded at once with `load`. Entries are ordered by the integer obtained
from the part of each key that precedes the first `/`, so that part of every key must
parse as an `Int`.
"""
function read_result_file(fname::AbstractString)::Vector{Result}
    contents = load(fname)  # a Dict
    # Numeric ordering, so that e.g. "10/..." sorts after "2/...".
    group_index(key) = parse(Int, split(key, '/')[1])
    ordered_keys = sort!(collect(keys(contents)); by=group_index)
    results = Result[]
    for key in ordered_keys
        push!(results, contents[key])
    end
    return results
end
fname = "results.jld2"
# Time 100 appends; group names are the loop index rendered as a string ("1" … "100").
@time for idx in 1:100
    append_result_data(fname, string(idx), make_rand_result())
end
The result of executing this code is
0.153978 seconds (90.18 k allocations: 11.762 MiB)
so each write takes about 0.0015 seconds on my machine (using an SSD). For this small amount of data, one can read in the entire file at once using the `read_result_file` function:
julia> results = read_result_file("results.jld2")
100-element Vector{Result}:
Result([101 => 539, 595 => 14, 157 => 177, 217 => 605, 936 => 594, 34 => 199, 798 => 597, 635 => 95, 149 => 669, 46 => 289 … 553 => 829, 463 => 229, 496 => 658, 298 => 627, 236 => 862, 154 => 48, 736 => 729, 512 => 277, 653 => 141, 913 => 978], [0.5249898601896728, 0.4312370366565087, 0.073513832333578, 0.9307962520861972, 0.570758524298132, 0.29993065399764673, 0.461428212039214, 0.48548053201183095, 0.9545877485556933, 0.2801239021403443 … 0.4034577980889896, 0.08557405938710971, 0.8975983515249012, 0.10602304568819776, 0.04273325330287514, 0.015438071286775767, 0.9906598021539139, 0.18758080699422763, 0.963555146837086, 0.39262228477199157])
Result([74 => 40, 250 => 120, 579 => 190, 691 => 108, 925 => 668, 675 => 141, 510 => 240, 389 => 320, 12 => 641, 531 => 372 … 67 => 563, 444 => 506, 817 => 139, 737 => 163, 518 => 588, 133 => 688, 279 => 535, 747 => 827, 695 => 684, 974 => 837], [0.3158561440694494, 0.9601344915366097, 0.003653937261339224, 0.2926859090457542, 0.2827751952536224, 0.9779077388680282, 0.263547348130297, 0.27975998760694254, 0.7767543085049818, 0.9597931494721555 … 0.9377423040933572, 0.21667656142445002, 0.636526779508497, 0.20705611746552255, 0.6387448271827161, 0.3646310839980138, 0.190373271599928, 0.5071072365335462, 0.39990795434930937, 0.10745351609508336])
⋮
Result([937 => 530, 906 => 467, 238 => 866, 609 => 25, 935 => 290, 872 => 503, 170 => 9, 894 => 365, 784 => 409, 807 => 327 … 229 => 506, 780 => 405, 321 => 948, 547 => 420, 122 => 30, 45 => 889, 245 => 420, 818 => 867, 299 => 420, 761 => 395], [0.17656883617243935, 0.4507217403285453, 0.04200052075311711, 0.6329462072026806, 0.11094795406276392, 0.057792051835904745, 0.4985762857207552, 0.1979714208282517, 0.9372049973093541, 0.022649614794672535 … 0.895620703038924, 0.03189989532073445, 0.012352709138806706, 0.9485344498486584, 0.5289750480073121, 0.607563134722566, 0.9184455893097113, 0.6339606316843024, 0.06139500890461502, 0.6184410686790852])
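Since you will be generating too much data to fit in RAM, you can instead read in selected groups of data, as shown in the section on groups in the JLD2 documentation. For example, `jldopen("results.jld2", "r") do fid; fid["17/result"]; end` reads only the `Result` stored in group `17`.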