Writing larger than expected binary files

When using FortranFiles, I was wondering why writing a new file with the same data and structure as the original produces a file that is significantly larger (~925MB) than the original (~25MB). Is there a compression option I should be using when writing binary files?
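For reference, here is a back-of-the-envelope size estimate for this kind of file, assuming the default 4-byte record markers of Fortran sequential unformatted files and Float32 slabs (the dimensions below are placeholders, not the real values from FILE):

# Rough size estimate (sketch with placeholder dimensions, not the real ones from FILE).
# Each Fortran sequential record carries a 4-byte length marker before and after it.
nx, ny, nlev, nvars = 100, 100, 37, 6    # placeholders
slab_bytes   = 4 * nx * ny + 8           # one Float32 slab record plus its two markers
header_bytes = 250                       # rough allowance for the four small header records
est = nvars * nlev * (slab_bytes + header_bytes)
est / 2^20                               # MiB; with the real dimensions this should land near the original ~25MB, not ~925MB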

MWE

using FortranFiles, DimensionalData

function getvar(fn, vname)
    f = FortranFile(fn, convert = "big-endian")

    # We need to define these variables due to scoping within the while loop
    slab, ifv, hdate, xfcst, map_source, field, units, desc,
    xlvl, nx, ny, iproj, projrecs, is_wind_earth_rel, meta = [nothing for _ in 1:15]

    levels = Float32[]

    while !eof(f)
        ifv = read(f, Int32)
        hdate, xfcst, map_source, field, units, desc, xlvl, nx, ny, iproj =
            read(f, FString{24}, Float32, FString{32}, FString{9},
                FString{25}, FString{46}, Float32, Int32, Int32, Int32)

        projrecs = projectionrec(Val(Int(iproj)), f)
        is_wind_earth_rel = read(f, Int32)

        if String(trim(field)) == vname
            push!(levels, xlvl)
            if isnothing(slab)
                slab = read(f, (Float32, nx, ny))
            else
                slab = cat(slab, read(f, (Float32, nx, ny)), dims = 3)
            end
            meta = (ifv = ifv, hdate = hdate, xfcst = xfcst, map_source = map_source,
                units = units, desc = desc, iproj = iproj, projrecs = projrecs,
                is_wind_earth_rel = is_wind_earth_rel)
        else
            read(f)
        end
    end
    close(f)

    DimArray(slab, DimensionalData.formatdims(slab, (X(1:nx), Y(1:ny), Z(levels))), (), Symbol(vname), meta)
end

function Base.write(fn::String, T::Tuple)
    isfile(fn) && rm(fn)
    f = FortranFile(fn, "w", convert = "big-endian")
    nx = convert(Int32, size(T[1], X))
    ny = convert(Int32, size(T[1], Y))
    xlvl = convert(Array{Float32}, val(T[1], Z))

    for eachlev in axes(T[1], 3)
        for (idx, eachvar) in enumerate(T)
            meta = metadata(eachvar)
            field = FString(9, String(name(eachvar)))
            projrecs = values(meta[:projrecs])

            write(f, meta[:ifv])
            write(f, meta[:hdate], meta[:xfcst], meta[:map_source], field, meta[:units], meta[:desc], xlvl[eachlev], nx, ny,  meta[:iproj])
            write(f, projrecs...)
            write(f, meta[:is_wind_earth_rel])
            write(f, data(eachvar))
        end
    end
    close(f)
end

function projectionrec(::Val{0}, f)
    startloc, startlat, startlon, deltalat, deltalon, earth_radius = read(f, FString{8}, Float32, Float32, Float32, Float32, Float32)
    (startloc = startloc, startlat = startlat, startlon = startlon, deltalat = deltalat, deltalon = deltalon, earth_radius = earth_radius)
end
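
Only the cylindrical equidistant projection (iproj = 0) is needed for this file. If other projections turn up, further methods can be added in the same Val-dispatch style; for example, a sketch for a Mercator record, with the field list taken from my reading of the WPS intermediate-format description (unverified):

# Hypothetical Mercator (iproj = 1) record; field list assumed from the WPS
# intermediate-format description, not verified against an actual file.
function projectionrec(::Val{1}, f)
    startloc, startlat, startlon, dx, dy, truelat1, earth_radius =
        read(f, FString{8}, Float32, Float32, Float32, Float32, Float32, Float32)
    (startloc = startloc, startlat = startlat, startlon = startlon,
        dx = dx, dy = dy, truelat1 = truelat1, earth_radius = earth_radius)
end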

# FILE can be downloaded from https://cloudstor.aarnet.edu.au/plus/s/rolTL7Phye758q1
fn = "FILE"
pres = getvar(fn, "PRES");
TT = getvar(fn, "TT");
VV = getvar(fn, "VV");
UU = getvar(fn, "UU");
GHT = getvar(fn, "GHT");
SPECHUMD = getvar(fn, "SPECHUMD");

write("testing", (TT, VV, UU, GHT, SPECHUMD, pres))

The large file size was due to writing the entire 3-D array on every pass through the write loop, rather than only the slab for the current level, so the data ended up duplicated once per level. Updating the inner loop solved the problem:

for (idx, eachvar) in enumerate(T)
    A = eachvar[:,:,eachlev]

    meta = metadata(A)
    field = FString(9, String(name(eachvar)))
    projrecs = values(meta[:projrecs])

    write(f, meta[:ifv])
    write(f, meta[:hdate], meta[:xfcst], meta[:map_source], field, meta[:units], meta[:desc], xlvl[eachlev], nx, ny,  meta[:iproj])
    write(f, projrecs...)
    write(f, meta[:is_wind_earth_rel])
    write(f, data(A))
end
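
With that change, a quick round-trip check (reusing fn, getvar and the variables from the MWE above) should show the rewritten file coming out at roughly the original size and returning the same data:

write("testing", (TT, VV, UU, GHT, SPECHUMD, pres))
filesize(fn), filesize("testing")   # the two sizes should now be comparable
TT2 = getvar("testing", "TT")       # read one variable back from the new file
data(TT2) ≈ data(TT)                # the slab values should round-trip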