I’m trying to show a progress bar (e.g. with ProgressMeter.jl) while reading a file. The file may or may not be compressed, and the only thing I know a priori is the total number of bytes in the file.
Right now I have
# Read `fn` line by line, transparently decompressing when `compression == :zstd`.
open(fn) do io
    # Wrap the raw file handle in a Zstd decompressor only when requested.
    if compression == :zstd
        io = CodecZstd.ZstdDecompressorStream(io)
    end
    for line in eachline(io)
        ( ... do stuff ... )
    end
end
I would like to add a progress bar. However, for it to be useful in telling me how much longer I have to wait, I need to pass it the total size of the thing I’m reading. Since the file might be compressed, I can’t pass it the number of lines, so my idea is to pass it the total number of bytes and update it every time I read some bytes. Something like:
import ProgressMeter
# Total is the on-disk size of `fn` in bytes.
fs = filesize(fn)
progress = ProgressMeter.Progress(fs)
open(fn) do io
    io = compression == :zstd ? CodecZstd.ZstdDecompressorStream(io) : io
    while !eof(io)
        # NOTE(review): `read_line_and_return_bytes` is a hypothetical helper that is not
        # defined here. Since the total is `filesize(fn)`, `nbytes` presumably has to be
        # the number of bytes consumed from the file on disk (i.e. compressed bytes when
        # reading through the decompressor) for the bar to end at 100%.
        line, nbytes = read_line_and_return_bytes(io)
        ( ... do stuff ... )
        # `nbytes` is a per-line increment, so advance the counter by that step;
        # `update!` would instead set the counter to an absolute value.
        ProgressMeter.next!(progress; step=nbytes)
    end
end
The ProgressMeter README has an example [1] that almost does this:
using ProgressMeter
"""
    readFileLines(fileName::String)

Read `fileName` line by line while updating a byte-based progress bar.

The progress total is the file size, obtained by seeking to the end of the file, and
the bar is set to `position(file)` after every line. Returns `nothing`.
"""
function readFileLines(fileName::String)
    # The do-block form guarantees the handle is closed, even if the body throws.
    open(fileName, "r") do file
        # Measure the file size by seeking to the end, then rewind before reading.
        seekend(file)
        fileSize = position(file)
        seekstart(file)
        p = Progress(fileSize; dt=1.0) # minimum update interval: 1 second
        while !eof(file)
            line = readline(file)
            # Here's where you do all the hard, slow work
            update!(p, position(file))
        end
    end
    return nothing
end
except that this only works for plain files (as far as I can tell; correct me if I’m wrong). The reason is that position(io)
is implemented for IOStream
but not for TranscodingStream{CodecZstd.ZstdDecompressor, IOStream}
I can access the thing I want by examining a TranscodingStream’s internals, that’s the stream field [2]
So, this works:
import ProgressMeter
import TranscodingStreams
"""
    positionbytes(x)

Return a byte position for `x`.

For an `IOStream` this is `position(x)`. For a `TranscodingStream` wrapping an
`IOStream` it is the position of the wrapped stream, read from the `stream` field.
"""
function positionbytes(x::IOStream)
    return position(x)
end

# NOTE(review): `stream` appears to be an internal field of `TranscodingStream`, not
# public API — confirm it is stable across TranscodingStreams.jl versions.
function positionbytes(x::TranscodingStreams.TranscodingStream{C,IOStream}) where {C}
    return position(x.stream)
end
# Total is the on-disk size of `fn` in bytes; progress is tracked via `positionbytes`.
fs = filesize(fn)
progress = ProgressMeter.Progress(fs)
open(fn) do io
    io = compression == :zstd ? CodecZstd.ZstdDecompressorStream(io) : io
    prev_bytes = 0
    while !eof(io)
        line = readline(io)
        ( ... do stuff ... )
        # Query the position once per iteration and only touch the progress bar when
        # the position has actually advanced since the last update.
        nbytes = positionbytes(io)
        if nbytes > prev_bytes
            ProgressMeter.update!(progress, nbytes)
            prev_bytes = nbytes
        end
    end
end
This works, but it seems like a very clunky way of doing something that I would imagine is a common use case: getting an estimate of how much longer it’s going to take to process a file.
Any cleaner approaches?
[1] GitHub - timholy/ProgressMeter.jl: Progress meter for long-running computations
[2] TranscodingStreams.jl/src/stream.jl at 76543edbc3e2433d5a83d252d021d352675bf931 · JuliaIO/TranscodingStreams.jl · GitHub