I have the following code for unpacking .xz compressed files:
using CodecXz
# Decompress the .xz file at path `in` and write the result to path `out`,
# handling the decompressed data as lines of text.
# NOTE(review): `eachline` drops each line terminator by default and `println`
# always appends "\n", so the output is not guaranteed to be byte-identical to
# the decompressed data (e.g. "\r\n" endings, or a last line without a newline).
# NOTE(review): both streams are closed only if no error is thrown before the
# `close` calls are reached.
function decompress(in, out)
stream = open(in)
output = open(out,"w")
for line in eachline(XzDecompressorStream(stream))
println(output, line)
end
close(stream)
close(output)
end
This does not always work due to the use of eachline and println…
How can I do this in a more generic form?
@aplavin suggested using `read(XzDecompressorStream(stream))`, but I do not want to do that because it creates a huge vector in RAM. I would prefer to do the decompression in chunks.
How can I do that in Julia?
1 Like
I guess my main problem is that I don’t fully understand how to use streams.
How can I for example copy a binary file using an input and an output stream?
# Skeleton for copying a binary file from path `input_file` to path
# `output_file`: both files are opened (the output in "w" mode) and closed
# again, but the transfer between the two streams is not written yet.
function copyfile(input_file, output_file)
in_stream = open(input_file)
out_stream = open(output_file,"w")
# What to put here?
close(in_stream)
close(out_stream)
end
"""
    copyfile(input_file, output_file; chunksize=10000)

Copy the bytes of the file at `input_file` to the file at `output_file`.

The data is transferred in pieces of at most `chunksize` bytes, so memory use
does not grow with the size of the file. `output_file` is created or truncated.

# Keywords
- `chunksize::Integer=10000`: maximum number of bytes moved per iteration.

# Throws
- `ArgumentError` if `chunksize` is not positive.
"""
function copyfile(input_file, output_file; chunksize::Integer=10000)
    chunksize > 0 || throw(ArgumentError("chunksize must be positive, got $chunksize"))
    # The do-block form of `open` closes each stream even when an error is
    # thrown while reading or writing.
    open(input_file) do in_stream
        open(output_file, "w") do out_stream
            # One buffer is reused for every chunk instead of allocating a new
            # vector per read.
            buffer = Vector{UInt8}(undef, chunksize)
            while !eof(in_stream)
                nread = readbytes!(in_stream, buffer)
                write(out_stream, view(buffer, 1:nread))
            end
        end
    end
    return nothing
end
Something like that (maybe even exactly that) should work for your original question as well.
1 Like
Or simply
# Copy everything that remains in `in_stream` to `out_stream` with one call.
write(out_stream, in_stream)
Not working:
# Attempt at chunked decompression of the .xz file `in` into the file `out`,
# reading up to 10000 decompressed bytes per iteration.
# NOTE(review): a new `XzDecompressorStream` is constructed around `stream` on
# every loop iteration, and the loop condition checks `eof` of the raw
# compressed file rather than of a decompressor. Presumably each wrapper reads
# ahead in `stream`, so later wrappers do not start at the beginning of the xz
# data — confirm. Creating the decompressor once, before the loop, and testing
# `eof` on it would avoid both points.
function decompress1(in, out)
stream = open(in)
output = open(out,"w")
while !eof(stream)
write(output, read(XzDecompressorStream(stream), 10000))
end
close(stream)
close(output)
end
It fails with:
ERROR: LoadError: lzma error: code = 7
Stacktrace:
[1] changemode!(stream::TranscodingStreams.TranscodingStream{XzDecompressor, IOStream}, newmode::Symbol)
@ TranscodingStreams ~/.julia/packages/TranscodingStreams/TsaT2/src/stream.jl:724
[2] callprocess(stream::TranscodingStreams.TranscodingStream{XzDecompressor, IOStream}, inbuf::TranscodingStreams.Buffer, outbuf::TranscodingStreams.Buffer)
@ TranscodingStreams ~/.julia/packages/TranscodingStreams/TsaT2/src/stream.jl:654
[3] fillbuffer(stream::TranscodingStreams.TranscodingStream{XzDecompressor, IOStream}; eager::Bool)
@ TranscodingStreams ~/.julia/packages/TranscodingStreams/TsaT2/src/stream.jl:582
[4] fillbuffer
@ ~/.julia/packages/TranscodingStreams/TsaT2/src/stream.jl:568 [inlined]
[5] eof(stream::TranscodingStreams.TranscodingStream{XzDecompressor, IOStream})
@ TranscodingStreams ~/.julia/packages/TranscodingStreams/TsaT2/src/stream.jl:191
[6] readbytes!(stream::TranscodingStreams.TranscodingStream{XzDecompressor, IOStream}, b::Vector{UInt8}, nb::Int64)
@ TranscodingStreams ~/.julia/packages/TranscodingStreams/TsaT2/src/stream.jl:375
[7] read(s::TranscodingStreams.TranscodingStream{XzDecompressor, IOStream}, nb::Int64)
@ Base ./io.jl:1000
[8] decompress(in::String, out::String)
@ Main ~/repos/CanAnalyzer/src/process_log.jl:97
[9] main(logfile_name::String)
@ Main ~/repos/CanAnalyzer/src/process_log.jl:500
[10] top-level scope
@ ~/repos/CanAnalyzer/src/process_log.jl:595
[11] include(fname::String)
@ Base.MainInclude ./client.jl:476
[12] top-level scope
@ REPL[1]:1
in expression starting at ...
Working:
"""
    decompress(in, out)

Decompress the .xz file at path `in` into the file at path `out`.

The complete decompressed content is first read into a single byte vector and
then written out in one call.
"""
function decompress(in, out)
    compressed = open(in)
    target = open(out, "w")
    # `read` without a byte count collects everything the decompressor yields.
    bytes = read(XzDecompressorStream(compressed))
    write(target, bytes)
    close(compressed)
    close(target)
end
So I have a working solution, but it allocates the full size of the output file in RAM…
"""
    decompress(in, out)

Decompress the .xz file at path `in` into the file at path `out` by passing the
decompressor stream directly to `write`.
"""
function decompress(in, out)
    source = open(in)
    sink = open(out, "w")
    # Wrap the compressed file; `write` then pulls the decompressed bytes from it.
    decoder = XzDecompressorStream(source)
    write(sink, decoder)
    close(source)
    close(sink)
end
Also, you should use the do-block version of `open` to ensure the streams are closed even if an error occurs.
Is this better?
"""
    decompress(input, output)

Decompress the .xz file at path `input` and write the decompressed bytes to the
file at path `output`, returning the value of the `write` call.

Both files are opened with the do-block form of `open`, so they are closed even
if an error is thrown during decompression.
"""
function decompress(input, output)
    open(input) do inp
        open(output, "w") do out
            # Pass the decompressor stream itself to `write` rather than
            # `read(...)`-ing it first: `read` would collect the entire
            # decompressed file into one vector before anything is written.
            write(out, XzDecompressorStream(inp))
        end
    end
end