How to decompress .xz files / How to use streams?

I have the following code for unpacking .xz compressed files:

using CodecXz

"""
    decompress(in, out)

Copy the xz-compressed file `in` to the file `out`, one text line at a time.

The decompressed data is iterated with `eachline` and every line is written back with
`println`, so line terminators in the output are produced by `println` rather than
copied from the input.
"""
function decompress(in, out)
    compressed = open(in)
    target = open(out, "w")
    decoder = XzDecompressorStream(compressed)
    for text in eachline(decoder)
        println(target, text)
    end
    close(compressed)
    close(target)
end

This does not always work due to the use of eachline and println…

How can I do this in a more generic form?

@aplavin suggested using read(XzDecompressorStream(stream)), but I do not want to do that because it creates a huge vector in RAM. I would prefer to do the decompression in chunks.

How can I do that in Julia?

1 Like

I guess my main problem is that I don’t fully understand how to use streams.

How can I for example copy a binary file using an input and an output stream?

# Skeleton for copying a file through streams: opens input_file for reading and
# output_file for writing ("w"), then closes both. The copy step itself is still missing.
function copyfile(input_file, output_file)
    in_stream = open(input_file)
    out_stream = open(output_file,"w")

    # What to put here?

    close(in_stream)
    close(out_stream)
end
"""
    copyfile(input_file, output_file)

Copy the bytes of `input_file` to `output_file` in chunks of at most 10000 bytes, so
the whole file is never held in memory at once.

`output_file` is opened in `"w"` mode. Both files are opened with the do-block form of
`open`, so they are closed even if a read or write throws. Returns `nothing`.
"""
function copyfile(input_file, output_file)
    open(input_file) do in_stream
        open(output_file, "w") do out_stream
            # Loop until the input is exhausted; the final chunk may be shorter.
            while !eof(in_stream)
                write(out_stream, read(in_stream, 10000))
            end
        end
    end
    return nothing
end

Something like that (maybe even exactly that) should work for your original question as well.

1 Like

Or simply

# Passes the input stream itself to write instead of looping over chunks by hand;
# in_stream and out_stream are presumably the open streams from copyfile.
write(out_stream, in_stream)

Not working:

# Attempted chunked decompression; this version fails with the lzma error shown below.
# NOTE(review): a new XzDecompressorStream is constructed around `stream` on every
# iteration, and the loop condition tests eof on the raw file rather than on the
# decompressor. Presumably each wrapper buffers compressed input internally, so the next
# wrapper starts in the middle of the xz data; confirm against the TranscodingStreams docs.
function decompress1(in, out)
    stream = open(in)
    output = open(out,"w")
    while !eof(stream)
        write(output, read(XzDecompressorStream(stream), 10000))
    end
    close(stream)
    close(output)
end

It fails with:

ERROR: LoadError: lzma error: code = 7
Stacktrace:
  [1] changemode!(stream::TranscodingStreams.TranscodingStream{XzDecompressor, IOStream}, newmode::Symbol)
    @ TranscodingStreams ~/.julia/packages/TranscodingStreams/TsaT2/src/stream.jl:724
  [2] callprocess(stream::TranscodingStreams.TranscodingStream{XzDecompressor, IOStream}, inbuf::TranscodingStreams.Buffer, outbuf::TranscodingStreams.Buffer)
    @ TranscodingStreams ~/.julia/packages/TranscodingStreams/TsaT2/src/stream.jl:654
  [3] fillbuffer(stream::TranscodingStreams.TranscodingStream{XzDecompressor, IOStream}; eager::Bool)
    @ TranscodingStreams ~/.julia/packages/TranscodingStreams/TsaT2/src/stream.jl:582
  [4] fillbuffer
    @ ~/.julia/packages/TranscodingStreams/TsaT2/src/stream.jl:568 [inlined]
  [5] eof(stream::TranscodingStreams.TranscodingStream{XzDecompressor, IOStream})
    @ TranscodingStreams ~/.julia/packages/TranscodingStreams/TsaT2/src/stream.jl:191
  [6] readbytes!(stream::TranscodingStreams.TranscodingStream{XzDecompressor, IOStream}, b::Vector{UInt8}, nb::Int64)
    @ TranscodingStreams ~/.julia/packages/TranscodingStreams/TsaT2/src/stream.jl:375
  [7] read(s::TranscodingStreams.TranscodingStream{XzDecompressor, IOStream}, nb::Int64)
    @ Base ./io.jl:1000
  [8] decompress(in::String, out::String)
    @ Main ~/repos/CanAnalyzer/src/process_log.jl:97
  [9] main(logfile_name::String)
    @ Main ~/repos/CanAnalyzer/src/process_log.jl:500
 [10] top-level scope
    @ ~/repos/CanAnalyzer/src/process_log.jl:595
 [11] include(fname::String)
    @ Base.MainInclude ./client.jl:476
 [12] top-level scope
    @ REPL[1]:1
in expression starting at ...

Working:

"""
    decompress(in, out)

Decompress the xz file `in` into the file `out`.

The complete decompressed content is first read into memory and then written to `out`
in a single call.
"""
function decompress(in, out)
    source = open(in)
    sink = open(out, "w")
    decoded = read(XzDecompressorStream(source))
    write(sink, decoded)
    close(source)
    close(sink)
end

So I have a working solution, but it allocates the full size of the output file in RAM…

"""
    decompress(in, out)

Decompress the xz file `in` into the file `out` by passing the decompressor stream
directly to `write`, without reading the decompressed content into a vector first.
"""
function decompress(in, out)
    source = open(in)
    sink = open(out, "w")
    decoder = XzDecompressorStream(source)
    write(sink, decoder)
    close(source)
    close(sink)
end

Also you should use the do-block version of open to ensure the streams are closed even if an error occurs.

Is this better?

"""
    decompress(input, output)

Decompress the xz file at `input` into the file at `output`.

The decompressor stream is handed directly to `write` instead of being materialized
with `read` first, which would build a vector holding the entire decompressed content.
The do-block form of `open` closes both files even if decompression throws. Returns the
value of the inner `write` call.
"""
function decompress(input, output)
    open(input) do inp
        open(output, "w") do out
            write(out, XzDecompressorStream(inp))
        end
    end
end