Struggling to use Mmap with ZipArchives

Maybe not too many changes would be necessary to get option 3) running. I did a few small tests, and the following seems to work, at least for reading an array without relying on Mmap or reading the whole archive, in case someone wants to experiment.

import DiskArrays: AbstractDiskArray, DiskArrays, Unchunked, Chunked, GridChunks

# One-dimensional disk array of `UInt8` elements backed by a file on disk.
# The type parameter `C` is the type of `chunksize`: `Int` when a chunk size
# is given, `Nothing` when it is not.
struct SimpleFileDiskArray{C<:Union{Int,Nothing}} <: AbstractDiskArray{UInt8,1}
    # Path of the backing file.
    file::String
    # Number of elements in the array.
    s::Int
    # Chunk length along the single dimension, or `nothing` if none was given.
    chunksize::C
end
# Shape of the array: a 1-tuple holding the stored element count.
function Base.size(a::SimpleFileDiskArray)
    return (a.s,)
end
# Build a `SimpleFileDiskArray` for the file at `filename`, taking the array
# length from the file size. `chunksize` is stored as given (`nothing` by default).
# Throws an `ArgumentError` when `filename` is not an existing file.
function SimpleFileDiskArray(filename; chunksize=nothing)
    if !isfile(filename)
        throw(ArgumentError("File $filename does not exist"))
    end
    nbytes = filesize(filename)
    return SimpleFileDiskArray(filename, nbytes, chunksize)
end
# Fill `aout` with data read from the backing file of `a`, starting at the
# position given by the first index of `i`. The file is opened for the duration
# of the read only. Returns the value produced by `read!`.
function DiskArrays.readblock!(a::SimpleFileDiskArray, aout, i::AbstractUnitRange)
    # Array indices are 1-based, file positions are 0-based.
    offset = first(i) - 1
    return open(a.file) do io
        seek(io, offset)
        read!(io, aout)
    end
end
# Chunking trait: `Unchunked()` when no chunk size was given, `Chunked()` otherwise.
function DiskArrays.haschunks(a::SimpleFileDiskArray)
    if isnothing(a.chunksize)
        return Unchunked()
    end
    return Chunked()
end
# Chunk layout of the array: a regular grid built from the stored chunk size,
# or whatever `DiskArrays.estimate_chunksize` returns when none was given.
function DiskArrays.eachchunk(a::SimpleFileDiskArray)
    cs = a.chunksize
    cs === nothing && return DiskArrays.estimate_chunksize(a)
    return GridChunks((a.s,), (cs,))
end

# Copy `N` elements of `src`, starting at linear index `so`, into `dest`
# starting at index `desto`. The data is read from the backing file straight
# into a view of the destination range.
#
# Returns `dest`, as the `Base.copyto!` contract requires (the previous version
# returned the temporary view of `dest` instead).
function Base.copyto!(
    dest::AbstractArray, desto::Int, src::SimpleFileDiskArray, so::Int, N::Int
)
    destv = view(dest, range(desto, length=N))
    DiskArrays.readblock!(src, destv, range(so, length=N))
    return dest
end

using ZipArchives
# Example usage: open a zip archive through the file-backed disk array.
# Path of the test archive (relative to the current working directory).
file = "../testzip.zip"
# Hand the disk array to ZipReader as the archive's data source.
r = ZipReader(SimpleFileDiskArray(file))

# List the archive's entry names.
entries = zip_names(r)
# Read and time the second entry; assumes the archive has at least two entries.
@time data = zip_readentry(r,entries[2])
# Convert the returned data to characters for inspection.
Char.(data)

Maybe the SimpleFileDiskArray could be added to DiskArrays.jl in case it proves to be useful.

3 Likes