Maybe not too many changes would be necessary to get option 3) running. I did a few small tests, and the following seems to work, at least for reading arrays without relying on Mmap or reading the whole archive, in case someone wants to experiment.
import DiskArrays: AbstractDiskArray, DiskArrays, Unchunked, Chunked, GridChunks
"""
    SimpleFileDiskArray{C<:Union{Int,Nothing}} <: AbstractDiskArray{UInt8,1}

One-dimensional disk array of `UInt8` values backed by a file on disk.

The type parameter `C` is the type of the `chunksize` field: `Int` when a chunk length
was given, `Nothing` otherwise.
"""
struct SimpleFileDiskArray{C<:Union{Int,Nothing}} <: AbstractDiskArray{UInt8,1}
    # Path of the backing file.
    file::String
    # Number of elements of the array.
    s::Int
    # Chunk length, or `nothing` when no chunk length was specified.
    chunksize::C
end
"""
    size(a::SimpleFileDiskArray)

Return the size of `a` as a one-element tuple holding the stored length `a.s`.
"""
function Base.size(a::SimpleFileDiskArray)
    return (a.s,)
end
"""
    SimpleFileDiskArray(filename; chunksize=nothing)

Create a `SimpleFileDiskArray` for the existing file `filename`. The array length is
taken from `filesize(filename)` at construction time.

# Keywords
- `chunksize`: chunk length as a positive integer, or `nothing` (the default) when no
  chunk length should be recorded.

# Throws
- `ArgumentError` if `filename` is not an existing file, or if `chunksize` is given but
  is not positive.
"""
function SimpleFileDiskArray(filename; chunksize=nothing)
    isfile(filename) || throw(ArgumentError("File $filename does not exist"))
    if chunksize !== nothing && chunksize <= 0
        throw(ArgumentError("chunksize must be positive, got $chunksize"))
    end
    s = filesize(filename)
    # Normalize any integer chunk size to `Int`, the only integer type the struct allows.
    cs = chunksize === nothing ? nothing : Int(chunksize)
    # Use the parametric constructor: unlike the auto-generated outer constructor, which
    # only matches `(::String, ::Int, ::C)` exactly, it converts its arguments to the
    # field types, so non-`String` paths and non-`Int` file sizes are accepted.
    return SimpleFileDiskArray{typeof(cs)}(filename, s, cs)
end
"""
    DiskArrays.readblock!(a::SimpleFileDiskArray, aout, i::AbstractUnitRange)

Fill `aout` with data read from the file behind `a`, starting at the zero-based byte
offset `first(i) - 1`, and return the result of `read!`.

Only the first index of `i` is used for positioning; the amount of data read is
determined by `aout`.
"""
function DiskArrays.readblock!(a::SimpleFileDiskArray, aout, i::AbstractUnitRange)
    io = open(a.file)
    try
        # Indices are one-based, file positions are zero-based.
        seek(io, first(i) - 1)
        return read!(io, aout)
    finally
        close(io)
    end
end
"""
    DiskArrays.haschunks(a::SimpleFileDiskArray)

Return `Unchunked()` when `a` carries no chunk size and `Chunked()` otherwise.
"""
function DiskArrays.haschunks(a::SimpleFileDiskArray)
    if isnothing(a.chunksize)
        return Unchunked()
    end
    return Chunked()
end
"""
    DiskArrays.eachchunk(a::SimpleFileDiskArray)

Return the chunk layout of `a`: the result of `DiskArrays.estimate_chunksize(a)` when no
chunk size is stored, otherwise a `GridChunks` built from the array length and the
stored chunk size.
"""
function DiskArrays.eachchunk(a::SimpleFileDiskArray)
    cs = a.chunksize
    # No explicit chunk size: defer to the DiskArrays estimate.
    cs === nothing && return DiskArrays.estimate_chunksize(a)
    return GridChunks((a.s,), (cs,))
end
"""
    copyto!(dest::AbstractArray, desto::Int, src::SimpleFileDiskArray, so::Int, N::Int)

Copy `N` elements of `src`, starting at index `so`, into `dest` starting at index
`desto`, and return `dest`.

The data is fetched with a single `DiskArrays.readblock!` call that writes directly into
a view of `dest`.

# Throws
- `ArgumentError` if `N` is negative.
"""
function Base.copyto!(
    dest::AbstractArray, desto::Int, src::SimpleFileDiskArray, so::Int, N::Int
)
    N < 0 && throw(ArgumentError("tried to copy N=$N elements, but N should be non-negative"))
    # Nothing to copy: avoid opening the file at all.
    N == 0 && return dest
    destv = view(dest, range(desto, length=N))
    DiskArrays.readblock!(src, destv, range(so, length=N))
    # `copyto!` is documented to return the destination array, not the temporary view
    # that was filled.
    return dest
end
using ZipArchives
# Example usage: path of the zip archive to read.
file = "../testzip.zip"
# Wrap the file in a SimpleFileDiskArray and pass it to ZipReader.
r = ZipReader(SimpleFileDiskArray(file))
# Names of the entries in the archive.
entries = zip_names(r)
# Read the second entry; @time reports the time and allocations of this call.
@time data = zip_readentry(r,entries[2])
# Show the bytes that were read as characters.
Char.(data)
Maybe `SimpleFileDiskArray` could be added to DiskArrays.jl if it proves to be useful.