Read lines from file without new allocations

I adjusted the code to fix the two bugs, and I will edit it further here if I find another mistake. It would also be possible to check whether the buffer size was chosen correctly, since each iteration (except the last one, when the file has no trailing \n) should yield at least one complete line (a “sentence”).

It would be even nicer if this could be wrapped in a generator/iterator so that it works exactly like `eachline`, but without the excessive allocations :slight_smile:. I am not familiar with how to implement that, though…

# Scan `io` in reads of at most `chunk` bytes and count the lines equal to `query`.
#
# Each line is exposed as a `StringView` over the working buffer, so no per-line
# string is allocated. Such a view is only valid until the next read, because the
# buffer contents are moved and overwritten.
function _count_matching_lines(io::IO, chunk::Int, query::AbstractString)
    newline = 0x0a

    # Working buffer: the unfinished tail of the previous read lives at the front
    # (`buf[1:pending]`) and the next read is appended right behind it.
    buf = Vector{UInt8}(undef, 2 * chunk)
    pending = 0
    matches = 0

    while !eof(io)
        # A line longer than `chunk` bytes leaves too little room for the next
        # read. Grow the buffer instead of dropping the start of the line; as long
        # as unfinished lines stay within `chunk` bytes this branch is never taken.
        if length(buf) - pending < chunk
            resize!(buf, max(2 * length(buf), pending + chunk))
        end

        nread = readbytes!(io, view(buf, (pending + 1):(pending + chunk)), chunk)
        filled = pending + nread

        # Only the freshly read bytes can contain a newline; the pending bytes
        # were already scanned during an earlier pass.
        line_start = 1
        @inbounds for i in (pending + 1):filled
            if buf[i] == newline
                line = StringView(view(buf, line_start:(i - 1)))
                # Just for testing:
                matches += Int(line == query)
                line_start = i + 1
            end
        end

        # Keep the unfinished tail for the next pass by moving it to the front.
        # The ranges may overlap; copying forward to lower indices is safe.
        pending = filled - line_start + 1
        if line_start > 1
            for k in 1:pending
                buf[k] = buf[line_start + k - 1]
            end
        end
    end

    # Whatever is still pending is a final line without a terminating newline.
    if pending > 0
        line = StringView(view(buf, 1:pending))
        matches += Int(line == query)
    end

    return matches
end

"""
    bytes_read(file_path::String, buffer::Int, query::String)

Count the lines of the file at `file_path` that are equal to `query`.

The file is read in chunks of at most `buffer` bytes into a reusable byte buffer of
`2 * buffer` bytes, and every line is compared through a `StringView` over that
buffer instead of being copied into a new `String`.

Lines are split on the newline byte (`0x0a`) only; a preceding carriage return is
kept as part of the line. A final line without a terminating newline is counted
too, and an empty file contains no lines.

A line longer than `buffer` bytes is still handled correctly: the working buffer is
grown for it, which is the only situation in which the buffer is reallocated.

`StringView` must be in scope at the call site of this definition (presumably via
StringViews.jl -- confirm against the file's imports).

# Arguments
- `file_path`: path of the file to scan.
- `buffer`: number of bytes requested per read; must be positive.
- `query`: line content to look for, without the newline.

# Returns
The number of matching lines as an `Int`.

# Throws
- `ArgumentError` if `buffer` is not positive.
"""
function bytes_read(file_path::String, buffer::Int, query::String)
    # A zero-sized read never reaches EOF, so reject it up front.
    buffer > 0 || throw(ArgumentError("buffer must be positive, got $buffer"))

    # The do-block form closes the file even when scanning throws.
    return open(file_path, "r") do io
        _count_matching_lines(io, buffer, query)
    end
end