using Mmap
# Path of the text file the lines are read from.
const input_file="file.txt"
# Path of the file that process_file writes the extracted lines to.
const output_file="out.txt"
# First and last line number handed to process_file as startline / stopline.
read_start=2
read_stop=4
# Copy lines startline..stopline of the file in_fn into the global output_file.
# The input is memory-mapped as bytes and line boundaries are found by scanning
# for '\n'; the line counter l starts at 1.
# NOTE(review): f is only closed by the last statement, so an exception raised
# earlier leaves the handle open.
function process_file(in_fn, startline, stopline)
f = open(in_fn, "r")
mm = Mmap.mmap(f, Vector{UInt8})
l = 1
pos = 0
# Advance pos to the newline that ends line startline-1 (pos stays 0 when
# startline == 1).
while l < startline
pos = last(findnext([UInt8('\n')], mm, pos+1))
l += 1
end
# NOTE(review): pos is the index of a newline byte here, not of the first byte
# of line startline, so the slice written below starts with that newline. That
# is the blank line at the top of the output. With startline == 1 the slice
# would start at index 0.
startpos = pos
# Advance pos to the newline that ends line stopline.
while l <= stopline
pos = last(findnext([UInt8('\n')], mm, pos+1))
l += 1
end
stoppos = pos
#write(stdout, @view mm[startpos:stoppos])
open(output_file, "w") do file
write(file, @view mm[startpos:stoppos])
end
close(f)
end
# Run the extraction for the configured input file and line range.
process_file(input_file, read_start, read_stop)
But while the result of the cut is OK, there's a blank line at the top of the out.txt file. I don't get why.
using Mmap
# Path of the text file the lines are read from.
const input_file="file.txt"
# Path of the file that process_file writes the extracted lines to.
const output_file="out.txt"
# First and last line number handed to process_file as startline / stopline.
read_start=2
read_stop=4
# Variant of process_file that locates the byte range with two foldl passes and
# then writes the selected lines to the global output_file one at a time, each
# followed by "\n".
function process_file(in_fn, startline, stopline)
open(in_fn, "r") do f
mm = Mmap.mmap(f, Vector{UInt8})
# skipline(pos, l) returns the index of the first newline strictly after pos.
# The second argument is unused; it only lets the closure serve as a foldl step.
skipline = let mm = mm
(pos, l) -> last(findnext([UInt8('\n')], mm, pos+1))
end
# NOTE(review): folding over 1:startline steps past startline newlines, so
# startpos appears to land on the first byte of line startline+1 rather than of
# line startline. The second fold continues from there, which would shift the
# whole extracted range down by one line.
startpos = foldl(skipline, 1:startline; init=1)+1
stoppos = foldl(skipline, startline:stopline; init=startpos)
lines=readlines(IOBuffer(@view mm[startpos:stoppos]))
open(output_file, "w") do io
for ll = lines
write(io, ll * "\n")
end
end
end
end
# Run the extraction for the configured input file and line range.
process_file(input_file, read_start, read_stop)
It's a bit slower, and the range is not OK, as I got:
Thanks a lot!
Here are the corrected versions. Note that I feel safer with a newline (`\n`) at the end of the last line.
First version:
using Mmap
# Path of the text file the lines are read from.
const input_file="file.txt"
# Path of the file that process_file writes the extracted lines to.
const output_file="out.txt"
# First and last line number handed to process_file as startline / stopline.
read_start=2
read_stop=4
# process_file(in_fn, startline, stopline; out_fn=output_file)
#
# Copy lines startline..stopline (1-based, inclusive) of the text file in_fn to
# out_fn, byte for byte. The input is memory-mapped and line boundaries are
# found by scanning for '\n', so no line is ever decoded into a String.
#
# - out_fn defaults to the script-level output_file constant; the default is
#   only looked up when the keyword is not supplied.
# - A range that runs past the end of the file is truncated to what exists; a
#   final line without a trailing newline is copied as it is.
# - An empty range (stopline < startline, or startline beyond the last line)
#   produces an empty output file.
#
# Returns nothing.
function process_file(in_fn, startline, stopline; out_fn=output_file)
    newline = UInt8('\n')
    # The do-block closes the input handle even when an exception is thrown.
    open(in_fn, "r") do f
        mm = Mmap.mmap(f, Vector{UInt8})
        nbytes = length(mm)
        l = 1
        pos = 0
        # Move pos onto the newline that ends line startline-1. Stop at the end
        # of the data so a short file cannot make findnext return nothing.
        while l < startline && pos < nbytes
            pos = something(findnext(==(newline), mm, pos + 1), nbytes)
            l += 1
        end
        startpos = pos
        # Move pos onto the newline that ends line stopline (or the last byte
        # when the file ends first).
        while l <= stopline && pos < nbytes
            pos = something(findnext(==(newline), mm, pos + 1), nbytes)
            l += 1
        end
        stoppos = pos
        open(out_fn, "w") do file
            # startpos sits on the previous line's newline (or is 0), hence +1.
            write(file, @view mm[(startpos + 1):stoppos])
        end
    end
    return nothing
end
# Run the extraction for the configured input file and line range.
process_file(input_file, read_start, read_stop)
Second version:
using Mmap
# Path of the text file the lines are read from.
const input_file="file.txt"
# Path of the file that process_file writes the extracted lines to.
const output_file="out.txt"
# First and last line number handed to process_file as startline / stopline.
read_start=2
read_stop=4
# process_file(in_fn, startline, stopline; out_fn=output_file)
#
# Copy lines startline..stopline (1-based, inclusive) of the text file in_fn to
# out_fn, writing every selected line followed by "\n" so the output always
# ends with a newline. The byte range is located with two foldl passes over the
# memory-mapped input.
#
# - out_fn defaults to the script-level output_file constant; the default is
#   only looked up when the keyword is not supplied.
# - startline values below 1 are treated as 1.
# - A range that runs past the end of the file is truncated to what exists; an
#   empty range produces an empty output file.
#
# Returns nothing.
function process_file(in_fn, startline, stopline; out_fn=output_file)
    newline = UInt8('\n')
    firstline = max(startline, 1)
    open(in_fn, "r") do f
        mm = Mmap.mmap(f, Vector{UInt8})
        nbytes = length(mm)
        # Fold step: given the index pos of the newline that ends the previous
        # line (0 before the first line), return the index of the newline that
        # ends the next line, or nbytes once the data is exhausted.
        function skipline(pos, _)
            pos >= nbytes && return nbytes
            return something(findnext(==(newline), mm, pos + 1), nbytes)
        end
        # Starting from 0 (not 1) keeps startline == 1 at byte 1 and lets an
        # empty first line be found.
        startpos = foldl(skipline, 1:(firstline - 1); init=0) + 1
        # Resume from the newline just before startpos so an empty first
        # selected line is not skipped.
        stoppos = foldl(skipline, firstline:stopline; init=startpos - 1)
        open(out_fn, "w") do io
            for line in eachline(IOBuffer(@view mm[startpos:stoppos]))
                println(io, line)
            end
        end
    end
    return nothing
end
# Run the extraction for the configured input file and line range.
process_file(input_file, read_start, read_stop)
Meanwhile, I tested a way to load the extract into an array structure for further processing:
using Mmap
using DelimitedFiles
# Path of the delimited text file the lines are read from.
const input_file="file.txt"
# First and last line number handed to process_file as startline / stopline.
read_start=2
read_stop=4
# process_file(in_fn, startline, stopline)
#
# Parse lines startline..stopline (1-based, inclusive) of the delimited text
# file in_fn with readdlm and return the resulting array. The input is
# memory-mapped and line boundaries are found by scanning for '\n'; only the
# selected byte range is copied and handed to the parser.
#
# A range that runs past the end of the file is truncated to the lines that
# exist.
function process_file(in_fn, startline, stopline)
    newline = UInt8('\n')
    # The do-block closes the input handle even when parsing throws.
    return open(in_fn, "r") do f
        mm = Mmap.mmap(f, Vector{UInt8})
        nbytes = length(mm)
        l = 1
        pos = 0
        # Move pos onto the newline that ends line startline-1. Stop at the end
        # of the data so a short file cannot make findnext return nothing.
        while l < startline && pos < nbytes
            pos = something(findnext(==(newline), mm, pos + 1), nbytes)
            l += 1
        end
        # First byte of line startline. Using pos itself would index mm[0]
        # when startline == 1.
        startpos = pos + 1
        # Move pos onto the newline that ends line stopline.
        while l <= stopline && pos < nbytes
            pos = something(findnext(==(newline), mm, pos + 1), nbytes)
            l += 1
        end
        stoppos = pos
        # The slice is a copy, so the result does not depend on the mapping.
        readdlm(mm[startpos:stoppos]; use_mmap=true)
    end
end
# Parse the configured line range into an array, then print the whole array
# and the element in row 1, column 2.
csv_like=process_file(input_file, read_start, read_stop)
println(csv_like)
println(csv_like[1,2])
CSV.jl provides options to parse the input as it is read, and DataFrames.jl is the usual tool for analysing that kind of data (of course, it is all a matter of personal preference).