@GunnarFarneback That’s a nice one-liner! But unfortunately none of that avoids the problem of allocating the `line` string, and it seems to introduce some overhead besides.
To run some tests, I created a file containing 100,000 lines of random 5-character strings:
using Random: randstring

# Generate the benchmark fixture `testout.txt`: 100,000 lines, each holding a
# random 5-character string, matching the file size described in the text.
open("testout.txt", "w") do f
    for _ in 1:100_000
        println(f, randstring(5))
    end
end
"""
    test_eachline()

Return every line of `testout.txt` that starts with `'K'`, each followed by a
newline, concatenated into one `String`.

The file is streamed with `eachline`. Empty lines are skipped instead of
raising a `BoundsError`.
"""
function test_eachline()
    buf = IOBuffer()
    for line in eachline("testout.txt")
        # `startswith` is safe on an empty line, unlike indexing `line[1]`.
        if startswith(line, 'K')
            println(buf, line)
        end
    end
    return String(take!(buf))
end
"""
    test_eachline_IOBuffer()

Return every line of `testout.txt` that starts with `'K'`, each followed by a
newline, concatenated into one `String`.

The whole file is first read into memory and wrapped in an `IOBuffer`, which is
then iterated with `eachline`. Empty lines are skipped instead of raising a
`BoundsError`.
"""
function test_eachline_IOBuffer()
    buf = IOBuffer()
    for line in eachline(IOBuffer(read("testout.txt")))
        # `startswith` is safe on an empty line, unlike indexing `line[1]`.
        if startswith(line, 'K')
            println(buf, line)
        end
    end
    return String(take!(buf))
end
"""
    test_oneliner()

Concatenate every line of `testout.txt` that starts with `"K"`, keeping each
line's own terminator, and return the result as a `String`.
"""
function test_oneliner()
    lines = eachline("testout.txt"; keep=true)
    return join(Iterators.filter(l -> startswith(l, "K"), lines))
end
"""
    test_seekpeek()

Return the lines of `testout.txt` whose first character is `'K'`, terminators
included, as a single `String`.

The file stream is inspected with `peek`: matching lines are copied with
`readuntil`, all others are skipped without being read into a string.
"""
function test_seekpeek()
    out = IOBuffer()
    open("testout.txt") do io
        while !eof(io)
            if peek(io, Char) != 'K'
                # Not a match: advance to the newline, then step over it.
                skipchars(c -> c != '\n', io)
                skip(io, 1)
                continue
            end
            print(out, readuntil(io, '\n'; keep=true))
        end
    end
    return String(take!(out))
end
"""
    test_seekpeek_IOBuffer()

Return the lines of `testout.txt` whose first character is `'K'`, terminators
included, as a single `String`.

The file is read into an in-memory `IOBuffer` first; matching lines are copied
with `readuntil`, all others are skipped without being read into a string.
"""
function test_seekpeek_IOBuffer()
    src = IOBuffer(read("testout.txt"))
    out = IOBuffer()
    while !eof(src)
        if peek(src, Char) != 'K'
            # Not a match: advance to the newline, then step over it.
            skipchars(c -> c != '\n', src)
            skip(src, 1)
        else
            print(out, readuntil(src, '\n'; keep=true))
        end
    end
    return String(take!(out))
end
"""
    test_seekpeek_eachchar()

Return the lines of `testout.txt` whose first character is `'K'`, terminators
included, as a single `String`.

The file is read into an in-memory `IOBuffer`. Matching lines are copied one
character at a time, so no per-line `String` is created; all other lines are
skipped.
"""
function test_seekpeek_eachchar()
    buf = IOBuffer()
    f = IOBuffer(read("testout.txt"))
    while !eof(f)
        if peek(f, Char) == 'K'
            # Copy the line including its newline. Stop right after the
            # newline, or at end of input when the final line has none, so
            # the terminator of a matching last line is never dropped.
            while true
                ch = read(f, Char)
                write(buf, ch)
                (ch == '\n' || eof(f)) && break
            end
        else
            # Not a match: advance to the newline, then step over it.
            skipchars(!=('\n'), f)
            skip(f, 1)
        end
    end
    return String(take!(buf))
end
# Benchmark every variant with `@btime` (from BenchmarkTools.jl); the comment
# under each call records the time and allocation figures reported for it.
@btime test_eachline()
# 3.258 ms (100033 allocations: 2.32 MiB)
@btime test_eachline_IOBuffer()
# 3.736 ms (200024 allocations: 15.10 MiB)
@btime test_oneliner()
# 3.604 ms (200035 allocations: 3.85 MiB)
@btime test_seekpeek()
# 226.700 ms (3478 allocations: 312.97 KiB)
@btime test_seekpeek_IOBuffer()
# 2.644 ms (3480 allocations: 872.14 KiB)
@btime test_seekpeek_eachchar()
# 2.648 ms (24 allocations: 615.64 KiB)
The last one finally eliminated the large number of allocations I wanted to avoid (by not allocating `line` on every loop iteration), but it may also have shown that there’s something going on under the hood that means I shouldn’t care about all those allocations: the total allocated memory actually increased compared with the file-based seek-peek version using `readuntil` (615.64 KiB vs. 312.97 KiB).
(Edited to remove my use of `@time` and the irrelevant numbers it generated, and again to fix the one-liner.)