In the following, `Parsers.parse` is faster than `Base.parse` when parsing a single line.
But when reading a whole file, it is much slower. (EDIT: Removed useless lines.)
"""
    write_test_file(fname, nlines=10_000; nmax=10_000)

Write `nlines` lines of random data to `fname` in a simplified DIMACS format.

Each line has the form `a n1 n2 n3`, where the three integers are drawn from
`1:nmax`. The file is opened in `"w"` mode, so existing content is replaced.

# Arguments
- `fname`: path of the file to write.
- `nlines`: number of lines to write.

# Keywords
- `nmax`: largest integer that may appear in the file.

# Returns
`nothing`.
"""
function write_test_file(fname, nlines=10_000; nmax=10_000)
    open(fname, "w") do io
        for _ in 1:nlines
            (a, b, c) = rand(1:nmax, 3)
            println(io, "a ", a, " ", b, " ", c)
        end
    end
    return nothing
end
"""
    test_parse_line1(line::AbstractString, args...)

Convenience method: wrap `line` in an `IOBuffer` and forward it, together with
any extra arguments, to the `IO` method of `test_parse_line1`.
"""
function test_parse_line1(line::AbstractString, args...)
    buffer = IOBuffer(line)
    return test_parse_line1(buffer, args...)
end
"""
    test_parse_line1(io::IO, lineno=0)

Parse one line of the form `"a n1 n2 n3\\n"` from the front of `io` and return
the three integers as a tuple `(n1, n2, n3)`.

The leading character is read and checked, the three integers are parsed with
`Parsers.parse`, and one further character is read after the third integer.
`lineno` is only used in the error message.

# Throws
- `ErrorException` if the first character read from `io` is not `'a'`.
"""
function test_parse_line1(io::IO, lineno=0)
    read(io, Char) == 'a' ||
        throw(ErrorException("Expecting line beginning with 'a' in line $(lineno)"))
    src = Parsers.parse(Int, io)
    dst = Parsers.parse(Int, io)
    wgt = Parsers.parse(Int, io)
    # Read one more character after the third integer (presumably the newline).
    read(io, Char)
    return (src, dst, wgt)
end
"""
    test_parse_line2(line::AbstractString, lineno=0)

Parse three integers from a line of the form `"a n1 n2 n3"` and return them as
a tuple `(n1, n2, n3)`.

The whole line is given as a string. It is split on whitespace and the second,
third and fourth fields are parsed with `Base.parse`; any further fields are
ignored. `lineno` is only used in error messages.

# Throws
- `ErrorException` if `line` is empty, does not begin with `'a'`, or has fewer
  than four whitespace-separated fields.
- `ArgumentError` (from `parse`) if a field is not a valid integer.
"""
function test_parse_line2(line::AbstractString, lineno=0)
    # Check for emptiness first: indexing an empty line would raise a bare
    # BoundsError that carries no line number.
    if isempty(line) || first(line) != 'a'
        throw(ErrorException("Expecting line beginning with 'a' in line $(lineno)"))
    end
    fields = split(line)
    # Destructuring a too-short split would also fail with an opaque BoundsError.
    if length(fields) < 4
        throw(ErrorException("Expecting three integers after 'a' in line $(lineno)"))
    end
    from_node = parse(Int, fields[2])
    to_node = parse(Int, fields[3])
    weight = parse(Int, fields[4])
    return (from_node, to_node, weight)
end
"""
    read_test_file2(path=path)

Read the file at `path` one line at a time into a string and parse each string
into three integers with `test_parse_line2`.

Prints the number of lines read, then the result parsed from the last line
(`nothing` if the file is empty). Returns `nothing`.

The line number passed to the parser is 1-based, so an error on the first line
is reported as "line 1".

NOTE(review): the default value refers to a variable named `path` outside this
function, which is not defined in the code shown here — confirm it exists
before relying on the zero-argument call.
"""
function read_test_file2(path=path)
    # The counter and the last result are locals of the `do` block and are
    # returned from it, rather than being captured variables that the closure
    # reassigns (which would be boxed).
    lcount, res = open(path, "r") do io
        n = 0
        latest = nothing
        for line in eachline(io)
            n += 1
            latest = test_parse_line2(line, n)
        end
        return (n, latest)
    end
    println(lcount)
    println(res)
    return nothing
end
"""
    read_test_file1(path=path)

Read lines from the file at `path`, parsing each one into three integers by
calling `test_parse_line1` directly on the open stream, so no intermediate
line string is created.

Prints the number of lines read, then the result parsed from the last line
(`nothing` if the file is empty). Returns `nothing`.

The line number passed to the parser is 1-based, so an error on the first line
is reported as "line 1".

NOTE(review): the default value refers to a variable named `path` outside this
function, which is not defined in the code shown here — confirm it exists
before relying on the zero-argument call.
"""
function read_test_file1(path=path)
    # The counter and the last result are locals of the `do` block and are
    # returned from it, rather than being captured variables that the closure
    # reassigns (which would be boxed).
    lcount, res = open(path, "r") do io
        n = 0
        latest = nothing
        while !eof(io)
            n += 1
            latest = test_parse_line1(io, n)
        end
        return (n, latest)
    end
    println(lcount)
    println(res)
    return nothing
end
julia> write_test_file("tfile.gr", 1_000_000)
julia> xx = "a 1234 223423 3234325\n";
julia> @btime test_parse_line1($xx)
48.260 ns (2 allocations: 128 bytes)
(1234, 223423, 3234325)
julia> @btime test_parse_line2($xx)
392.443 ns (2 allocations: 272 bytes)
(1234, 223423, 3234325)
julia> @time read_test_file1("tfile.gr")
1000000
1000000
(8274, 8890, 965)
6.893523 seconds (3.01 M allocations: 61.368 MiB, 0.06% gc time, 0.32% compilation time)
julia> @time read_test_file2("tfile.gr")
1000000
1000000
(8274, 8890, 965)
0.812866 seconds (6.00 M allocations: 362.354 MiB, 2.91% gc time, 2.30% compilation time)