I need to write parsers for ad-hoc data formats from time to time, and I’ve always wondered whether I’m propagating errors in a “Julian” way. I don’t know whether it is best to represent errors by `nothing` (as in `Base.tryparse`), by a custom error type, or by exceptions. Here’s my code for parsing a tuple of two letters separated by a comma. Let me know if the code can be improved.
Tests first, to give an idea about what the code will do:
# Whole-tuple parsing: the success case, then one failure for each position
# (separator, first letter, second letter).
@test parse_tuple_of_two_letters("a,b") == ('a', 'b')
@test parse_tuple_of_two_letters("a+b") == ParseError("error in parsing comma: Failed to read character")
@test parse_tuple_of_two_letters("1,b") == ParseError("error in parsing 1st letter: Failed to read character")
@test parse_tuple_of_two_letters("a,2") == ParseError("error in parsing 2nd letter: Failed to read character")
# Single-letter parsing. `parse_letter` takes an `IO`, so the input string is
# wrapped in an `IOBuffer` before being handed over.
@test parse_letter(IOBuffer("a")) == 'a'
@test parse_letter(IOBuffer("1")) == ParseError("Failed to read character")
The actual code is below. It uses a custom error type that wraps an error message. When a child function returns an instance of this error type, the parent function prepends context information so that the final error message is meaningful.
"""
    ParseError(message)

Value returned by the parsers in place of a result when parsing fails.
`message` is a human-readable description of what went wrong.
"""
struct ParseError
    message::String
end

"""
    parse_letter(io::IO) -> Union{Char, ParseError}

Read one character from `io` and return it if `Base.isletter` accepts it;
otherwise return a `ParseError`.
"""
parse_letter(io::IO) = parse_char(io, Base.isletter)

"""
    parse_expected_char(io::IO, c::Char) -> Union{Char, ParseError}

Read one character from `io` and return it if it equals `c`; otherwise return
a `ParseError`.
"""
parse_expected_char(io::IO, c::Char) = parse_char(io, ==(c))

"""
    parse_char(io::IO, requirement::Function) -> Union{Char, ParseError}

Read one character from `io` and return it when `requirement(char)` is true.

# Returns
- the character, when it satisfies `requirement`;
- `ParseError("End of stream!")` when `io` is already at end of stream;
- `ParseError("Failed to read character")` when the character is rejected. In
  that case the stream is moved back to where it was before the read, so the
  same character can be read again by another parser.
"""
function parse_char(io::IO, requirement::Function)::Union{Char, ParseError}
    eof(io) && return ParseError("End of stream!")
    data = read(io, Char)
    if !requirement(data)
        # Rewind by the full encoded width of the character: a `Char` read
        # from a UTF-8 stream occupies 1 to 4 bytes, so stepping back a single
        # byte would leave the stream in the middle of a non-ASCII character.
        skip(io, -ncodeunits(data))
        return ParseError("Failed to read character")
    end
    return data
end
"""
    parse_tuple_of_two_letters(s::AbstractString)

Convenience method: parse the text `s` by wrapping it in an in-memory
`IOBuffer` and delegating to the `IO` method of this function.

Accepts any `AbstractString` (for example a `SubString` produced by `split`),
not only `String`.
"""
function parse_tuple_of_two_letters(s::AbstractString)
    # `String(s)` returns `s` itself when it is already a `String` and
    # materialises other string types so that `IOBuffer` can wrap them.
    return parse_tuple_of_two_letters(IOBuffer(String(s)))
end
"""
    parse_tuple_of_two_letters(io::IO) -> Union{Tuple{Char, Char}, ParseError}

Parse `<letter>,<letter>` from `io`.

Return the two letters as a tuple. If any of the three elements fails to
parse, return a `ParseError` whose message names the failing element followed
by the message of the underlying error.
"""
function parse_tuple_of_two_letters(io::IO)::Union{Tuple{Char, Char}, ParseError}
    first_letter = parse_letter(io)
    if first_letter isa ParseError
        # Prefix the low-level message with where in the tuple it happened.
        return ParseError(string("error in parsing 1st letter: ", first_letter.message))
    end

    comma = parse_expected_char(io, ',')
    if comma isa ParseError
        return ParseError(string("error in parsing comma: ", comma.message))
    end

    second_letter = parse_letter(io)
    if second_letter isa ParseError
        return ParseError(string("error in parsing 2nd letter: ", second_letter.message))
    end

    return (first_letter, second_letter)
end