Testing show methods

lmiq · December 17, 2024, 7:44pm

I have defined a simple function to test show methods. It works like this:

julia> struct A
           x::Int
           y::Float64
           p::String
           v::Vector{Float64}
       end

julia> function Base.show(io::IO, ::MIME"text/plain", a::A)
           print(io,"""
           My object of type A has x$(a.x) int, 
           and a path $(a.p) and also a $(a.y) float, an $(a.v) vector.
           """)
       end

julia> a
My object of type A has x1 int, 
and a path /home/user/path.pdb and also a 2.0 float, an [1.0, 2.0, 3.141592653589793] vector.


julia> test_show(a,
       """
       My object of type A has x1 int, 
       and a path /home/user/path.pdb and also a 2.0 float, an [1.0, 2.0, 3.141592653589793] vector.
       """
       )
true

The precision to which the floats and ints are compared can be set with keyword arguments (digits are separated from non-digit characters, except dots, to isolate numbers), paths are tested only for their last component (to avoid test failures on different machines/CI), and arrays are simplified so that only the first and last elements are compared (avoiding test failures associated with the number of array elements printed).

I wonder if something like this, but more carefully implemented, would be useful in general?

test_show.jl

    """
        test_show(x, s::String; f64, i64, vector_simplify = true, repl = Dict())

    Return `true` when the `text/plain` output of `show` for `x` matches the expected
    text `s`, comparing whitespace-separated fields one by one; otherwise print the
    first mismatch and return `false`.

    Both texts are normalized the same way before being split into fields: the `repl`
    substitutions are applied, a space is inserted at every boundary between a digit and
    a non-digit character other than `.`, and, if `vector_simplify` is true, a bracketed
    list is reduced to its first and last entries.

    # Keywords
    - `f64`: predicate used for fields that parse as `Float64` (default: `isapprox`
      with `rtol=1e-3`).
    - `i64`: predicate used for fields that parse as `Int` (default: `==`).
    - `vector_simplify`: keep only the first and last entries of bracketed lists.
    - `repl`: collection of `pattern => replacement` pairs applied to both texts.

    A field of the shown text for which `ispath` is true is compared with the expected
    field by last path component only. The two texts must have the same number of
    fields.
    """
    function test_show(
        x, s::String;
        f64 = (x1,x2) -> isapprox(x1,x2,rtol=1e-3),
        i64 = (x1,x2) -> x1 == x2,
        vector_simplify = true,
        repl = Dict(),
    )
        # Apply predicate `f` and report the offending pair on failure. A `nothing`
        # operand means one side did not parse as the number type found on the other
        # side, which is a mismatch rather than an error.
        function check(f, x1, x2)
            if isnothing(x1) || isnothing(x2) || !f(x1, x2)
                println("show method test failed with $x1 ($(typeof(x1))) == $x2 ($(typeof(x2)))")
                return false
            end
            return true
        end
        # Identical normalization for the expected and the shown text.
        function normalize(str)
            # Custom substitutions
            str = replace(str, repl...)
            # add spaces between digits and other characters (except dots), to interpret them as numbers
            str = replace(str, r"(?<=\d)(?=[^\d.])|(?<=[^\d.])(?=\d)" => s" ")
            if vector_simplify # keep only first and last array elements
                str = replace(str, r"\[ (\S+).* (\S+)\ ]" => s"[ \1 \2 ]")
            end
            return str
        end
        # Paths differ between machines, so only the last component is compared.
        same_last_component(p1, p2) = last(splitpath(p1)) == last(splitpath(p2))

        buff = IOBuffer()
        show(buff, MIME"text/plain"(), x)
        expected_fields = split(normalize(s))
        shown_fields = split(normalize(String(take!(buff))))

        for (f1, f2) in zip(expected_fields, shown_fields)
            if ispath(f2) # only compares the last entry for paths
                check(same_last_component, f1, f2) || return false
                continue
            end
            expected_int = tryparse(Int, f1) # test if f1 can be interpreted as an integer
            if !isnothing(expected_int)
                check(i64, expected_int, tryparse(Int, f2)) || return false
                continue
            end
            expected_float = tryparse(Float64, f1) # test if f1 can be interpreted as a float
            if !isnothing(expected_float)
                check(f64, expected_float, tryparse(Float64, f2)) || return false
                continue
            end
            check(isequal, f1, f2) || return false
        end
        # `zip` stops at the shorter collection; leftover fields on either side mean
        # the texts differ.
        if length(expected_fields) != length(shown_fields)
            println("show method test failed: expected $(length(expected_fields)) fields, got $(length(shown_fields))")
            return false
        end
        return true
    end

lmiq · December 17, 2024, 11:41pm

Now with a bit more structured code:

test_show.jl

    """
        TestShowString(parsed_show)

    Wrapper around a piece of text in the field `parsed_show`, compared field by field
    with a tolerance by the `isequal` method for two `TestShowString` values.
    """
    struct TestShowString
        parsed_show::String
    end
    # Display the wrapped text verbatim.
    Base.show(io::IO, x::TestShowString) = print(io, x.parsed_show)

    """
        isequal(x::TestShowString, y::TestShowString; f64, i64, path, assertion_error = true)

    Compare the whitespace-separated fields of `x.parsed_show` and `y.parsed_show`
    pairwise and return `true` if every pair matches and both have the same number of
    fields.

    # Keywords
    - `f64`: predicate for fields that parse as `Float64` (default: `isapprox` with
      `rtol=1e-3`).
    - `i64`: predicate for fields that parse as `Int` (default: `==`).
    - `path`: predicate applied to the last path components of a pair of fields when
      `ispath` is true for either of them.
    - `assertion_error`: if `true`, a mismatch throws an `AssertionError` describing the
      offending pair; if `false`, a mismatch returns `false`.

    NOTE(review): no matching `hash` method is defined, so these values should not be
    used as keys of hash-based collections.
    """
    function Base.isequal(
        x::TestShowString,
        y::TestShowString;
        f64 = (x1,x2) -> isapprox(x1,x2,rtol=1e-3),
        i64 = (x1,x2) -> x1 == x2,
        path = (x1,x2) -> last(splitpath(x1)) == last(splitpath(x2)),
        assertion_error = true,
    )
        # Signal a mismatch: throw when `assertion_error` is set, otherwise return `false`.
        function mismatch(msg)
            assertion_error && throw(AssertionError(msg))
            return false
        end
        # Apply predicate `f`. A `nothing` operand means one side did not parse as the
        # number type found on the other side, which is a mismatch rather than an error.
        function check(f, x1, x2)
            if isnothing(x1) || isnothing(x2) || !f(x1, x2)
                return mismatch(
                    "\n    show method equality failed with $x1 ($(typeof(x1))) == $x2 ($(typeof(x2)))\n\n"
                )
            end
            return true
        end

        xfields = split(x.parsed_show)
        yfields = split(y.parsed_show)
        for (f1, f2) in zip(xfields, yfields)
            if ispath(f2) || ispath(f1)
                check(path, last(splitpath(f1)), last(splitpath(f2))) || return false
                continue
            end
            int_value = tryparse(Int, f1) # test if f1 can be interpreted as an integer
            if !isnothing(int_value)
                check(i64, int_value, tryparse(Int, f2)) || return false
                continue
            end
            float_value = tryparse(Float64, f1) # test if f1 can be interpreted as a float
            if !isnothing(float_value)
                check(f64, float_value, tryparse(Float64, f2)) || return false
                continue
            end
            check(isequal, f1, f2) || return false
        end
        # `zip` stops at the shorter collection; leftover fields on either side mean
        # the texts differ.
        if length(xfields) != length(yfields)
            return mismatch(
                "\n    show method equality failed: $(length(xfields)) fields compared with $(length(yfields)) fields\n\n"
            )
        end
        return true
    end
    # Mixed comparisons: the plain `String` operand is first converted with
    # `parse_show` (default options) and the keywords are forwarded unchanged.
    function Base.isequal(x::TestShowString, y::String; kwargs...)
        return isequal(x, parse_show(y); kwargs...)
    end
    function Base.isequal(x::String, y::TestShowString; kwargs...)
        return isequal(parse_show(x), y; kwargs...)
    end

    import Base: ==
    # `==` delegates to `isequal` for every combination involving a `TestShowString`,
    # forwarding any keyword arguments unchanged.
    function Base.:(==)(x::TestShowString, y::TestShowString; kwargs...)
        return isequal(x, y; kwargs...)
    end
    function Base.:(==)(x::TestShowString, y::String; kwargs...)
        return isequal(x, y; kwargs...)
    end
    function Base.:(==)(x::String, y::TestShowString; kwargs...)
        return isequal(x, y; kwargs...)
    end

    """
        parse_show(x; vector_simplify = true, repl = Dict())

    Render `x` with the three-argument `show` for the `text/plain` MIME type and pass
    the resulting text, together with the keywords, to the `String` method of
    `parse_show`.
    """
    function parse_show(x; vector_simplify = true, repl = Dict())
        rendered = sprint(show, MIME("text/plain"), x)
        return parse_show(rendered; vector_simplify = vector_simplify, repl = repl)
    end

    """
        parse_show(x::String; vector_simplify = true, repl = Dict())

    Normalize the text `x` and wrap the result in a `TestShowString`.

    The `repl` substitutions are applied first. A space is then inserted at every
    boundary between a digit and a non-digit character other than `.`, so that numbers
    become separate whitespace-delimited fields. If `vector_simplify` is true, a
    bracketed list is finally reduced to its first and last entries.
    """
    function parse_show(x::String;
        vector_simplify = true,
        repl = Dict(),
    )
        digit_boundary = r"(?<=\d)(?=[^\d.])|(?<=[^\d.])(?=\d)"
        bracketed_list = r"\[ (\S+).* (\S+)\ ]"
        # Custom replacements come first, then the digit/non-digit separation.
        spaced = replace(replace(x, repl...), digit_boundary => s" ")
        normalized = vector_simplify ? replace(spaced, bracketed_list => s"[ \1 \2 ]") : spaced
        return TestShowString(normalized)
    end

to be used as:

julia> struct A
           x::Int
           y::Float64
           p::String
           v::Vector{Float64}
       end

julia> function Base.show(io::IO, ::MIME"text/plain", a::A)
           print(io,"""
           My object of type A has x$(a.x) int, 
           and a path $(a.p) and also a $(a.y) float, an $(a.v) vector.
           """)
       end

julia> a = A(1, 1.0, "/home/test/file.txt", [1.0, 2.0, 3.0, 4.0])
My object of type A has x1 int, 
and a path /home/test/file.txt and also a 1.0 float, an [1.0, 2.0, 3.0, 4.0] vector.


julia> parse_show(a) == """
       My object of type A has x1 int, 
       and a path /home/test/file.txt and also a 1.0 float, an [1.0, 2.0, 3.0, 4.0] vector.
       """
true

I find that useful. But, honestly, how do you all test show methods? Or do you not test them and exclude them from code coverage? Am I missing some known functionality?

aplavin · December 18, 2024, 12:27am

@test sprint(show, myobj) == "expected output" is enough for the vast majority of cases.

lmiq · December 18, 2024, 12:32am

That might be true. But there is still a niche of show methods that depend on the output of calculations involving floating point numbers, paths, and other context-dependent variables, for which I miss something more flexible.

I’ve been using jldoctests with filters in some cases, but it also becomes cumbersome to define appropriate regexps that keep the test meaningful in many cases.

ericphanson · December 18, 2024, 12:48am

I usually do

@test contains(sprint(show, obj), "some keywords")

since often I just want to check my method is being hit and is not throwing an error. That way I don’t have to bother copy-pasting output or updating when some internal bit of the object changes its printing.

stevengj · December 18, 2024, 2:58am

I would tend to use repr(myobj) (which calls the 2-argument show) or repr("text/plain", myobj) (which calls the 3-argument show) to test show methods.

lmiq · December 18, 2024, 5:49pm

What am I missing? In this simple example repr(a) does not seem to access the show method defined:

julia> struct A
           x::Int
           path::String
           vec::Vector{Float64}
       end

julia> Base.show(io::IO, ::MIME"text/plain", a::A) = print(io, "Object with Int($(a.x)), $(a.path) and $(a.vec)")

julia> a = A(1, "/usr/bin/bash", [1.0, π, 7.5, √2])
Object with Int(1), /usr/bin/bash and [1.0, 3.141592653589793, 7.5, 1.4142135623730951]

julia> repr(a)
"A(1, \"/usr/bin/bash\", [1.0, 3.141592653589793, 7.5, 1.4142135623730951])"

Neither does sprint(show, obj): this is from the show help entry:

julia> struct Day
           n::Int
       end

julia> Base.show(io::IO, ::MIME"text/plain", d::Day) = print(io, d.n, " day")

julia> Day(1)
1 day

julia> sprint(show, Day(1))
"Day(1)"

julia> repr(Day(1))
"Day(1)"

ericphanson · December 18, 2024, 6:06pm

As @stevengj said, you need repr("text/plain", myobj) to invoke the 3-arg show with MIME type:

julia> repr("text/plain", A(1, "/usr/bin/bash", [1.0, π, 7.5, √2]))
"Object with Int(1), /usr/bin/bash and [1.0, 3.141592653589793, 7.5, 1.4142135623730951]"

Likewise for sprint(show, ...) you need to pass the MIME type:

julia> sprint(show, MIME"text/plain"(),  A(1, "/usr/bin/bash", [1.0, π, 7.5, √2]))
"Object with Int(1), /usr/bin/bash and [1.0, 3.141592653589793, 7.5, 1.4142135623730951]"

kellertuer · December 18, 2024, 8:18pm

I usually use repr as well, but also usually only implement the 2-parameter show.

To avoid my tests failing because the representation of an inner part changes – maybe spaces in an array or so – I usually “program” the expected_value as well. Something like the following (simplified, assuming just A.x in your variant, with a one-parameter constructor; I hope you get the idea):

x = 17
expected_string = "A($(x))"
@test repr(A(x)) == expected_string

That way, if repr(x) changes, both my expected string and repr(A(x)) change.
And I do test all my show() methods for sure, since especially for parametric types, show should provide something (far more) readable (hopefully).

lmiq · December 18, 2024, 8:27pm

Yeah, I think it is something reasonable enough to just test if the show method is working (if it does not throw an error). On the other side, for a more precise testing something more on the lines of what a jldoctest does (comparing the full output) is necessary.

kellertuer · December 18, 2024, 8:31pm

My approach does test the whole output in the end; it just assumes that $(x) (or repr(x) if you use that) is already tested elsewhere/before and is not the thing I want to test for this case.
Especially if x is not a type I own – elsewhere. If I own the type of x I would test its showing separately.

Maybe also a subjective feeling on what to test – my suggestion is maybe a bit of a modular approach, testing only the parts that A actually produces itself.

lmiq · December 19, 2024, 12:31am

That’s the case I have in mind. How to perform that test.

kellertuer · December 19, 2024, 7:11am

Well, if you own both the type/struct of x and A(x), you would test A as above and write a separate test for the show of x?

Can you maybe explain a bit where you are stuck, because I do not see much trouble with writing a second test for repr(x) == expected_x_value?

lmiq · December 19, 2024, 8:45am

“Expected value” might not be deterministic in many ways. My current approach deals with these issues, so far:

numbers might fluctuate because of random components, thus it is better to compare them with a limited precision.
arrays are printed differently (with different number of elements) depending on the context.
paths are different on each machine.
some data might be platform/version dependent.

Thus, hard coding an expected value is not always possible.

kellertuer · December 19, 2024, 9:03am

For all of these I would do the same “string production” in the expected value as in the show method – see the "A($(x))" string above. That way both the expected value and your show method produce the same string, since you do not have “control” over the values, as you said, but the “part around” (the part that is within show of A) can be tested this way consistently, on every machine.

lmiq · December 19, 2024, 9:06am

Well, but that is just testing that the method does not throw an error.

It might be fine, but it is not really testing that the output is the expected one.

I’m leaning toward the contains or some variant of that as my preferred approach.

cjdoris · December 19, 2024, 9:23am

To add to this, you can also use a regular expression on the right hand side - this means you can match the whole output but ignore any variable parts with .* or whatever.

kellertuer · December 19, 2024, 9:38am

I fear I do not follow you here.

If a = A(x) for some x=1.23e-1 prints as A(0.12) due to the formatting, then expected_value = "A($(x))" also yields the string "A(0.12)" (you have to use the same string-creation on x in the expected value as within the show method, of course).

And if you then test repr(A(x)) == expected_value you test everything of the show method that is not (!) the creation of the representation of x.

So I am not sure how this does only test that the method does not throw an error. Sure the common part is both your show method and the expected string use the same way to create a representation of x – but that allows to really test that every other character outside of that is as expected.

This can surely (partly or fully) also be done with (one or several) contains.

Sukera · December 19, 2024, 9:40am

I’ve had this same thought for the tests of show in Supposition.jl. For context, this is what one of the types from my package looks like:

julia> using Supposition

julia> fs =  Data.Floats(; minimum=4.5, maximum=7.9)
Supposition.Data.AllFloats:

    Produce a floating point value of type Float16, Float32 or Float64, which may be
        * isinf: never
        * isnan: maybe
        * 4.5 <= x <= 7.9

E.g. 7.229899f0, a Float32; isinf: ❌, isnan: ❌

julia> fs
Supposition.Data.AllFloats:

    Produce a floating point value of type Float16, Float32 or Float64, which may be
        * isinf: never
        * isnan: maybe
        * 4.5 <= x <= 7.9

E.g. 7.74283821213094, a Float64; isinf: ❌, isnan: ❌

Most of that output is always the same, for one instance of Data.Floats; only the example at the bottom (which is randomly generated) changes. Since Supposition.jl is about testing, I’ve of course also added tests for these outputs. I’ve chosen to go with two kinds of tests:

Check that one-arg repr (which calls two-arg show) is evalable, i.e. that calling eval(Meta.parse(repr(<obj>))) creates an object that is == to obj.
Check that various bits that are important to communicate to the user show up in the two-arg repr.

The former is quite literally just a loop over example objects with an @test attached to it. I could of course randomize the exact values used here, but I don’t really see the point - the codepaths that are tested with this don’t branch on those values, they merely display them.

The latter consists of a bunch of specific-per-type occurrences in the output of two-arg repr. The types tested there are for generating random values, and you can steer their behavior with various options passed to the constructor. E.g. for the Data.Floats from above, there are 4 arguments you can pass:

a minimum value
a maximum value
allow infinities (defaults to be inferred from bounds)
allow NaNs (defaults true)

So there are a bunch of combinations that could be tested here. However, since I have control over show, I can make sure to only test for the bits that are important - that the settings passed in by the user which end up affecting the display are shown correctly. In essence, this results in a bunch of occursin checks with a very narrow focus:

github.com

Seelengrab/Supposition.jl/blob/e0e108f24b042ea64821aaf0ea9db56fc9da127f/test/runtests.jl#L1129-L1134


      
          # Float16 generator with both bounds given: the `text/plain` repr must state
          # the bounds and contain "isinf: never".
          minmax_float16 = Data.Floats{Float16}(;minimum=4.0, maximum=5.0)
          @test occursin("4.0 <= x <= 5.0", repr("text/plain", minmax_float16))
          @test occursin("isinf: never", repr("text/plain", minmax_float16))
          # With only one bound given, the repr must contain "isinf: maybe".
          @test occursin("isinf: maybe", repr("text/plain", Data.Floats{Float16}(;minimum=4.0)))
          @test occursin("isinf: maybe", repr("text/plain", Data.Floats{Float16}(;maximum=5.0)))
          # Without a type parameter, the repr must contain "AllFloats".
          @test occursin("AllFloats", repr("text/plain", Data.Floats()))

The reason that I don’t test the entire text is that the exact wording of the show method may change due to a typo or something like that, and that would lead to a lot of unnecessary code churn in the testsuite. It’s only the things that actually matter for understanding what the object is that are tested for.

There are definitely some bits here that I could randomize (e.g. the minima/maxima could be printed wrongly), but overall, this strategy has served me well.

As for paths - I’d just use the same strategy, while ensuring they are printed in some easily checkable way, no matter the platform. E.g. by putting them on their own line in the output, with a clear keyword in front (like the isinf: check in the Data.Floats tests), which makes it very easy to test only that subset of the output in isolation. For example, what you could do is something like

   * path: <path>

which you can pretty easily extract the path string from (just strip the * path: bit at the front), which you can then parse (if necessary) & compare to the actual path stored in the object. That would also be platform independent.

lmiq · December 19, 2024, 2:32pm

I think the above post shows examples where the output of show can get complicated to analyze by a simple comparison. Other typical examples are structures that carry the results of simulations, which have to be generated on the fly for testing and might contain results that are not completely deterministic (even when using stable RNGs). Another context-dependent output is the number of threads, which might appear directly (or indirectly) in the output of show of an object.

Anyway, I appreciate all the comments here. They gave me an overview of the possibilities and common alternatives, and also a perspective on how difficult it would be to have a single function that satisfies everyone's requirements.

Topic		Replies	Views
`show` method for `Function` subtype General Usage	1	461	March 2, 2022
"show" nested struct General Usage	4	573	October 9, 2023
Cannot call `show(io,MIME...` General Usage	2	380	February 12, 2021
Defining custom show method on REPL for custom Array General Usage question	2	580	September 26, 2018
Extending Base.:show for Array of types General Usage	3	1480	November 20, 2019

Testing show methods

Related topics