Performance drawback with subtyping

One thing that is important is that you have provided the hint to the compiler (and a function barrier, I guess) by using the sum function. If one replaces that function with a loop:

function paint4(p::Picture2)
  s = 0.
  for l in p.objects
      for x in l
        s += f(paint(x))
      end
  end
  s
end

Things get very bad, even worse than with any of the other alternatives:

 with array of arrays, loop sum:   76.310 ms (3997450 allocations: 76.26 MiB)

I have implemented a naive mysum function and with that I recover almost the same time of your test (thus, it is the splitting, and not other properties of the implementation of Base.sum that make the difference).

Thus, complementing your post, the correct data structure is important, but we need to take advantage of that data structure to write type-stable function calls (for me, at least, that was not immediately obvious from your example, I could well imagine that the explicit loop would perform well, it is possible that with few types the compiler would split it automatically, now I can guess).

Anyway, I think all this is very didactic. One thing that I would like know is how the “single dispatch approach in C++” mentioned in the other thread relates to this problem, and with which benefits (or not).

Current code
using BenchmarkTools
using Test

abstract type LineAbstract end

struct Line1 <: LineAbstract length::Float64 end
struct Line2 <: LineAbstract length::Float64 end
struct Line3 <: LineAbstract length::Float64 end
struct Line4 <: LineAbstract length::Float64 end
struct Line5 <: LineAbstract length::Float64 end

struct Picture{T<:LineAbstract}
       lines::Vector{T}
end 

paint(l :: Line1) = l.length
paint(l :: Line2) = l.length
paint(l :: Line3) = l.length
paint(l :: Line4) = l.length
paint(l :: Line5) = l.length

# Function to evaluate
f(x::Float64) = sin(x)

# Dynamical dispatch at runtime

function paint(p :: Picture)
  s = 0.
  for l in p.lines
    s += f(paint(l))
  end
  s
end

# Union splitting

function paint2(p::Picture)
  s = 0.
  for l in p.lines
    if l isa Line1
      s += f(paint(l))
    elseif l isa Line2
      s += f(paint(l))
    elseif l isa Line3
      s += f(paint(l))
    elseif l isa Line4
      s += f(paint(l))
    elseif l isa Line5
      s += f(paint(l))
    end
  end
  s
end

# Union splitting with annotated calls

function paint2_annotated(p::Picture)
  s = 0.
  for l in p.lines
    if l isa Line1
      s += f(paint(l::Line1))
    elseif l isa Line2
      s += f(paint(l::Line2))
    elseif l isa Line3
      s += f(paint(l::Line3))
    elseif l isa Line4
      s += f(paint(l::Line4))
    elseif l isa Line5
      s += f(paint(l::Line5))
    end
  end
  s
end

# Functors

(line::Line1)() = line.length
(line::Line2)() = line.length
(line::Line3)() = line.length
(line::Line4)() = line.length
(line::Line5)() = line.length

function paint3(p :: Picture)
  s = 0.
  for l in p.lines
    s += f(l())
  end
  s
end

# good data structure

struct Picture2
    objects::Vector
end 

function buildp2(p)
    p2 = [Line1[], Line2[], Line3[], Line4[], Line5[]]
    for l in p.lines
        if l isa Line1
            push!(p2[1], l)
        elseif l isa Line2
            push!(p2[2], l)
        elseif l isa Line3
            push!(p2[3], l)
        elseif l isa Line4
            push!(p2[4], l)
        elseif l isa Line5
            push!(p2[5], l)
        end
    end
    Picture2(p2)
end

function paint4(p::Picture2)
  s = 0.
  for l in p.objects
      for x in l
        s += f(paint(x))
      end
  end
  s
end

function paint4_sum(p::Picture2)
  s = 0.
  for l in p.objects
      s += sum(x->f(paint(x)),l)
  end
  s
end

function mysum(f,x)
  s = 0.
  for i in eachindex(x)
    s += f(x[i])
  end
  s
end

function paint4_mysum(p::Picture2)
  s = 0.
  for l in p.objects
      s += mysum(x->f(paint(x)),l)
  end
  s
end

# running

n = 1_000_000
x = rand(n)
line_types = [ Line1, Line2, Line3, Line4, Line5 ]
p = Picture([ line_types[rand(1:5)](x[i]) for i in 1:n ]);
p2 = buildp2(p)

@test paint(p) ≈ paint2(p) ≈ paint2_annotated(p) ≈ paint3(p) ≈ naivesum(x) ≈ paint4(p2) ≈ paint4_sum(p2) ≈ paint4_mysum(p2)

print(" with runtime dispatch: "); @btime paint($p) 
print(" with splitting: "); @btime paint2($p) 
print(" with annotated splitting: "); @btime paint2_annotated($p) 
print(" with functors: "); @btime paint3($p)
print(" with array of arrays, sum loop: "); @btime paint4($p2)
print(" with array of arrays, Base.sum: "); @btime paint4_sum($p2)
print(" with array of arrays, mysum: "); @btime paint4_mysum($p2)

# to compare with:

function naivesum(x)
  s = 0.
  for v in x
    s += f(v)
  end
  s
end
print(" simple sum of an array of Float64 of same size: "); @btime naivesum($x)

Benchmark:

 with runtime dispatch:   45.259 ms (1000000 allocations: 15.26 MiB)
 with splitting:   18.026 ms (0 allocations: 0 bytes)
 with annotated splitting:   17.790 ms (0 allocations: 0 bytes)
 with functors:   45.164 ms (1000000 allocations: 15.26 MiB)
 with array of arrays, sum loop:   71.294 ms (3997450 allocations: 76.26 MiB)
 with array of arrays, Base.sum:   6.962 ms (10 allocations: 160 bytes)
 with array of arrays, mysum:   7.447 ms (5 allocations: 80 bytes)
 simple sum of an array of Float64 of same size:   6.729 ms (0 allocations: 0 bytes)

1 Like