Union splitting vs C++

With up to 20 line types, the results are roughly the same:

result = 499726.0713257201
 with runtime dispatch:   137.170 ms (2000000 allocations: 30.52 MiB)
 with splitting:   15.002 ms (0 allocations: 0 bytes)
 with compiler splitting:   366.458 ms (6998980 allocations: 122.05 MiB)
 with runtime dispatch 3:   157.443 ms (2000000 allocations: 30.52 MiB)
 with compiler splitting 3:   1.437 ms (0 allocations: 0 bytes)

(@Elrod's solution seems too good to be true; I have the feeling that I am being tricked by the compiler here.)

Code
module Lines

# Public names of the module.
# A multi-line `export` list needs a trailing comma on every line but the last:
# without it the statement ends at the newline and the following line is parsed
# as a separate tuple expression rather than as more names to export.
export test
export Line1, Line2, Line3, Line4, Line5, Line6, Line7, Line8, Line9, Line10,
       Line11, Line12, Line13, Line14, Line15, Line16, Line17, Line18, Line19, Line20
export LineUnion, Picture, Picture2

using BenchmarkTools
using Test

# Common supertype of all line types.
abstract type LineAbstract end

# Generate the twenty concrete types `Line1` … `Line20`. Each one is an
# immutable struct with a single field, `length::Float64`.
for i in 1:20
    @eval struct $(Symbol(:Line, i)) <: LineAbstract
        length::Float64
    end
end

# One `paint` method per concrete line type (`Line1` … `Line20`); each method
# returns the `length` field of its argument.
for i in 1:20
    @eval paint(l::$(Symbol(:Line, i))) = l.length
end

"""
    Picture{L<:LineAbstract}

Wrapper around a vector of lines, stored in the field `lines`. The element
type `L` can be any subtype of `LineAbstract`, including `LineAbstract` itself.
"""
struct Picture{L<:LineAbstract}
    lines::Vector{L}
end

# Union of the twenty concrete line types.
const LineUnion = Union{
    Line1, Line2, Line3, Line4, Line5, Line6, Line7, Line8, Line9, Line10,
    Line11, Line12, Line13, Line14, Line15, Line16, Line17, Line18, Line19, Line20
}

"""
    Picture2{U<:LineUnion}

Wrapper around a vector of lines, stored in the field `lines`, whose element
type `U` is bounded by `LineUnion` instead of by the abstract supertype.
"""
struct Picture2{U<:LineUnion}
    lines::Vector{U}
end

# Dynamic dispatch at runtime

"""
    paint1(p)

Return the sum of `paint(l)` over every element `l` of `p.lines`. The elements
are visited by direct iteration and accumulated left to right from `0.0`.
"""
function paint1(p)
    total = 0.0
    for line in p.lines
        total = total + paint(line)
    end
    return total
end

"""
    paint3(p)

Return the sum of `paint(l)` over every element `l` of `p.lines`. The elements
are visited by index, using `eachindex` and `@inbounds`, and accumulated left
to right from `0.0`.
"""
function paint3(p)
    lines = p.lines
    acc = 0.0
    @inbounds for idx in eachindex(lines)
        acc += paint(lines[idx])
    end
    return acc
end

# Union splitting

"""
    paint2(p)

Return the sum of `paint(l)` over the elements `l` of `p.lines`, testing the
element against each of `Line1` … `Line20` by hand before calling `paint`.
An element that is none of those twenty types adds nothing to the sum.
"""
function paint2(p)
    total = 0.0
    for line in p.lines
        # One explicit type test per concrete line type; the call in each
        # branch is made on a value already checked to be of that type.
        line isa Line1  ? (total += paint(line)) :
        line isa Line2  ? (total += paint(line)) :
        line isa Line3  ? (total += paint(line)) :
        line isa Line4  ? (total += paint(line)) :
        line isa Line5  ? (total += paint(line)) :
        line isa Line6  ? (total += paint(line)) :
        line isa Line7  ? (total += paint(line)) :
        line isa Line8  ? (total += paint(line)) :
        line isa Line9  ? (total += paint(line)) :
        line isa Line10 ? (total += paint(line)) :
        line isa Line11 ? (total += paint(line)) :
        line isa Line12 ? (total += paint(line)) :
        line isa Line13 ? (total += paint(line)) :
        line isa Line14 ? (total += paint(line)) :
        line isa Line15 ? (total += paint(line)) :
        line isa Line16 ? (total += paint(line)) :
        line isa Line17 ? (total += paint(line)) :
        line isa Line18 ? (total += paint(line)) :
        line isa Line19 ? (total += paint(line)) :
        line isa Line20 ? (total += paint(line)) :
        nothing
    end
    return total
end

"""
    test(n)

Build a picture of `n` lines whose concrete types are drawn at random from
`Line1` … `Line20`, check that every summation variant gives the same result,
print that result, and time each variant with `@btime`.

Two pictures are built over the same lines: `p`, whose vector has element type
`LineAbstract`, and `p2`, whose vector has element type `LineUnion`.
"""
function test(n)

  line_types = [ Line1, Line2, Line3, Line4, Line5,
                 Line6, Line7, Line8, Line9, Line10,
                 Line11, Line12, Line13, Line14, Line15,
                 Line16, Line17, Line18, Line19, Line20 ]
  # Typed comprehension: the element type is `LineAbstract` whatever gets drawn,
  # so `n == 0` (or a run where every draw is the same type) still produces the
  # abstractly typed picture the first benchmark is meant to measure.
  p = Picture(LineAbstract[rand(line_types)(rand()) for _ in 1:n])
  p2 = Picture2(convert(Vector{LineUnion}, p.lines))

  # Every variant that is timed below is also checked against the reference.
  reference = paint1(p)
  @test reference ≈ paint2(p) ≈ paint3(p)
  @test reference ≈ paint1(p2) ≈ paint2(p2) ≈ paint3(p2)
  println("result = ", reference)

  print(" with runtime dispatch: "); @btime paint1($p)
  print(" with splitting: "); @btime paint2($p)
  print(" with compiler splitting: "); @btime paint1($p2)
  print(" with runtime dispatch 3: "); @btime paint3($p)
  print(" with compiler splitting 3: "); @btime paint3($p2)

end

end

using .Lines
# Run the benchmark on one million lines.
test(10^6)

I’ve added the evaluation of a generic function (here, `sin`) on each element to make sure I was not being tricked, and I am not:

result = 4600.886933656946
 with runtime dispatch:   1.091 ms (20000 allocations: 312.50 KiB)
 with splitting:   197.816 μs (0 allocations: 0 bytes)
 with compiler splitting:   2.801 ms (68980 allocations: 1.21 MiB)
 with runtime dispatch 3:   1.196 ms (20000 allocations: 312.50 KiB)
 with compiler splitting 3:   169.995 μs (0 allocations: 0 bytes)
Code
module Lines

# Public names of the module.
# A multi-line `export` list needs a trailing comma on every line but the last:
# without it the statement ends at the newline and the following line is parsed
# as a separate tuple expression rather than as more names to export.
export test
export Line1, Line2, Line3, Line4, Line5, Line6, Line7, Line8, Line9, Line10,
       Line11, Line12, Line13, Line14, Line15, Line16, Line17, Line18, Line19, Line20
export LineUnion, Picture, Picture2

using BenchmarkTools
using Test

# Common supertype of all line types.
abstract type LineAbstract end

# Generate the twenty concrete types `Line1` … `Line20`. Each one is an
# immutable struct with a single field, `length::Float64`.
for i in 1:20
    @eval struct $(Symbol(:Line, i)) <: LineAbstract
        length::Float64
    end
end

# One `paint` method per concrete line type (`Line1` … `Line20`); each method
# returns the `length` field of its argument.
for i in 1:20
    @eval paint(l::$(Symbol(:Line, i))) = l.length
end

# Function applied to each line in the benchmarks: the sine of `paint(l)`.
function f(l)
    return sin(paint(l))
end

"""
    Picture{L<:LineAbstract}

Wrapper around a vector of lines, stored in the field `lines`. The element
type `L` can be any subtype of `LineAbstract`, including `LineAbstract` itself.
"""
struct Picture{L<:LineAbstract}
    lines::Vector{L}
end

# Union of the twenty concrete line types.
const LineUnion = Union{
    Line1, Line2, Line3, Line4, Line5, Line6, Line7, Line8, Line9, Line10,
    Line11, Line12, Line13, Line14, Line15, Line16, Line17, Line18, Line19, Line20
}

"""
    Picture2{U<:LineUnion}

Wrapper around a vector of lines, stored in the field `lines`, whose element
type `U` is bounded by `LineUnion` instead of by the abstract supertype.
"""
struct Picture2{U<:LineUnion}
    lines::Vector{U}
end

# Dynamic dispatch at runtime

"""
    paint1(p, f)

Return the sum of `f(l)` over every element `l` of `p.lines`. The elements are
visited by direct iteration and accumulated left to right from `0.0`.
"""
function paint1(p, f)
    total = 0.0
    for line in p.lines
        total = total + f(line)
    end
    return total
end

"""
    paint3(p, f)

Return the sum of `f(l)` over every element `l` of `p.lines`. The elements are
visited by index, using `eachindex` and `@inbounds`, and accumulated left to
right from `0.0`.
"""
function paint3(p, f)
    lines = p.lines
    acc = 0.0
    @inbounds for idx in eachindex(lines)
        acc += f(lines[idx])
    end
    return acc
end

# Union splitting

"""
    paint2(p, f)

Return the sum of `f(l)` over the elements `l` of `p.lines`, testing the
element against each of `Line1` … `Line20` by hand before calling `f`.
An element that is none of those twenty types adds nothing to the sum.
"""
function paint2(p, f)
    total = 0.0
    for line in p.lines
        # One explicit type test per concrete line type; the call in each
        # branch is made on a value already checked to be of that type.
        line isa Line1  ? (total += f(line)) :
        line isa Line2  ? (total += f(line)) :
        line isa Line3  ? (total += f(line)) :
        line isa Line4  ? (total += f(line)) :
        line isa Line5  ? (total += f(line)) :
        line isa Line6  ? (total += f(line)) :
        line isa Line7  ? (total += f(line)) :
        line isa Line8  ? (total += f(line)) :
        line isa Line9  ? (total += f(line)) :
        line isa Line10 ? (total += f(line)) :
        line isa Line11 ? (total += f(line)) :
        line isa Line12 ? (total += f(line)) :
        line isa Line13 ? (total += f(line)) :
        line isa Line14 ? (total += f(line)) :
        line isa Line15 ? (total += f(line)) :
        line isa Line16 ? (total += f(line)) :
        line isa Line17 ? (total += f(line)) :
        line isa Line18 ? (total += f(line)) :
        line isa Line19 ? (total += f(line)) :
        line isa Line20 ? (total += f(line)) :
        nothing
    end
    return total
end

"""
    test(n)

Build a picture of `n` lines whose concrete types are drawn at random from
`Line1` … `Line20`, check that every summation variant of `f` over the lines
gives the same result, print that result, and time each variant with `@btime`.

Two pictures are built over the same lines: `p`, whose vector has element type
`LineAbstract`, and `p2`, whose vector has element type `LineUnion`.
"""
function test(n)

  line_types = [ Line1, Line2, Line3, Line4, Line5,
                 Line6, Line7, Line8, Line9, Line10,
                 Line11, Line12, Line13, Line14, Line15,
                 Line16, Line17, Line18, Line19, Line20 ]
  # Typed comprehension: the element type is `LineAbstract` whatever gets drawn,
  # so `n == 0` (or a run where every draw is the same type) still produces the
  # abstractly typed picture the first benchmark is meant to measure.
  p = Picture(LineAbstract[rand(line_types)(rand()) for _ in 1:n])
  p2 = Picture2(convert(Vector{LineUnion}, p.lines))

  # Every variant that is timed below is also checked against the reference.
  reference = paint1(p, f)
  @test reference ≈ paint2(p, f) ≈ paint3(p, f)
  @test reference ≈ paint1(p2, f) ≈ paint2(p2, f) ≈ paint3(p2, f)
  println("result = ", reference)

  print(" with runtime dispatch: "); @btime paint1($p,$f)
  print(" with splitting: "); @btime paint2($p,$f)
  print(" with compiler splitting: "); @btime paint1($p2,$f)
  print(" with runtime dispatch 3: "); @btime paint3($p,$f)
  print(" with compiler splitting 3: "); @btime paint3($p2,$f)

end

end

using .Lines
# Run the benchmark on ten thousand lines.
test(10^4)