One thing that is important is that you have provided the hint to the compiler (and a function barrier, I guess) by using the sum
function. If one replaces that function with a loop:
function paint4(p::Picture2)
s = 0.
for l in p.objects
for x in l
s += f(paint(x))
end
end
s
end
Things get very bad, even worse than with any of the other alternatives:
with array of arrays, loop sum: 76.310 ms (3997450 allocations: 76.26 MiB)
I have implemented a naive mysum
function and with that I recover almost the same time of your test (thus, it is the splitting, and not other properties of the implementation of Base.sum
that make the difference).
Thus, complementing your post, the correct data structure is important, but we need to take advantage of that data structure to write type-stable function calls (for me, at least, that was not immediately obvious from your example, I could well imagine that the explicit loop would perform well, it is possible that with few types the compiler would split it automatically, now I can guess).
Anyway, I think all this is very didactic. One thing that I would like know is how the “single dispatch approach in C++” mentioned in the other thread relates to this problem, and with which benefits (or not).
Current code
using BenchmarkTools
using Test
abstract type LineAbstract end
struct Line1 <: LineAbstract length::Float64 end
struct Line2 <: LineAbstract length::Float64 end
struct Line3 <: LineAbstract length::Float64 end
struct Line4 <: LineAbstract length::Float64 end
struct Line5 <: LineAbstract length::Float64 end
struct Picture{T<:LineAbstract}
lines::Vector{T}
end
paint(l :: Line1) = l.length
paint(l :: Line2) = l.length
paint(l :: Line3) = l.length
paint(l :: Line4) = l.length
paint(l :: Line5) = l.length
# Function to evaluate
f(x::Float64) = sin(x)
# Dynamical dispatch at runtime
function paint(p :: Picture)
s = 0.
for l in p.lines
s += f(paint(l))
end
s
end
# Union splitting
function paint2(p::Picture)
s = 0.
for l in p.lines
if l isa Line1
s += f(paint(l))
elseif l isa Line2
s += f(paint(l))
elseif l isa Line3
s += f(paint(l))
elseif l isa Line4
s += f(paint(l))
elseif l isa Line5
s += f(paint(l))
end
end
s
end
# Union splitting with annotated calls
function paint2_annotated(p::Picture)
s = 0.
for l in p.lines
if l isa Line1
s += f(paint(l::Line1))
elseif l isa Line2
s += f(paint(l::Line2))
elseif l isa Line3
s += f(paint(l::Line3))
elseif l isa Line4
s += f(paint(l::Line4))
elseif l isa Line5
s += f(paint(l::Line5))
end
end
s
end
# Functors
(line::Line1)() = line.length
(line::Line2)() = line.length
(line::Line3)() = line.length
(line::Line4)() = line.length
(line::Line5)() = line.length
function paint3(p :: Picture)
s = 0.
for l in p.lines
s += f(l())
end
s
end
# good data structure
struct Picture2
objects::Vector
end
function buildp2(p)
p2 = [Line1[], Line2[], Line3[], Line4[], Line5[]]
for l in p.lines
if l isa Line1
push!(p2[1], l)
elseif l isa Line2
push!(p2[2], l)
elseif l isa Line3
push!(p2[3], l)
elseif l isa Line4
push!(p2[4], l)
elseif l isa Line5
push!(p2[5], l)
end
end
Picture2(p2)
end
function paint4(p::Picture2)
s = 0.
for l in p.objects
for x in l
s += f(paint(x))
end
end
s
end
function paint4_sum(p::Picture2)
s = 0.
for l in p.objects
s += sum(x->f(paint(x)),l)
end
s
end
function mysum(f,x)
s = 0.
for i in eachindex(x)
s += f(x[i])
end
s
end
function paint4_mysum(p::Picture2)
s = 0.
for l in p.objects
s += mysum(x->f(paint(x)),l)
end
s
end
# running
n = 1_000_000
x = rand(n)
line_types = [ Line1, Line2, Line3, Line4, Line5 ]
p = Picture([ line_types[rand(1:5)](x[i]) for i in 1:n ]);
p2 = buildp2(p)
@test paint(p) ≈ paint2(p) ≈ paint2_annotated(p) ≈ paint3(p) ≈ naivesum(x) ≈ paint4(p2) ≈ paint4_sum(p2) ≈ paint4_mysum(p2)
print(" with runtime dispatch: "); @btime paint($p)
print(" with splitting: "); @btime paint2($p)
print(" with annotated splitting: "); @btime paint2_annotated($p)
print(" with functors: "); @btime paint3($p)
print(" with array of arrays, sum loop: "); @btime paint4($p2)
print(" with array of arrays, Base.sum: "); @btime paint4_sum($p2)
print(" with array of arrays, mysum: "); @btime paint4_mysum($p2)
# to compare with:
function naivesum(x)
s = 0.
for v in x
s += f(v)
end
s
end
print(" simple sum of an array of Float64 of same size: "); @btime naivesum($x)
Benchmark:
with runtime dispatch: 45.259 ms (1000000 allocations: 15.26 MiB)
with splitting: 18.026 ms (0 allocations: 0 bytes)
with annotated splitting: 17.790 ms (0 allocations: 0 bytes)
with functors: 45.164 ms (1000000 allocations: 15.26 MiB)
with array of arrays, sum loop: 71.294 ms (3997450 allocations: 76.26 MiB)
with array of arrays, Base.sum: 6.962 ms (10 allocations: 160 bytes)
with array of arrays, mysum: 7.447 ms (5 allocations: 80 bytes)
simple sum of an array of Float64 of same size: 6.729 ms (0 allocations: 0 bytes)