I am playing with the code, and it seems that this generates a random number which is usually very large (not in the [0,1] range). If that is happening here it might be that the vector is all of the same type there (I don’t know if that makes any difference). What I wanted is to test with more types.
Edit:
If I change this in your code:
for (int i = 0; i < n; i++) {
double r = ((double) rand() / (RAND_MAX));
x.push_back(r);
}
std::vector<LineAbstract*> lines;
for (int i = 0; i < n; i++)
{
// Need the casts to make ? : expression get accepted
double r = ((double) rand() / (RAND_MAX));
lines.push_back(r < 0.5 ? (LineAbstract*)new LineA(x[i]) : (LineAbstract*)new LineB(x[i]));
}
To be sure that the random number is in the [0,1] interval, the time of dynamic dispatch doubles:
Original:
dynamic dispatch : 0.003501 us per iteration (3.501000 ms total) [1073756018481283.000000]
new:
dynamic dispatch : 0.006574 us per iteration (6.574000 ms total) [500006.610053]
In Julia I get:
with dynamic dispatch: 7.334 ms (0 allocations: 0 bytes)
with splitting: 6.304 ms (0 allocations: 0 bytes)
with functors: 6.879 ms (0 allocations: 0 bytes)
simple sum of an array of Float64 of same size: 7.201 ms (0 allocations: 0 bytes)
But note that even in Julia here the non-allocating execution indicates that splitting is occurring automatically. This is why the test evolved to something more “realistic” with 5 different types, and to avoid compiler tricks we also computed the sin
of the field instead of simply returning the field, here.
Edit2: Introducing more types does seem to create a large overhead in Julia, and a much smaller one on C++:
Julia:
Summary
using BenchmarkTools
using Test
abstract type LineAbstract end
struct Line1 <: LineAbstract length::Float64 end
struct Line2 <: LineAbstract length::Float64 end
struct Line3 <: LineAbstract length::Float64 end
struct Line4 <: LineAbstract length::Float64 end
struct Line5 <: LineAbstract length::Float64 end
struct Picture{T<:LineAbstract}
lines::Vector{T}
end
paint(l :: Line1) = l.length
paint(l :: Line2) = l.length
paint(l :: Line3) = l.length
paint(l :: Line4) = l.length
paint(l :: Line5) = l.length
# Dynamical dispatch at runtime
function paint(p :: Picture)
s = 0.
for l in p.lines
s += paint(l)
end
s
end
# Union splitting
function paint2(p::Picture)
s = 0.
for l in p.lines
if l isa Line1
s += paint(l)
elseif l isa Line2
s += paint(l)
elseif l isa Line3
s += paint(l)
elseif l isa Line4
s += paint(l)
elseif l isa Line5
s += paint(l)
end
end
s
end
# Union splitting with annotated calls
function paint2_annotated(p::Picture)
s = 0.
for l in p.lines
if l isa Line1
s += paint(l::Line1)
elseif l isa Line2
s += paint(l::Line2)
elseif l isa Line3
s += paint(l::Line3)
elseif l isa Line4
s += paint(l::Line4)
elseif l isa Line5
s += paint(l::Line5)
end
end
s
end
# Functors
(line::Line1)() = line.length
(line::Line2)() = line.length
(line::Line3)() = line.length
(line::Line4)() = line.length
(line::Line5)() = line.length
function paint3(p :: Picture)
s = 0.
for l in p.lines
s += l()
end
s
end
# good data structure
struct Picture2
objects::Vector
end
function buildp2(p)
p2 = [Line1[], Line2[], Line3[], Line4[], Line5[]]
for l in p.lines
if l isa Line1
push!(p2[1], l)
elseif l isa Line2
push!(p2[2], l)
elseif l isa Line3
push!(p2[3], l)
elseif l isa Line4
push!(p2[4], l)
elseif l isa Line5
push!(p2[5], l)
end
end
Picture2(p2)
end
function paint4(p::Picture2)
s = 0.
for l in p.objects
for x in l
s += paint(x)
end
end
s
end
function paint4_sum(p::Picture2)
s = 0.
for l in p.objects
s += sum(x->paint(x),l)
end
s
end
function mysum(f,x)
s = 0.
for i in eachindex(x)
s += f(x[i])
end
s
end
function paint4_mysum(p::Picture2)
s = 0.
for l in p.objects
s += mysum(x->paint(x),l)
end
s
end
# running
n = 1_000_000
x = rand(n)
line_types = [ Line1, Line2, Line3, Line4, Line5 ]
p = Picture([ line_types[rand(1:5)](x[i]) for i in 1:n ]);
p2 = buildp2(p)
@test paint(p) ≈ paint2(p) ≈ paint2_annotated(p) ≈ paint3(p) ≈ paint4(p2) ≈ paint4_sum(p2) ≈ paint4_mysum(p2)
print(" with runtime dispatch: "); @btime paint($p)
print(" with splitting: "); @btime paint2($p)
print(" with annotated splitting: "); @btime paint2_annotated($p)
print(" with functors: "); @btime paint3($p)
print(" with array of arrays, sum loop: "); @btime paint4($p2)
print(" with array of arrays, Base.sum: "); @btime paint4_sum($p2)
print(" with array of arrays, mysum: "); @btime paint4_mysum($p2)
C++ (hope I didn’t do something wrong):
Summary
// g++ -o paint -O3 -march=native -W -Wall paint.cpp
#include <sys/time.h>
#include <cstdio>
#include <stdlib.h>
#include <vector>
struct LineAbstract
{
virtual double paint() =0;
};
struct LineA : public LineAbstract
{
LineA(double w) { width = w; }
virtual double paint() { return width; }
double width;
};
struct LineB : public LineAbstract
{
LineB(double l) { length = l; }
virtual double paint() { return length; }
double length;
};
struct LineC : public LineAbstract
{
LineC(double l) { length = l; }
virtual double paint() { return length; }
double length;
};
struct LineD : public LineAbstract
{
LineD(double l) { length = l; }
virtual double paint() { return length; }
double length;
};
struct LineE : public LineAbstract
{
LineE(double l) { length = l; }
virtual double paint() { return length; }
double length;
};
struct Picture
{
Picture(std::vector<LineAbstract*> l) { lines = l; }
std::vector<LineAbstract*> lines;
};
// Dynamic dispatch at runtime
double
paint(Picture& p)
{
double s = 0.0;
for (auto l : p.lines)
s += l->paint();
return s;
}
//// Union splitting
//
//double
//paint2(Picture& p)
//{
// double s = 0.0;
// LineA *a;
// LineB *b;
// LineC *c;
// LineD *d;
// LineE *e;
//
// for (auto l : p.lines)
// {
// a = dynamic_cast<LineA*>(l);
// if (a != nullptr)
// s += a->paint();
// else
// {
// b = dynamic_cast<LineB*>(l);
// if (b != nullptr)
// s += b->paint();
// }
// }
//
// return s;
//}
// Functors
double line(LineA *a) { return a->width; }
double line(LineB *b) { return b->length; }
double line(LineC *c) { return c->length; }
double line(LineD *d) { return d->length; }
double line(LineE *e) { return e->length; }
double
paint3(Picture& p)
{
double s = 0.0;
for (auto l : p.lines)
{
if (dynamic_cast<LineA*>(l))
s += line(static_cast<LineA*>(l));
else if (dynamic_cast<LineB*>(l))
s += line(static_cast<LineB*>(l));
else if (dynamic_cast<LineC*>(l))
s += line(static_cast<LineC*>(l));
else if (dynamic_cast<LineD*>(l))
s += line(static_cast<LineD*>(l));
else if (dynamic_cast<LineE*>(l))
s += line(static_cast<LineE*>(l));
}
return s;
}
// Naive sums
double
naivesum(std::vector<double> x)
{
double s = 0.0;
for (auto v : x)
s += v;
return s;
}
double
naivesum2(std::vector<double> x)
{
double s = 0.0;
for (unsigned int i = 0; i < x.size(); i++)
s += x[i];
return s;
}
double tdiff(struct timeval t0, struct timeval t1)
{
return (t1.tv_sec - t0.tv_sec) + (t1.tv_usec - t0.tv_usec)/1000000.0;
}
int main()
{
const int n = 1000000;
std::vector<double> x;
for (int i = 0; i < n; i++) {
double r = ((double) rand() / (RAND_MAX));
x.push_back(r);
}
std::vector<LineAbstract*> lines;
for (int i = 0; i < n; i++)
{
double r = ((double) rand() / (RAND_MAX));
if (r <= 0.2)
lines.push_back((LineAbstract*)new LineA(x[i]));
else if (r <= 0.4)
lines.push_back((LineAbstract*)new LineB(x[i]));
else if (r <= 0.6)
lines.push_back((LineAbstract*)new LineC(x[i]));
else if (r <= 0.8)
lines.push_back((LineAbstract*)new LineD(x[i]));
else if (r <= 1.0)
lines.push_back((LineAbstract*)new LineE(x[i]));
}
Picture p(lines);
struct timeval t0, t1;
double t, res;
// Store resulting sum, to make sure the calls aren't erased
gettimeofday(&t0, NULL);
res = paint(p);
gettimeofday(&t1, NULL);
t = tdiff(t0, t1);
printf("dynamic dispatch : %.6f us per iteration (%.6f ms total) [%.6f]\n", t/n*1000000, t*1000, res);
// gettimeofday(&t0, NULL);
// res = paint2(p);
// gettimeofday(&t1, NULL);
// t = tdiff(t0, t1);
// printf("union splitting : %.6f us per iteration (%.6f ms total) [%.6f]\n", t/n*1000000, t*1000, res);
gettimeofday(&t0, NULL);
res = paint3(p);
gettimeofday(&t1, NULL);
t = tdiff(t0, t1);
printf("functors : %.6f us per iteration (%.6f ms total) [%.6f]\n", t/n*1000000, t*1000, res);
gettimeofday(&t0, NULL);
res = naivesum(x);
gettimeofday(&t1, NULL);
t = tdiff(t0, t1);
printf("naive sum : %.6f us per iteration (%.6f ms total) [%.6f]\n", t/n*1000000, t*1000, res);
gettimeofday(&t0, NULL);
res = naivesum2(x);
gettimeofday(&t1, NULL);
t = tdiff(t0, t1);
printf("naive sum 2 : %.6f us per iteration (%.6f ms total) [%.6f]\n", t/n*1000000, t*1000, res);
}
Results:
Julia:
julia disp.jl
with runtime dispatch: 58.338 ms (2000000 allocations: 30.52 MiB)
with splitting: 10.312 ms (0 allocations: 0 bytes)
with annotated splitting: 10.581 ms (0 allocations: 0 bytes)
with functors: 58.673 ms (2000000 allocations: 30.52 MiB)
with array of arrays, sum loop: 88.891 ms (4997450 allocations: 91.51 MiB)
with array of arrays, Base.sum: 288.108 μs (10 allocations: 160 bytes)
with array of arrays, mysum: 1.052 ms (10 allocations: 160 bytes)
C++
./disp
dynamic dispatch : 0.008498 us per iteration (8.498000 ms total) [500006.610053]
functors : 0.045593 us per iteration (45.593000 ms total) [500006.610053]
naive sum : 0.004465 us per iteration (4.465000 ms total) [500006.610053]
naive sum 2 : 0.001772 us per iteration (1.772000 ms total) [500006.610053]
In these conditions C++ indeed does a better job. (but the message of changing the data structure can improve performance dramatically, as shown by the Base.sum
example of the Julia code).