Thanks for the brief on BenchmarkTools!
Actually, the numbers I get from @belapsed
appear to be stable. The ratio always seems to land close to either 1.05 or 1.10; the numbers I posted were not cherry-picked.
For example, changing to
# Repeat the full size sweep ten times so run-to-run variation is visible.
for i in 1:10
    @show i
    # Sizes 200, 220, ..., 300.
    for n in 200:20:300
        @show n, bench(n)
    end
end
I get
i = 1
(n, bench(n)) = (200, (1.059105419604759, 2.47e-7, 2.33e-7))
(n, bench(n)) = (220, (1.1169674264306346, 2.85e-7, 2.55e-7))
(n, bench(n)) = (240, (1.069635126777984, 3.07e-7, 2.87e-7))
(n, bench(n)) = (260, (1.115670074219297, 3.35e-7, 3.0e-7))
(n, bench(n)) = (280, (1.0594082334289328, 3.45e-7, 3.26e-7))
(n, bench(n)) = (300, (1.103136412176504, 3.87e-7, 3.51e-7))
i = 2
(n, bench(n)) = (200, (1.0582788259641385, 2.46e-7, 2.33e-7))
(n, bench(n)) = (220, (1.1114801668710947, 2.85e-7, 2.56e-7))
(n, bench(n)) = (240, (1.0708995589581427, 3.1e-7, 2.9e-7))
(n, bench(n)) = (260, (1.1171171171171173, 3.35e-7, 3.0e-7))
(n, bench(n)) = (280, (1.0556340121896834, 3.41e-7, 3.23e-7))
(n, bench(n)) = (300, (1.1086707410236822, 3.86e-7, 3.48e-7))
i = 3
(n, bench(n)) = (200, (1.057655254187046, 2.46e-7, 2.33e-7))
(n, bench(n)) = (220, (1.1151592455163886, 2.85e-7, 2.55e-7))
(n, bench(n)) = (240, (1.0550129230029452, 3.08e-7, 2.92e-7))
(n, bench(n)) = (260, (1.115670074219297, 3.35e-7, 3.0e-7))
(n, bench(n)) = (280, (1.055371768699117, 3.41e-7, 3.23e-7))
(n, bench(n)) = (300, (1.0986128024567163, 3.88e-7, 3.53e-7))
i = 4
(n, bench(n)) = (200, (1.0596853679833065, 2.47e-7, 2.33e-7))
(n, bench(n)) = (220, (1.1106500691562933, 2.85e-7, 2.56e-7))
(n, bench(n)) = (240, (1.0704536925477495, 3.1e-7, 2.9e-7))
(n, bench(n)) = (260, (1.1172220085544793, 3.36e-7, 3.01e-7))
(n, bench(n)) = (280, (1.0602796213156354, 3.42e-7, 3.23e-7))
(n, bench(n)) = (300, (1.1026798679867988, 3.9e-7, 3.54e-7))
i = 5
(n, bench(n)) = (200, (1.0583630967352484, 2.47e-7, 2.33e-7))
(n, bench(n)) = (220, (1.1109747721467211, 2.84e-7, 2.56e-7))
(n, bench(n)) = (240, (1.0617967276929627, 3.09e-7, 2.91e-7))
(n, bench(n)) = (260, (1.1148175429981175, 3.36e-7, 3.01e-7))
(n, bench(n)) = (280, (1.0604426839779206, 3.43e-7, 3.23e-7))
(n, bench(n)) = (300, (1.099881188118812, 3.89e-7, 3.54e-7))
i = 6
(n, bench(n)) = (200, (1.0554535294204532, 2.46e-7, 2.33e-7))
(n, bench(n)) = (220, (1.1143280519776586, 2.85e-7, 2.56e-7))
(n, bench(n)) = (240, (1.0622056764668448, 3.1e-7, 2.92e-7))
(n, bench(n)) = (260, (1.1133150934792522, 3.35e-7, 3.01e-7))
(n, bench(n)) = (280, (1.0656216229644626, 3.44e-7, 3.23e-7))
(n, bench(n)) = (300, (1.109878305889888, 3.87e-7, 3.48e-7))
i = 7
(n, bench(n)) = (200, (1.0540566398775353, 2.46e-7, 2.33e-7))
(n, bench(n)) = (220, (1.1132480092307926, 2.85e-7, 2.56e-7))
(n, bench(n)) = (240, (1.066147859922179, 3.09e-7, 2.89e-7))
(n, bench(n)) = (260, (1.1114110244264885, 3.36e-7, 3.02e-7))
(n, bench(n)) = (280, (1.0601851851851853, 3.46e-7, 3.26e-7))
(n, bench(n)) = (300, (1.0902104780014426, 3.9e-7, 3.58e-7))
i = 8
(n, bench(n)) = (200, (1.0571566797981893, 2.46e-7, 2.33e-7))
(n, bench(n)) = (220, (1.1163452772756068, 2.85e-7, 2.56e-7))
(n, bench(n)) = (240, (1.0534270759129059, 3.1e-7, 2.95e-7))
(n, bench(n)) = (260, (1.1091549091247044, 3.36e-7, 3.03e-7))
(n, bench(n)) = (280, (1.0609731926069073, 3.45e-7, 3.25e-7))
(n, bench(n)) = (300, (1.0962927618063012, 3.88e-7, 3.54e-7))
i = 9
(n, bench(n)) = (200, (1.060763288366194, 2.47e-7, 2.33e-7))
(n, bench(n)) = (220, (1.1128090925961693, 2.84e-7, 2.56e-7))
(n, bench(n)) = (240, (1.0528404658610353, 2.94e-7, 2.79e-7))
(n, bench(n)) = (260, (1.1114110244264885, 3.36e-7, 3.02e-7))
(n, bench(n)) = (280, (1.0486630148609362, 3.43e-7, 3.27e-7))
(n, bench(n)) = (300, (1.07691203902999, 3.91e-7, 3.63e-7))
i = 10
(n, bench(n)) = (200, (1.055011655011655, 2.46e-7, 2.34e-7))
(n, bench(n)) = (220, (1.106808983977469, 2.85e-7, 2.58e-7))
(n, bench(n)) = (240, (1.0547503571697119, 2.96e-7, 2.81e-7))
(n, bench(n)) = (260, (1.107662103055896, 3.36e-7, 3.03e-7))
(n, bench(n)) = (280, (1.0569057789561545, 3.43e-7, 3.25e-7))
(n, bench(n)) = (300, (1.0881835847136114, 3.88e-7, 3.57e-7))
The more low-tech benchmark
"""
    bench(f!::Function, n::Int, m::Int)

Call `f!(dst, src)` `m` times in a row, where `src` is the vector of `2n`
one-element vectors `[1], [2], ..., [2n]` and `dst = similar(src)`.

Returns `nothing`; the function is meant to be timed from the outside
(e.g. with `@time`).
"""
function bench(f!::Function, n::Int, m::Int)
    src = [[k] for k in 1:2n]
    dst = similar(src)  # same type and length as `src`
    for _ in 1:m
        f!(dst, src)
    end
    return nothing
end
# Time three back-to-back runs of each variant: all of f_a! first, then f_b!.
for f! in (f_a!, f_b!), _ in 1:3
    @time bench(f!, 300, 10_000_000)
end
gives me
3.242679 seconds (602 allocations: 47.125 KiB)
3.230558 seconds (602 allocations: 47.125 KiB)
3.236618 seconds (602 allocations: 47.125 KiB)
3.023927 seconds (602 allocations: 47.125 KiB)
3.019529 seconds (602 allocations: 47.125 KiB)
3.027147 seconds (602 allocations: 47.125 KiB)
I ran all of this from scripts, e.g. `julia --project=. --optimize=3 benchmark.jl`, whereas I notice that you ran from the REPL. Could this make any difference?
Perhaps more likely, the difference is caused by differences in hardware and/or Julia version.
From (in the REPL)
"""
    bench(f::Function, n::Int)

Benchmark a single call `f(dst, src)` with `@benchmark`, where `src` is the
vector of `2n` one-element vectors `[1], [2], ..., [2n]` and
`dst = similar(src)`. Returns whatever `@benchmark` returns.
"""
function bench(f::Function, n::Int)
    src = [[k] for k in 1:2n]
    dst = similar(src)  # same type and length as `src`
    # The function and both arrays are `$`-interpolated into the benchmark.
    return @benchmark $f($dst, $src)
end
# Entered at the REPL prompt so that each returned benchmark result is displayed.
bench(f_a!, 200)
bench(f_b!, 200)
I have
julia> bench(f_a!, 200)
BenchmarkTools.Trial: 10000 samples with 390 evaluations.
Range (min … max): 245.897 ns … 596.667 ns ┊ GC (min … max): 0.00% … 0.00%
Time (median): 247.179 ns ┊ GC (median): 0.00%
Time (mean ± σ): 252.319 ns ± 9.886 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
▂▇█▅▃ ▁▃▂ ▁▂▁ ▃▆▆▂▂ ▂ ▂
█████▇▅▄▅▅▄▃▃▇███▆███▆▅▃▅▄▇█▆▆▄▁▄▄▄▇██████▆▄▄▄▄▆▇████▇█▆▅▄▄▆▄ █
246 ns Histogram: log(frequency) by time 273 ns <
Memory estimate: 0 bytes, allocs estimate: 0.
julia> bench(f_b!, 200)
BenchmarkTools.Trial: 10000 samples with 460 evaluations.
Range (min … max): 233.043 ns … 475.435 ns ┊ GC (min … max): 0.00% … 0.00%
Time (median): 234.130 ns ┊ GC (median): 0.00%
Time (mean ± σ): 239.150 ns ± 9.005 ns ┊ GC (mean ± σ): 0.00% ± 0.00%
▂▇█▄ ▂▃▁▁ ▁ ▂▆▆▃▂▁ ▂▁ ▂
████▇▆▄▄▃▁▁▁▁▄▁▅████▅▄▅▇███▆▆▅▄▄▆██████▇▅▅▅▄▅▆▇▇▇███▇▅▁▇▇█▆▆▆ █
233 ns Histogram: log(frequency) by time 257 ns <
Memory estimate: 0 bytes, allocs estimate: 0.