I’ve been hacking https://github.com/JuliaLang/julialang.github.com/blob/master/benchmarks.ipynb to automate several steps in producing the microbenchmark plot on JuliaLang.org. I’ve gotten it to do what I want, but since this code appears on the Julia website, I’d be grateful if any DataFrames experts could help clean it up.
The code
- loads benchmarks.csv datafile as a DataFrame
- spiffs up language names with a Dict
- computes benchmark timings normalized by C times
- computes geometric mean of timings per language
- sorts the data, putting C 1st, Julia 2nd, then others sorted by geometric mean
# Build the table behind the microbenchmark plot: one row per (language, function)
# carrying the raw time, the C time for the same function, the C-normalized time,
# and the per-language geometric mean of the normalized times.
benchmarks = readtable("benchmarks_subset.csv", header=false,
                       names=[:language, :function, :time])

# Capitalize and decorate language names from datafile.
# A language missing from `dict` raises a KeyError here.
dict = Dict("c"=>"C", "julia"=>"Julia", "lua"=>"LuaJIT", "fortran"=>"Fortran",
            "java"=>"Java", "javascript"=>"JavaScript", "matlab"=>"Matlab",
            "mathematica"=>"Mathematica", "python"=>"Python", "octave"=>"Octave",
            "r"=>"R", "go"=>"Go")
benchmarks[:language] = [dict[lang] for lang in benchmarks[:language]]

# Normalize benchmark times by C times: join the C rows onto every row with the
# same function, then drop/rename the duplicated columns (`language_1`, `time_1`)
# that come from the C side of the join.
ctime = benchmarks[benchmarks[:language] .== "C", :]
benchmarks = join(benchmarks, ctime, on=:function)
delete!(benchmarks, :language_1)
rename!(benchmarks, :time_1, :ctime)
benchmarks[:normtime] = benchmarks[:time] ./ benchmarks[:ctime];

# Compute the geometric mean for each language.
# Comprehensions are used instead of pushing onto untyped `[]` so the three
# vectors (and hence the `langmean` columns) get concrete element types.
# NOTE(review): languages in `dict` that are absent from the data file are still
# visited (geomean over an empty selection); presumably the join below drops
# them -- confirm the join kind defaults to inner.
langs = collect(values(dict))
means = [geomean(benchmarks[benchmarks[:language] .== lang, :normtime])
         for lang in langs]
# Sort key: C = 1, Julia = 2, every other language = 3.
priorities = [lang == "C" ? 1 : (lang == "Julia" ? 2 : 3) for lang in langs]

# Add the geometric means back into the benchmarks dataframe
langmean = DataFrame(language=langs, geomean=means, priority=priorities)
benchmarks = join(benchmarks, langmean, on=:language)

# Put C first, Julia second, and sort the rest by geometric mean
sort!(benchmarks, cols=[:priority, :geomean]);
And a minimal datafile, benchmarks_subset.csv
c,iteration_mandelbrot,0.266349
c,iteration_pi_sum,27.368069
julia,iteration_mandelbrot,0.163549
julia,iteration_pi_sum,27.368159
go,iteration_mandelbrot,0.18474092830000002
go,iteration_pi_sum,27.917825880000002
fortran,iteration_mandelbrot,.236753
fortran,iteration_pi_sum,27.367718
javascript,iteration_mandelbrot,0.084
javascript,iteration_pi_sum,27.4
matlab,iteration_mandelbrot,1.31600000
matlab,iteration_pi_sum,27.37700000