Comparison of Python / Julia benchmarks for a maths problem.
I am considering switching to Julia, but would like to know if it is worth it for such tasks.
Here is the Python code I used:
import random
import numpy as np
import matplotlib.pyplot as plt
import time
def benchmark_it(simul_func: callable, *args, **kwargs):
def wrapper(*args, **kwargs):
start = time.time()
simul_func(*args, **kwargs)
end = time.time()
return end - start
return wrapper
@benchmark_it
def run_simulations(n: int, k: int, n_iters: int) -> float:
hats = np.arange(n)
ideal = np.arange(n)
def did_k_matches_occur_in_the_simulation():
np.random.shuffle(hats)
matches = np.sum(hats == ideal)
return matches == k
return sum(did_k_matches_occur_in_the_simulation() for _ in range(n_iters)) / n_iters
n_people_to_try = [250, 1000, 5000, 10000]
times = [run_simulations(n_people, 0, 100_000) for n_people in n_people_to_try]
plt.scatter(n_people_to_try, times)
plt.xlabel("'n' in the simulation")
plt.ylabel('Time (s)')
plt.show()
I also attach the used Julia file:
using Random
using BenchmarkTools
using Plots
function run_simulations(n::Int, k::Int, n_simulations::Int)::Float64
# n: number of people
# k: number of simulations
# n_simulations: number of simulations to run
hats = collect(1:n)
ideal = collect(1:n)
function did_k_matches_occur_in_the_simulation()::Bool
shuffle!(hats)
sum(hats .== ideal) == k
end
sum((did_k_matches_occur_in_the_simulation() for _ in 1:n_simulations)) / n_simulations
end
function run_rigorous_simulations()
n_people_to_try::Array{Int, 1} = [250, 1000, 5000, 10000]
mean_times::Array{Float64, 1} = []
for n_people in n_people_to_try
b = @benchmark run_simulations($n_people, 0, 100_000)
push!(mean_times, mean(b).time / 1e9) # Convert to seconds
end
plot(n_people_to_try, mean_times,
xlabel="'n' in the simulation", ylabel="Execution Time (s)",
title="Performance of run_simulations", marker=:circle,
legend=false,)
end
run_rigorous_simulations()
The plots I got. Julia | Python
The plots show that Julia is only \approx [1.5,2] times faster than Python version. Is this an expected behavior or I used a slow Julia implementation?
If latter, please suggest the ways to optimize the code.
Thanks in advance!