Indeed, that speeds up by a factor of 2 both alternatives. Copying the solution from here: Random number and parallel execution - #16 by greg_plowman
We have:
julia> Threads.nthreads()
4
julia> @btime estimate_pi_thread(10000000)
35.675 ms (22 allocations: 1.98 KiB)
3.1413444
julia> @btime estimate_pi_floop(10000000)
12.244 ms (57 allocations: 2.77 KiB)
3.1405184
But the code is quite boring for that:
Code
using Random, Future, BenchmarkTools, FLoops
function parallel_rngs(rng::MersenneTwister, n::Integer)
step = big(10)^20
rngs = Vector{MersenneTwister}(undef, n)
rngs[1] = copy(rng)
for i = 2:n
rngs[i] = Future.randjump(rngs[i-1], step)
end
return rngs
end
const N = Threads.nthreads() * 10^8
const rng = MersenneTwister();
const rngs = parallel_rngs(MersenneTwister(), Threads.nthreads());
function estimate_pi_thread(nMC)
radius = 1.
diameter = 2. * radius
n_circle = zeros(Int,Threads.nthreads())
Threads.@threads for i in 1:nMC
rng = rngs[Threads.threadid()]
x = (rand(rng) - 0.5) * diameter
y = (rand(rng) - 0.5) * diameter
r = sqrt(x^2 + y^2)
if r <= radius
n_circle[Threads.threadid()] += 1
end
end
return (sum(n_circle) / nMC) * 4.
end
function estimate_pi_floop(nMC)
radius = 1.
diameter = 2. * radius
@floop for i in 1:nMC
rng = rngs[Threads.threadid()]
x = (rand(rng) - 0.5) * diameter
y = (rand(rng) - 0.5) * diameter
r = sqrt(x^2 + y^2)
if r <= radius
@reduce(n_circle += 1)
end
end
return (n_circle / nMC) * 4.
end