using Random
function very_bad_its(intervals::Array{Float64,1})
j = rand()
idx = findfirst(x-> j <= x,intervals)
return idx
end
where intervals are the extremes of intervals of the cdf. I have stored only the extremes to (maybe) speedup.
Lets consider an example of discrete probability distributions (to better understand what I am referring):
p1 = 0.4, p2 = 0.2, p3 = 0.3, p4 = 0.1
so in this case intervals is the following array:
[0.4,0.6,0.9,1.0]
The algo perse has interesting ways of speedup, like for example ordering the probabilities in descending order. I have to sample the distributions many times, like 10^6.
So the first question is if there is yet an implementation in julia of this algo, clearly faster than the previous implementation.
Second question is if in the previous function I can modify a part of the code to speedup.
If the answer to the second question is no, then can i speedup by parallelizing the code?
I am asking because I have actually tried with @distributed and stored the result in an array (next in the code i have to use the samples), but the results are not worth. Clearly the assumption is that the parallel version is faster, and it can be false.
I didnt know, thanks!
I used Distributions.jl in the past, but DiscreteDistributions not AliasTable (didnt know of this).
I am comparing the two implementations with the following code:
using BenchmarkTools
using Distributions
function very_bad_its(intervals::Array{Float64,1})
j = rand()
idx = findfirst(x-> j <= x,intervals)
return idx
end
function create_intervals_cdf(weight::Array{Float64,1})
neighbor_weight = [0.0]
neighbor_weight = vcat(neighbor_weight,weight)
intervals = [[sum(neighbor_weight[1:i]),sum(neighbor_weight[1:i+1])] for i in 1:length(neighbor_weight)-1]
res_intervals = [intervals[i][2] for i in 1:length(intervals)]
return res_intervals
end
function create_test_probs()
return [0.1,0.4,0.2,0.3]
end
function dist_its()
probs = create_test_probs()
aliastable = Distributions.AliasTable(probs)
N = 10^6
results = rand(aliastable,N)
end
function my_its()
probs = create_test_probs()
intervals = create_intervals_cdf(probs)
N = 10^6
results = Array{Int64}(undef,N)
for x in 1:N
results[x] = very_bad_its(intervals)
end
end
@btime my_its()
@btime dist_its()
The results:
20.713 ms (43 allocations: 7.63 MiB)
19.661 ms (8 allocations: 7.63 MiB)
I don’t know if there is an error in the following code, because searchsortedfirst is actually what I was looking (doing some test with only arrays, it is faster than findfirst). Maybe its a problem in N
using BenchmarkTools
using Distributions
function create_intervals_cdf(weight::Array{Float64,1})
neighbor_weight = [0.0]
neighbor_weight = vcat(neighbor_weight,weight)
intervals = [[sum(neighbor_weight[1:i]),sum(neighbor_weight[1:i+1])] for i in 1:length(neighbor_weight)-1]
res_intervals = [intervals[i][2] for i in 1:length(intervals)]
return res_intervals
end
function create_test_probs()
return [0.1,0.4,0.2,0.3]
end
function my_bad_its(intervals::Array{Float64,1})
j = rand()
idx = findfirst(x-> j <= x,intervals)
return idx
end
function new_its(intervals::Array{Float64,1})
j = rand()
idx = searchsortedfirst(intervals,j)
return idx
end
function init_dist_its()
probs = create_test_probs()
aliastable = Distributions.AliasTable(probs)
return aliastable
end
function init_my_its()
probs = create_test_probs()
intervals = create_intervals_cdf(probs)
return intervals
end
function test_distributions(aliastable::Distributions.AliasTable)
N = 10^6
results = rand(aliastable,N)
return results
end
function test_new_its(intervals::Array{Float64,1})
N = 10^6
results = Array{Int64}(undef,N)
for x in 1:N
results[x] = new_its(intervals)
end
return results
end
function test_my_bad_its(intervals::Array{Float64,1})
N = 10^6
results = Array{Int64}(undef,N)
for x in 1:N
results[x] = my_bad_its(intervals)
end
return results
end
intervals = init_my_its()
aliastable = init_dist_its()
@btime test_my_bad_its(intervals)
@btime test_distributions(aliastable)
@btime test_new_its(intervals)
The results are:
23.026 ms (2 allocations: 7.63 MiB)
23.054 ms (2 allocations: 7.63 MiB)
27.182 ms (2 allocations: 7.63 MiB)