You can check that these versions still compute the correct result, and play with the argument sizes and the number of threads to see what works best for your computation.
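Note that @threads only helps if Julia was started with more than one thread; you can check what the session has, and request more at launch, for example:
Threads.nthreads()   # number of threads available in this session
# start Julia with e.g. `julia -t 8`, or set the JULIA_NUM_THREADS environment variable
The full script: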
using BenchmarkTools, Random, Distributions, Statistics, Base.Threads
Random.seed!(123)
market_ids = repeat(1:1000, 35)                  # 1,000 markets with 35 products each
delta = rand(Normal(1, 5), 35000)
nu_bern = -1
randvar_nu = randn(35000, 100) * 5 .+ 6          # 100 simulation draws per observation
randvar_nu_inattention = randn(35000, 100) * 5 .+ 6.5
mat_1 = similar(randvar_nu)                      # preallocated buffers reused by every version
vec_1 = similar(delta)
# Original version: one @threads pass per element of market_ids, building three logical masks per iteration.
function predict_shares_bern(delta, randvar_nu, randvar_nu_inattention, mat_1, vec_1, market_ids, nu_bern)
    num = (mat_1 .= exp.(randvar_nu .+ delta))
    @threads for i in 1:length(market_ids)
        # normalizes the rows of market i together; values of i beyond the number of markets match no rows and are no-ops
        @views num[market_ids .== i, :] .= num[market_ids .== i, :] ./ (sum(num[market_ids .== i, :], dims = 1) .+ 1)
    end
    vec_1 .= vec(mean(num, dims = 2))
    num .= exp.(randvar_nu_inattention .+ delta)
    @threads for i in 1:length(market_ids)
        @views num[market_ids .== i, :] .= num[market_ids .== i, :] ./ (sum(num[market_ids .== i, :], dims = 1) .+ 1)
    end
    w = exp(nu_bern) / (1 + exp(nu_bern))   # probability of paying attention
    vec_1 .= w .* vec_1 .+ (1 - w) .* vec(mean(num, dims = 2))
    return vec_1
end
# First improvement: loop over the distinct market ids only (1,000 iterations instead of 35,000) and build each mask once.
function predict_shares_bern_bis(delta, randvar_nu, randvar_nu_inattention, mat_1, vec_1, market_ids, nu_bern)
    num = (mat_1 .= exp.(randvar_nu .+ delta))
    for id in unique(market_ids)
        rows = market_ids .== id
        @views num[rows, :] .= num[rows, :] ./ (sum(num[rows, :], dims = 1) .+ 1)
    end
    vec_1 .= vec(mean(num, dims = 2))
    num .= exp.(randvar_nu_inattention .+ delta)
    for id in unique(market_ids)
        rows = market_ids .== id
        @views num[rows, :] .= num[rows, :] ./ (sum(num[rows, :], dims = 1) .+ 1)
    end
    w = exp(nu_bern) / (1 + exp(nu_bern))
    vec_1 .= w .* vec_1 .+ (1 - w) .* vec(mean(num, dims = 2))
    return vec_1
end
# Second improvement: precompute the rows of each market once and replace the masked broadcasts with plain loops.
function predict_shares_bern_ter(delta, randvar_nu, randvar_nu_inattention, mat_1, vec_1, market_ids, nu_bern)
    num = (mat_1 .= exp.(randvar_nu .+ delta))
    nj = size(num, 2)
    # row indices of each market (market ids are 1:number_of_markets in the setup above)
    groups = [Int[] for _ in 1:maximum(market_ids)]
    for (i, id) in enumerate(market_ids)
        push!(groups[id], i)
    end
    for rows in groups
        for j in 1:nj
            sumi = zero(eltype(num))
            for i in rows
                sumi += num[i, j]
            end
            for i in rows
                num[i, j] /= (sumi + 1)
            end
        end
    end
    vec_1 .= vec(mean(num, dims = 2))
    num .= exp.(randvar_nu_inattention .+ delta)
    for rows in groups
        for j in 1:nj
            sumi = zero(eltype(num))
            for i in rows
                sumi += num[i, j]
            end
            for i in rows
                num[i, j] /= (sumi + 1)
            end
        end
    end
    w = exp(nu_bern) / (1 + exp(nu_bern))
    vec_1 .= w .* vec_1 .+ (1 - w) .* vec(mean(num, dims = 2))
    return vec_1
end
# Third improvement: same as the previous version, but markets are split across threads (their rows are disjoint, so this is race-free).
function predict_shares_bern_4(delta, randvar_nu, randvar_nu_inattention, mat_1, vec_1, market_ids, nu_bern)
    num = (mat_1 .= exp.(randvar_nu .+ delta))
    nj = size(num, 2)
    groups = [Int[] for _ in 1:maximum(market_ids)]
    for (i, id) in enumerate(market_ids)
        push!(groups[id], i)
    end
    @threads for m in eachindex(groups)
        rows = groups[m]
        for j in 1:nj
            sumi = zero(eltype(num))
            for i in rows
                sumi += num[i, j]
            end
            for i in rows
                num[i, j] /= (sumi + 1)
            end
        end
    end
    vec_1 .= vec(mean(num, dims = 2))
    num .= exp.(randvar_nu_inattention .+ delta)
    @threads for m in eachindex(groups)
        rows = groups[m]
        for j in 1:nj
            sumi = zero(eltype(num))
            for i in rows
                sumi += num[i, j]
            end
            for i in rows
                num[i, j] /= (sumi + 1)
            end
        end
    end
    w = exp(nu_bern) / (1 + exp(nu_bern))
    vec_1 .= w .* vec_1 .+ (1 - w) .* vec(mean(num, dims = 2))
    return vec_1
end
# copy each result: all four functions write into the same preallocated vec_1 buffer
s1 = copy(predict_shares_bern(delta, randvar_nu, randvar_nu_inattention, mat_1, vec_1, market_ids, nu_bern))
s2 = copy(predict_shares_bern_bis(delta, randvar_nu, randvar_nu_inattention, mat_1, vec_1, market_ids, nu_bern))
s3 = copy(predict_shares_bern_ter(delta, randvar_nu, randvar_nu_inattention, mat_1, vec_1, market_ids, nu_bern))
s4 = copy(predict_shares_bern_4(delta, randvar_nu, randvar_nu_inattention, mat_1, vec_1, market_ids, nu_bern))
@assert s1 ≈ s2
@assert s1 ≈ s3
@assert s1 ≈ s4
@btime predict_shares_bern($delta, $randvar_nu, $randvar_nu_inattention, $mat_1, $vec_1, $market_ids, $nu_bern)
@btime predict_shares_bern_bis($delta, $randvar_nu, $randvar_nu_inattention, $mat_1, $vec_1, $market_ids, $nu_bern)
@btime predict_shares_bern_ter($delta, $randvar_nu, $randvar_nu_inattention, $mat_1, $vec_1, $market_ids, $nu_bern)
@btime predict_shares_bern_4($delta, $randvar_nu, $randvar_nu_inattention, $mat_1, $vec_1, $market_ids, $nu_bern)
460.482 ms (1262128 allocations: 1.88 GiB) #orig
42.382 ms (280030 allocations: 15.22 MiB) #bis
34.087 ms (30 allocations: 1.34 MiB) #ter
26.706 ms (129 allocations: 1.35 MiB) #4 (with 8 threads)
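Exact timings will of course depend on the machine and on the number of threads. If you want more than the minimum time, BenchmarkTools' @benchmark macro reports the full distribution of run times and the allocation count, for example for the threaded version:
@benchmark predict_shares_bern_4($delta, $randvar_nu, $randvar_nu_inattention, $mat_1, $vec_1, $market_ids, $nu_bern)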