Hello, I hope you guys are safe and healthy in these unprecedented times. I am trying to improve the performance of a loop, in terms of both time and memory usage, that attempts to find a maximum among a number of alternatives that grows exponentially with parameter K. I cannot parallelize this because it is part of an outer loop that is already being parallelized.

I would be super happy if I could push this code to perform with a value of K around 14 or 15 within a reasonable time. I guess it sounds heroic but I am sure that my code is very very far away from being efficient. Any help will be very much appreciated.

```
using LinearAlgebra
using BitOperations, Profile
J = 1000
T = 200
R=125*rand(J,T)
Î = 100*rand(J,T)
D = rand([0],J,1)
S = 20*rand(J,T)
Ď„ = 199
K = 8
Ind = vec(sort(rand((1:J),K,1),dims=1))
p = 0.8
SeqShocks = (rand(J,T).>p).+1
Jáµ˘ = Ind[1:K-1]
Dáµ˘ = zeros(J)
matchrow(a,B) = findfirst(i->all(j->a[j] == B[i,j],1:size(B,2)),1:size(B,1))
@views function pshockprob(sequences,prob)
(KK,K)=size(sequences)
proba = zeros(Float32,KK)
for i=1:KK
proba[i]=prob^(sum(sequences[i,:]))*(1-prob)^(K-sum(sequences[i,:]))
end
return proba
end
@views function shocks2ind(sequences,shockpath)
(K,T)=size(shockpath)
inds = zeros(T)
shocks=copy(shockpath)
replace!(shocks, 2=>0)
for i=1:T
inds[i]=matchrow(shocks[:,i]',sequences)
end
return convert(Array{Int32}, inds)
end
@views function auxfun(Ind,K,Choices,option)
if option == 1
return 0.5*sum(Choices), 0
else
return 0.6*sum(Choices), K
end
end
@views function fun_inside(Ď„,Jáµ˘,Ind,R,Î ,S,SeqShocks,Dáµ˘,K,J,T,option;Î¦=2,prob=0.8)
#println("Backward Induction with Dependent Countries")
Î˛ = 0.9
R = R[Ind,:]
Î = Î [Ind,1]
S = S[Ind,1]
UnexpShocks = SeqShocks[Ind,:]
Indices = ones(Int32,2^K,2^K,T)
v = Array{Float32}(undef,2^K,2^K)
Dstar = zeros(Int8,K,1)
Dstar[:,1]=Dáµ˘[Ind]
ChoiceSet = Array{Union{Nothing, Int8}}(nothing,2^K,K)
ShockSet = Array{Union{Nothing, Int8}}(nothing,2^K,K)
indexdiff = findall(in(setdiff(Ind,Jáµ˘)),Ind)
Dub = ones(J,T)
let int_s = fill(false,K) #pre-allocate output
for j=0:2^K-1
@. int_s = bget(j,0:K-1)
ChoiceSet[j+1,:]=int_s
ShockSet[j+1,:]=int_s
end
end
combindex = Int[ Dub[setdiff(Ind,Jáµ˘),Ď„] == ChoiceSet[i,indexdiff] for i=1:size(ChoiceSet,1) ]
pstatevec=pshockprob(ShockSet,prob)
indshocks=shocks2ind(ShockSet,UnexpShocks)
v_previous = zeros(Float32,2^K)
temp = zeros(Float32, K, 2^K)
Choice = Float32.(ChoiceSet)
Choice = Choice.*ShockSet
if option == 1
x1 = Array{Float32}(undef,2^K)
x2 = Array{Float32}(undef,2^K)
for j=1:2^K
tempeg = auxfun(Ind,K,ChoiceSet[j,:],option)
x1[j]=tempeg[1]
x2[j]=tempeg[2]
end
elseif option == 2
x1 = Array{Float32}(undef,2^K)
x2 = Array{Float32}(undef,2^K)
for j=1:2^K
tempeg =auxfun(Ind,K,ChoiceSet[j,:],option)
x1[j]=tempeg[1]
x2[j]=tempeg[2]
end
end
if option == 1
temp1WC = Array{Float32}(undef,2^K,2^K)
Vlbs = rand(2^K,2^K)
mul!(v_previous, Vlbs, pstatevec)
v_previous .*= Î˛
@fastmath @inbounds for t=0:T-Ď„
if R[:,T-t]!=0
temp1 = Array{Float32}(undef,2^K,2^K)
temp2 = Array{Float32}(undef,2^K,2^K)
vc = Array{Float32}(undef,2^K,2^K)
temp1 .= v_previous .+ x1
vcWC = Array{Float32}(undef,2^K,2^K)
temp1WC .= v_previous .+ x2
for i=1:2^K
for iii=1:2^K
for j=1:K
temp[j,iii] = exp(Î¦*ChoiceSet[i,j])*R[j,T-t]-Î [j]+S[j]*ChoiceSet[i,j]-(1-ShockSet[iii,j])*99999999999999999999999999999999999999
end
end
mul!(temp2, Choice, temp)
for iii=1:2^K
best = -Inf
bestWC = -Inf
tempind = 1
tempindWC = 1
for ii=1:2^K
if t == T-Ď„
if combindex[ii] == 1 && indshocks[Ď„] == iii
vc[ii,iii] = temp2[ii,iii]+temp1[ii]
vcWC[ii,iii] = temp2[ii,iii]+temp1WC[ii]
if vc[ii,iii] > best
best = vc[ii,iii]
tempind = ii
end
if vcWC[ii,iii] > bestWC
bestWC = vcWC[ii,iii]
tempindWC = ii
end
end
else
vc[ii,iii] = temp2[ii,iii]+temp1[ii]
vcWC[ii,iii] = temp2[ii,iii]+temp1WC[ii]
if vc[ii,iii] > best
best = vc[ii,iii]
tempind = ii
end
if vcWC[ii,iii] > bestWC
bestWC = vcWC[ii,iii]
tempindWC = ii
end
end
end
Indices[i,iii,T-t] = tempind
v[i,iii]= bestWC
end
end
mul!(v_previous, v, pstatevec)
v_previous .*= Î˛
end
end
elseif option == 2
temp1PC = Array{Float32}(undef,2^K,2^K)
Vubs = rand(2^K,2^K)
mul!(v_previous, Vubs, pstatevec)
v_previous .*= Î˛
@fastmath @inbounds for t=0:T-Ď„
if R[:,T-t]!=0
temp1 = Array{Float32}(undef,2^K,2^K)
temp2 = Array{Float32}(undef,2^K,2^K)
vc = Array{Float32}(undef,2^K,2^K)
vcPC = Array{Float32}(undef,2^K,2^K)
temp1 .= v_previous .+ x1
temp1PC .= v_previous .+ x2
for i=1:2^K
for iii=1:2^K
for j=1:K
temp[j,iii] = exp(Î¦*ChoiceSet[i,j])*R[j,T-t]-Î [j]+S[j]*ChoiceSet[i,j]-(1-ShockSet[iii,j])*99999999999999999999999999999999999999
end
end
mul!(temp2, Choice, temp)
for iii=1:2^K
best = -Inf
bestPC = -Inf
tempind = 1
tempindPC = 1
for ii=1:2^K
if t == T-Ď„
if combindex[ii] == 1 && indshocks[Ď„] == iii
vc[ii,iii] = temp2[ii,iii]+temp1[ii]
vcPC[ii,iii] = temp2[ii,iii]+temp1PC[ii]
if vc[ii,iii] > best
best = vc[ii,iii]
tempind = ii
end
if vcPC[ii,iii] > bestPC
bestPC = vcPC[ii,iii]
tempindPC = ii
end
end
else
vc[ii,iii] = temp2[ii,iii]+temp1[ii]
vcPC[ii,iii] = temp2[ii,iii]+temp1PC[ii]
if vc[ii,iii] > best
best = vc[ii,iii]
tempind = ii
end
if vcPC[ii,iii] > bestPC
bestPC = vcPC[ii,iii]
tempindPC = ii
end
end
end
Indices[i,iii,T-t] = tempind
v[i,iii]= bestPC
end
end
mul!(v_previous, v, pstatevec)
v_previous .*= Î˛
end
end
end
Dpast = matchrow(ones(K,1),ChoiceSet)
Dstar = ChoiceSet[Indices[Dpast,indshocks[Ď„],Ď„],:]
return Dstar
end
@views function fun_outside(Ď„,Jáµ˘,Ind,R,Î ,S,SeqShocks,Dáµ˘,K,J,T;Î¦=2,prob=0.8)
D1 = fun_inside(Ď„,Jáµ˘,Ind,R,Î ,S,SeqShocks,Dáµ˘,K,J,T,1)
D2 = fun_inside(Ď„,Jáµ˘,Ind,R,Î ,S,SeqShocks,Dáµ˘,K,J,T,2)
return D1, D2
end
#fun_outside(Ď„,Jáµ˘,Ind,R,Î ,S,SeqShocks,Dáµ˘,K,J,T)
TEST= @time fun_outside(Ď„,Jáµ˘,Ind,R,Î ,S,SeqShocks,Dáµ˘,K,J,T)
```

EDIT: This MWE has been edited to include the comments made by @jlapeyre and answer some questions posed by @bcmichael