Hi all! I am solving a model and after Profiling the code I realized that the function myfun2 is the big bottleneck of my code. I created a MWE that preserves the computational cost of doing this function and I wanted to ask for help optimizing this function and speeding it as much as possible. Any suggestion or improvement would be very very much appreciated.
Basically, in this code what I do is to evaluate the payoffs of different paths. It is a small combinatorial discrete choice problem and of course, suffers from the course of dimensionality. I a trying to keep it at low dimensions (K<12), but when K is bigger than 8 it already becomes really slow.
Here is the MWE:
using BenchmarkTools
J = 30
T = 20
Π = 75*rand(J,T)
Dᵢ = rand([0],J,1)
feat1 = rand([1,2],J,1)
feat2 = rand([1,2],J,1)
Φ1 = 50
Φ2 = 50
Sj = 200*ones(J)
Π=Π.-Sj
K = 10
Ind =([1,4,10,12,J-15,J-10,J-6,J-5,J-2,J])
function myfun1(Ind,K,D::Array{Int64},Φ1,Φ2,feat1,feat2) #Think about how to make this faster
    X1c = zeros(Int8,size(D,1))
    X2c = zeros(Int8,size(D,1))
    X1l = zeros(Int8,size(D,1))
    X2l = zeros(Int8,size(D,1))
    countc = zeros(Float64,size(D,1))
    countl = zeros(Float64,size(D,1))
    feat1=feat1[Ind]
    feat2=feat2[Ind]
    for j=1:K
        for l=1:K
            X1c[j] = X1c[j] + (feat1[j]==feat1[l])*D[l]
            X1l[j] = X1l[j] + (feat2[j]==feat2[l])*D[l]
            countc[j] = countc[j] + (feat1[j]==feat1[l])
            countl[j] = countl[j] + (feat2[j]==feat2[l])
        end
    end
    X1c = Φ1.*(X1c.>0)./countc
    X1l = Φ2.*(X1l.>0)./countl
    fcc = (ones(K,1)'*X1c)[1]
    fcl = (ones(K,1)'*X1l)[1]
    return fcc, fcl
end
function myfun2(Ind,Π,Φ1,Φ2,Sj,Dᵢ,feat1,feat2,K,J,T)
    β = 0.95
    Π = Π[Ind,:]
    Sj = Sj[Ind]
    D = zeros(Int64,2^K,K,T)
    Indices =  ones(Int64,2^K,T)
    ind =  ones(Int64,T+1)
    vc = zeros(Float64,2^K,2^K,T)
    v = zeros(Float64,2^K,T)
    Dstar = zeros(Int64,K,T+1)
    Dstar[:,1]=Dᵢ[Ind]
    Πstar=0
    ChoiceSet = zeros(Int64,2^K,K)
    for j=0:2^K-1
        int_s = collect(string(j, base=2, pad=K))
        int_s = parse.(Int8,int_s)
        ChoiceSet[j+1,:]=int_s
    end
    for t=0, i=1:2^K
        for ii=1:2^K
            (eg,el)=myfun1(Ind,K,ChoiceSet[ii,:],Φ1,Φ2,feat1,feat2)
            vc[i,ii,T-t] = sum(ChoiceSet[ii,:].*(Π[:,T-t] .+ Sj.*ChoiceSet[i,:])) .- (eg .+ el)
        end
        v[i,T-t] = maximum(vc[i,:,T-t])
        Indices[i,T-t] = argmax(vc[i,:,T-t])
        D[i,:,T-t] = ChoiceSet[argmax(vc[i,:,T-t]),:]
    end
    for t=1:T-1, i=1:2^K
        for ii=1:2^K
            (eg,el)=myfun1(Ind,K,ChoiceSet[ii,:],Φ1,Φ2,feat1,feat2)
            vc[i,ii,T-t] = sum(ChoiceSet[ii,:].*(Π[:,T-t] .+ Sj.*ChoiceSet[i,:])) .- (eg .+ el)+β*v[ii,T-t+1]
        end
        v[i,T-t] = maximum(vc[i,:,T-t])
        Indices[i,T-t] = argmax(vc[i,:,T-t])
        D[i,:,T-t] = ChoiceSet[argmax(vc[i,:,T-t]),:]
    end
    for t=2:T+1
        ind[t] = Indices[ind[t-1],t-1]
        Dstar[:,t]=D[ind[t],:,t-1]
    end
    for t=2:T+1
        (eg,el)=myfun1(Ind,K,Dstar[:,t],Φ1,Φ2,feat1,feat2)
        for j=1:K
            π=β^(t-2)*Dstar[j,t]*(Π[j,t-1]+Sj[j]*Dstar[j,t-1])
            Πstar=π+Πstar
        end
        Πstar = Πstar - β^(t-2)*(eg+el)
    end
    Dstar = Dstar[:,2:end]
    return Dstar, Πstar
end
@benchmark myfun2(Ind,Π,Φ1,Φ2,Sj,Dᵢ,feat1,feat2,K,J,T)
BenchmarkTools.Trial:
  memory estimate:  53.14 GiB
  allocs estimate:  440488318
  --------------
  minimum time:     32.632 s (7.66% GC)
  median time:      32.632 s (7.66% GC)
  mean time:        32.632 s (7.66% GC)
  maximum time:     32.632 s (7.66% GC)
  --------------
  samples:          1
  evals/sample:     1