Hi @affans, thank you for your suggestion; I am very interested in learning this. I think my code has a special requirement: I need to compute the objective function in parallel. I have the following parallel code, which should utilize all threads within a compute node. Therefore, I use a job array and send the Julia script to, say, 50 compute nodes for 50 Monte Carlo replications. Then, within each compute node, my code utilizes the threads available on that node.
My understanding of `SlurmManager(512)` with `N=16` is that it adds 512 cores spread across 16 nodes(?), and that you then parallelize the 500 replications over those 512 cores(?). My concern is that, with that setup, my code would not be able to run in parallel on all threads, or that there would be a conflict.
# Distributed (stdlib) provides addprocs, @everywhere, nprocs, myid and remotecall_fetch,
# all of which are used below.
using Distributed
# Launch one local worker process per logical CPU thread reported by Sys.CPU_THREADS.
addprocs(Sys.CPU_THREADS)
# Load the packages on this process and on every worker, so that functions defined with
# @everywhere can use them wherever they run.
@everywhere using JuMP, Gurobi, SparseArrays
# fun_sim_optimal_assignment(w_sim, N; atol = 1e-6)
#
# Solve the N-by-N linear assignment problem (LP with row sums == 1, column sums == 1,
# variables >= 0) that maximizes sum(w[i, j] * H[i, j]), and return the matched pairs.
#
# Arguments
#   w_sim : weights as a flat vector of length N*N; it is reshaped column-major, so
#           w_sim[(j - 1) * N + i] is the weight of pair (i, j).
#   N     : number of rows and of columns of the assignment.
#   atol  : entries of the solution with |value| <= atol are treated as zero. The solver
#           returns floating-point values, so an exact `!= 0` test would report round-off
#           or tolerance-sized bound violations (e.g. 1e-12 or -1e-9) as matches.
#           NOTE(review): the 1e-6 default mirrors Gurobi's default feasibility tolerance;
#           adjust it if the solver tolerances are changed.
#
# Returns a Vector{CartesianIndex{2}} with the (i, j) positions of the selected pairs,
# in column-major order.
#
# Throws an ErrorException if the solver does not report MOI.OPTIMAL; reshape throws if
# length(w_sim) != N*N.
#
# GRB_ENV is not defined in this block; it must exist on every process that runs this
# function (presumably a shared Gurobi.Env created elsewhere — verify).
@everywhere function fun_sim_optimal_assignment(w_sim::Vector{Float64}, N::Int64; atol::Real = 1e-6)
    # Keep the reshaped matrix under its own name instead of rebinding the argument
    # to a value of a different type.
    weights = reshape(w_sim, N, N)

    model_sim = Model(optimizer_with_attributes(() -> Gurobi.Optimizer(GRB_ENV)))
    @variable(model_sim, H_sim_temp[1:N, 1:N] >= 0)
    # Each row is matched exactly once ...
    @constraint(model_sim, [i = 1:N], sum(H_sim_temp[i, j] for j = 1:N) == 1)
    # ... and so is each column.
    @constraint(model_sim, [j = 1:N], sum(H_sim_temp[i, j] for i = 1:N) == 1)
    @objective(model_sim, Max, sum(weights[i, j] * H_sim_temp[i, j] for i = 1:N, j = 1:N))
    JuMP.optimize!(model_sim)

    if termination_status(model_sim) != MOI.OPTIMAL
        error("The model was not solved correctly.")
    end

    H_market_sim = value.(H_sim_temp)
    # Tolerance-based support extraction: ignore numerical noise around zero.
    return findall(x -> abs(x) > atol, H_market_sim)
end
"""
    fun_H_sim_parallel(fun_sim_optimal_assignment, w_sim_column, num_simulation, N, T)

Evaluate `fun_sim_optimal_assignment(w_sim_column[t, idx], N)` for every `t in 1:T` and
`idx in 1:num_simulation`, distributing the calls over the available processes, and
return the results as a `T × num_simulation` matrix.

# Arguments
- `fun_sim_optimal_assignment`: function called remotely as `f(w, N)`; it must be defined
  on every process that may execute it and return a `Vector{CartesianIndex{2}}`.
- `w_sim_column`: matrix of inputs; only entry `[t, idx]` is sent with each remote call.
- `num_simulation`: number of columns of `w_sim_column` to process.
- `N`: forwarded unchanged as the second argument of every call.
- `T`: number of rows of `w_sim_column` to process.

# Returns
`Matrix{Vector{CartesianIndex{2}}}` of size `(T, num_simulation)`.

# Notes
- Work is handed out dynamically: one feeder task per target process pulls the next
  unprocessed `idx` as soon as its previous remote call returns.
- All calls for a given `t` finish before the calls for `t + 1` start.
- The calling process only does work itself when it is the only process.
- An exception raised by a remote call propagates to the caller through `@sync`.
"""
function fun_H_sim_parallel(fun_sim_optimal_assignment::Function, w_sim_column::Matrix{Vector{Float64}}, num_simulation::Int64, N::Int64, T::Int64)
    H_sim_temp = Matrix{Vector{CartesianIndex{2}}}(undef, T, num_simulation)

    # Use the actual process ids rather than assuming they are 1:nprocs(): ids are not
    # contiguous once a worker has been removed, and a stale id makes the remote call
    # fail while a live worker is left idle.
    pids = nprocs() == 1 ? procs() : filter(p -> p != myid(), procs())

    for t in 1:T
        # Shared job counter. The feeder tasks are cooperative (@async) and there is no
        # yield point between reading and incrementing it, so no lock is needed.
        next_idx = Ref(1)
        @sync for p in pids
            @async while true
                idx = next_idx[]
                idx > num_simulation && break
                next_idx[] = idx + 1
                H_sim_temp[t, idx] = remotecall_fetch(
                    fun_sim_optimal_assignment, p, w_sim_column[t, idx], N
                )
            end
        end
    end
    return H_sim_temp
end