Improve performance of code with a very large % of time spent in Garbage Collection

You’re probably best off running the N outer problems in parallel over the 256 cores, and having each core use f1.

I’d do something like:

using Distributed

@everywhere begin
    using JuMP
    import Distributions
    import MosekTools
    import NLopt
    import Random
end

# Immutable problem data, shared (read-only) by all workers.
# FIX: `z` was declared `Array{Float64}` (abstract in its rank), which makes the
# field type non-concrete and defeats specialization; `generate_data` always
# builds an I × S × N array, so pin the rank to 3.
@everywhere struct Data
    N::Int                 # number of outer problems (parallelized over in pmap)
    I::Int                 # first dimension of z / length of ν, Cap
    J::Int                 # length of β / first dimension of τ
    S::Int                 # number of scenarios averaged over in f1
    σ::Int                 # elasticity-style exponent used in f2
    α::Float64             # linear cost coefficient subtracted in f1
    z::Array{Float64,3}    # draws, I × S × N (see generate_data)
    β::Vector{Float64}     # length J, enters the f2 objective
    τ::Matrix{Float64}     # J × I, indexed τ[j, i] in f2's constraints
end

# Solve the inner conic subproblem for scenario `s` of outer problem `n`,
# given candidate capacities `Cap` (length data.I).
# Returns `(objective_value, value.(ν))` where ν has length data.I.
@everywhere function f2(data::Data, Cap::Vector{Float64}, s::Int, n::Int)
    # BUG FIX: the file uses `import MosekTools`, so the bare `Mosek` module is
    # not in scope — `Model(Mosek.Optimizer)` would be an UndefVarError.
    opt = Model(MosekTools.Optimizer)
    # NOTE: the former `set_optimizer_attribute(opt, "QUIET", false)` was
    # contradicted by `set_silent` below (which overrides solver verbosity),
    # so it has been removed.
    set_optimizer_attribute(opt, "INTPNT_CO_TOL_DFEAS", 1e-7)
    set_silent(opt)
    # Constant multiplier for the t-terms of the objective.
    σ′ = ((data.σ - 1) / data.σ)^(data.σ - 1) / data.σ
    @variable(opt, ν[1:data.I] >= 0)
    @variable(opt, μ[1:data.J] >= 0)
    @variable(opt, t[1:data.J] >= 0)
    @objective(
        opt,
        Min,
        sum(Cap[i] * ν[i] for i in 1:data.I) +
        sum(σ′ * data.β[j]^data.σ * t[j] for j in 1:data.J),
    )
    @constraint(
        opt,
        [i = 1:data.I, j = 1:data.J],
        data.τ[j, i] * ν[i] - μ[j] >= -data.τ[j, i] / data.z[i, s, n],
    )
    # Power-cone reformulation linking t[j] and μ[j].
    @constraint(
        opt,
        [j = 1:data.J],
        [t[j], μ[j], 1.0] in MOI.PowerCone(1 / data.σ)
    )
    optimize!(opt)
    return objective_value(opt), value.(ν)
end

# Build a reproducible `Data` instance (seed fixed to 12345).
# Keywords: N outer problems, I plants-side dimension, J markets-side
# dimension, S scenarios.
function generate_data(; N=3, I = 250, J = 50, S = 100)
    σ = 5
    α = 0.15
    Random.seed!(12345)
    # Log-normal draws of size I × S × N, shifted by a length-I uniform vector
    # (broadcast along the first dimension).
    z = exp.(rand(Distributions.Normal(-2.5 / 2, sqrt(2.5)), I, S, N)) .+ rand(I)
    β = ones(J)
    # 1 + Euclidean distance between J points and I points laid out on a line.
    function distmat(J, I)
        coordinates_market = hcat(1:J, zeros(J))
        coordinates_plant = hcat((1:I) .* J / I, zeros(I))
        return [
            1 + sqrt(
                (coordinates_market[j, 1] - coordinates_plant[l, 1])^2 +
                (coordinates_market[j, 2] - coordinates_plant[l, 2])^2
            )
            for j = 1:J, l = 1:I
        ]
    end
    τ = distmat(J, I)
    # FIX: pass the locals σ and α rather than repeating the literals 5 and
    # 0.15 — previously the locals were dead and the two could drift apart.
    return Data(N, I, J, S, σ, α, z, β, τ)
end

# Average the inner-problem results of f2 over all S scenarios of outer
# problem `n`, then subtract the linear α cost.
# Returns `(mean_objective - α * sum(x), mean_ν .- α)`.
@everywhere function f1(data::Data, x::Vector{Float64}, n::Int)
    total_obj = 0.0
    total_ν = zeros(data.I)
    for scenario in 1:data.S
        obj, ν = f2(data, x, scenario, n)
        total_obj += obj
        total_ν .+= ν
    end
    mean_obj = total_obj / data.S
    mean_ν = total_ν ./ data.S
    return mean_obj - data.α * sum(x), mean_ν .- data.α
end

# Solve outer problem `n` with NLopt's MMA, starting from ones(data.I), over
# nonnegative capacities. Returns the optimizing vector `min_x`.
@everywhere function opt_n(data::Data, n::Int)
    opt = NLopt.Opt(:LD_MMA, data.I)
    opt.lower_bounds = zeros(data.I)
    opt.xtol_rel = 1e-3
    opt.ftol_rel = 1e-8
    # NLopt minimizes, so negate both the f1 objective and its gradient.
    function negated_objective(x, grad)
        value, gradient = f1(data, x, n)
        if !isempty(grad)
            grad .= -gradient
        end
        return -value
    end
    opt.min_objective = negated_objective
    _, min_x, _ = NLopt.optimize(opt, ones(data.I))
    return min_x
end

# Distribute the N outer problems over the worker processes and return the
# results as an I × N matrix (one column per outer problem).
@everywhere function parallel_big_loop(data::Data)
    # CachingPool ships the closure (and the captured `data`) to each worker
    # once, instead of re-serializing it for every pmap task.
    pool = CachingPool(workers())
    # FIX: the former `let data = data` was a no-op — `data` is a function
    # argument that is never reassigned, so no boxing could occur.
    ret = pmap(n -> opt_n(data, n), pool, 1:data.N)
    # FIX: `reduce(hcat, ret)` instead of `hcat(ret...)` — avoids splatting a
    # potentially large vector into a single varargs call.
    return reduce(hcat, ret)
end

# Smoke-test driver: deliberately small sizes (the comment at the top of the
# post mentions I = 250, J = 50, S = 100 for the real run).
data = generate_data(N = 3, I = 25, J = 5, S = 10)
parallel_big_loop(data)
1 Like