Hello, my code is as follows. The error comes up only when I call a Python function inside the Julia function named χ²Full
. If I set that function to zero, then there is not problem. Besides, my original code does work in parallel outside SLURM.
using Pkg
Pkg.activate(".")
@everywhere begin
using Pkg
Pkg.activate(".")
end
@everywhere begin
using PyCall
using Optimization, OptimizationNOMAD
using FiniteDiff
using DelimitedFiles
using CSV
using DataFrames, DataFramesMeta
# end
# @everywhere begin
# %%
pushfirst!(PyVector(pyimport("sys")."path"), "")
importLib = pyimport("importlib")
stream = pyimport("stream")
potentials = pyimport("potential_classes")
u = pyimport("astropy.units")
# importLib.reload(stream)
# importLib.reload(potentials)
end
# %%
@everywhere begin
println("hello from $(myid()):$(gethostname())")
println("Threads=", Threads.nthreads())
"""Loop in ϵ."""
"""Anti-normalization function."""
function back_orig(x, a, b)
return (b-a).*x + a
end
"""χ² wrap."""
function χ²Full(x, p)
m = p[1]
ic = p[2]
r☼ = p[3]
θ, ω, β = x
return stream.chi2_full(θ, ω, β, m, ic, r☼)
end
"""Worker function."""
function worker(i, sol_dir, m, ic, r☼, lb, ub)
x₀ = 0.5*(lb+ub)
p = (m, ic, r☼)
prob = OptimizationProblem(χ²Full, x₀, p, lb=lb, ub=ub)
sol = Optimization.solve(prob, NOMADOpt(); display_all_eval=false, maxiters=1)
χ² = sol.objective
worker_file = "$(sol_dir)/worker_optim_pot_m$(Int(m))_i$i.txt"
@show i, myid(), sol.u, χ², worker_file
worker_sol = ("Minimizer = $(sol.u)", "Minimum = $(sol.objective)")
writedlm(worker_file, worker_sol)
return sol
end
"""Build grid."""
function build_grid(lb_g, ub_g, n_grid)
n_full = n_grid^3
lb_a = Vector{Vector{Float64}}(undef,n_full)
ub_a = Vector{Vector{Float64}}(undef,n_full)
x₀_a = Vector{Vector{Float64}}(undef,n_full)
c₁ = collect(range(lb_g[1], ub_g[1], n_grid+1))
c₂ = collect(range(lb_g[2], ub_g[2], n_grid+1))
c₃ = collect(range(lb_g[3], ub_g[3], n_grid+1))
for i ∈ 1:n_grid
for j ∈ 1:n_grid
for k ∈ 1:n_grid
n = (i-1)*n_grid^2+(j-1)*n_grid+k
lb_a[n] = [c₁[i], c₂[j], c₃[k]]
ub_a[n] = [c₁[i+1], c₂[j+1], c₃[k+1]]
x₀_a[n] = 0.5*(lb_a[n]+ub_a[n])
end
end
end
return lb_a, ub_a, x₀_a
end
"""Parallel function."""
function cooperative(sol_dir, m, ic, r☼, lb_g, ub_g, n_grid)
lb_a, ub_a, x₀_a = build_grid(lb_g, ub_g, n_grid)
pars(i) = [i, sol_dir, m, ic, r☼, lb_a[i], ub_a[i]]
res = pmap(i->worker(pars(i)...), eachindex(x₀_a))
return res
end
# %%
end
"""Initial orbit conditions file."""
const ic_file = "param_fit_orbit_from_IbataPolysGaiaDR2-data_fixedpot.txt"
const ic = readdlm(ic_file)
"""Metaparameters."""
const m = 360.0
const sol_dir = "sol_dir_optim_pot_m$(Int(m))"
const sol_file = "sol_optim_pot_m$(Int(m)).txt"
const r☼ = 8.122
const lb_g = [40., 27., 1.e-5]
const ub_g = [45., 31., 0.005]
const n_grid = 2
@show m sol_file r☼ lb_g ub_g
if !isdir(sol_dir)
run(`mkdir $sol_dir`)
end
# """Running."""
sol = cooperative(sol_dir, m, ic, r☼, lb_g, ub_g, n_grid)
@show sol
obj = [sol[i].objective for i ∈ eachindex(sol)]
min, index = findmin(obj)
best_u = sol[index].u
best = ("Minimizer = $(best_u)", "Minimum = $(min)")
writedlm(sol_file, best)
This is the error:
From worker 5: ┌ Error: Fatal error on process 5
From worker 5: │ exception =
From worker 5: │ PyError ($(Expr(:escape, :(ccall(#= /home/mmestrefcaglp/.julia/packages/PyCall/ilqDX/src/pyfncall.jl:43 =# @pysym(:PyObject_Call), PyPtr, (PyPtr, PyPtr, PyPtr), o, pyargsptr, kw))))) <class 'TypeError'>
From worker 5: │ TypeError("cannot pickle 'traceback' object")
From worker 5: │
From worker 5: │ Stacktrace:
From worker 5: │ [1] pyerr_check
From worker 5: │ @ ~/.julia/packages/PyCall/ilqDX/src/exception.jl:75 [inlined]
From worker 5: │ [2] pyerr_check
From worker 5: │ @ ~/.julia/packages/PyCall/ilqDX/src/exception.jl:79 [inlined]
From worker 5: │ [3] _handle_error(msg::String)
From worker 5: │ @ PyCall ~/.julia/packages/PyCall/ilqDX/src/exception.jl:96
From worker 5: │ [4] macro expansion
From worker 5: │ @ ~/.julia/packages/PyCall/ilqDX/src/exception.jl:110 [inlined]
From worker 5: │ [5] #107
From worker 5: │ @ ~/.julia/packages/PyCall/ilqDX/src/pyfncall.jl:43 [inlined]
From worker 5: │ [6] disable_sigint
From worker 5: │ @ ./c.jl:473 [inlined]
From worker 5: │ [7] __pycall!
From worker 5: │ @ ~/.julia/packages/PyCall/ilqDX/src/pyfncall.jl:42 [inlined]
From worker 5: │ [8] _pycall!(ret::PyObject, o::PyObject, args::Tuple{PyObject}, nargs::Int64, kw::Ptr{Nothing})
From worker 5: │ @ PyCall ~/.julia/packages/PyCall/ilqDX/src/pyfncall.jl:29
From worker 5: │ [9] _pycall!
From worker 5: │ @ ~/.julia/packages/PyCall/ilqDX/src/pyfncall.jl:11 [inlined]
From worker 5: │ [10] #pycall#112
From worker 5: │ @ ~/.julia/packages/PyCall/ilqDX/src/pyfncall.jl:80 [inlined]
From worker 5: │ [11] pycall
From worker 5: │ @ ~/.julia/packages/PyCall/ilqDX/src/pyfncall.jl:80 [inlined]
From worker 5: │ [12] serialize(s::Distributed.ClusterSerializer{Sockets.TCPSocket}, pyo::PyObject)
From worker 5: │ @ PyCall ~/.julia/packages/PyCall/ilqDX/src/serialize.jl:14
From worker 5: │ [13] serialize_any(s::Distributed.ClusterSerializer{Sockets.TCPSocket}, x::Any)
From worker 5: │ @ Serialization ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:678
From worker 5: │ [14] serialize(s::Distributed.ClusterSerializer{Sockets.TCPSocket}, x::Any)
From worker 5: │ @ Serialization ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:657
From worker 5: │ [15] serialize(s::Distributed.ClusterSerializer{Sockets.TCPSocket}, ex::CapturedException)
From worker 5: │ @ Distributed ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/Distributed/src/clusterserialize.jl:192
From worker 5: │ [16] serialize_any(s::Distributed.ClusterSerializer{Sockets.TCPSocket}, x::Any)
From worker 5: │ @ Serialization ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:678
From worker 5: │ [17] serialize(s::Distributed.ClusterSerializer{Sockets.TCPSocket}, x::Any)
From worker 5: │ @ Serialization ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:657
From worker 5: │ [18] serialize_msg(s::Distributed.ClusterSerializer{Sockets.TCPSocket}, o::Distributed.ResultMsg)
From worker 5: │ @ Distributed ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/Distributed/src/messages.jl:78
From worker 5: │ [19] #invokelatest#2
From worker 5: │ @ ./essentials.jl:816 [inlined]
From worker 5: │ [20] invokelatest
From worker 5: │ @ ./essentials.jl:813 [inlined]
From worker 5: │ [21] send_msg_(w::Distributed.Worker, header::Distributed.MsgHeader, msg::Distributed.ResultMsg, now::Bool)
From worker 5: │ @ Distributed ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/Distributed/src/messages.jl:181
From worker 5: │ [22] send_msg_now
From worker 5: │ @ ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/Distributed/src/messages.jl:118 [inlined]
From worker 5: │ [23] send_msg_now(s::Sockets.TCPSocket, header::Distributed.MsgHeader, msg::Distributed.ResultMsg)
From worker 5: │ @ Distributed ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/Distributed/src/messages.jl:113
From worker 5: │ [24] deliver_result(sock::Sockets.TCPSocket, msg::Symbol, oid::Distributed.RRID, value::RemoteException)
From worker 5: │ @ Distributed ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:102
From worker 5: │ [25] macro expansion
From worker 5: │ @ ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:293 [inlined]
From worker 5: │ [26] (::Distributed.var"#109#111"{Distributed.CallMsg{:call_fetch}, Distributed.MsgHeader, Sockets.TCPSocket})()
From worker 5: │ @ Distributed ./task.jl:514
From worker 5: └ @ Distributed ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/Distributed/src/process_messages.jl:106
Please let me know if it would be good to have a MWE or if the error source can be recognized as it is.
Thank you very much.