I am attempting to run a code I wrote using the SymEngine package in parallel using the @parallel for-loop, but am getting errors I don’t understand. I have written a simple test script that reproduces a similar error when trying to run SymEngine in parallel.
My TestSymEngine.jl script contains:
#Confirm number of cores for parallel test
println("Running on ", nprocs(), " cores")
#On all cores => load SymEngine and create symbolic variable r
@everywhere using SymEngine
@everywhere @vars r
@everywhere SymE = SymEngine.Basic
#Serial arrays
A = Array{Int}([1, 1, 1, 1, 1, 1])
B = Array{Int}(6)
C = Array{SymE}(6)
#Parallel arrays
Ap = SharedArray{Int}([1, 1, 1, 1, 1, 1])
Bp = SharedArray{Int}(6)
Cp = SharedArray{Int}(6)
#Print A arrays
println("A = $A")
println("Ap = $Ap")
#Test serial for-loop without SymEngine
for i=1:6
B[i] = A[i] * i
end
println("B = $B")
#Test parallel for-loop without SymEngine
@sync @parallel for i=1:6
Bp[i] = Ap[i] * i
end
println("Bp = $Bp")
#Test serial for-loop with SymEngine
for i=1:6
C[i] = B[i] * r
end
println("C = $C")
#Test parallel for-loop with SymEngine
let
local C1 = C
@everywhere CC = $C1
end
@everywhere C1 = Int(0)
@sync @parallel for i=1:6
C2 = CC[i] / r
Cp[i] = C2
end
println("Cp = $Cp")
I then call the Julia REPL from a command prompt and run this script using include(). I get the following results/errors:
browning@batch28:~> julia -p 1
_
_ _ _(_)_ | A fresh approach to technical computing
(_) | (_) (_) | Documentation: https://docs.julialang.org
_ _ _| |_ __ _ | Type "?help" for help.
| | | | | | |/ _` | |
| | |_| | | | (_| | | Version 0.6.2 (2017-12-13 18:08 UTC)
_/ |\__'_|_|_|\__'_| | Official http://julialang.org/ release
|__/ | x86_64-pc-linux-gnu
julia> include("TestSymEngine.jl")
Running on 2 cores
A = [1, 1, 1, 1, 1, 1]
Ap = [1, 1, 1, 1, 1, 1]
B = [1, 2, 3, 4, 5, 6]
Bp = [1, 2, 3, 4, 5, 6]
C = SymEngine.Basic[r, 2*r, 3*r, 4*r, 5*r, 6*r]
signal (11): Segmentation fault
while loading no file, in expression starting on line 0
_ZN9SymEngine3divERKNS_3RCPIKNS_5BasicEEES5_ at /p/home/browning/.julia/v0.6/Conda/deps/usr/lib/libsymengine.so.0.3 (unknown line)
basic_div at /p/home/browning/.julia/v0.6/Conda/deps/usr/lib/libsymengine.so.0.3 (unknown line)
/ at /p/home/browning/.julia/v0.6/SymEngine/src/mathops.jl:16
unknown function (ip: 0x7f0486f58716)
jl_call_fptr_internal at /buildworker/worker/package_linux64/build/src/julia_internal.h:339 [inlined]
jl_call_method_internal at /buildworker/worker/package_linux64/build/src/julia_internal.h:358 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:1926
macro expansion at /p/home/browning/TestSymEngine.jl:48 [inlined]
#23 at ./distributed/macros.jl:174
#158 at ./distributed/macros.jl:20
unknown function (ip: 0x7f0486f5830f)
jl_call_fptr_internal at /buildworker/worker/package_linux64/build/src/julia_internal.h:339 [inlined]
jl_call_method_internal at /buildworker/worker/package_linux64/build/src/julia_internal.h:358 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:1926
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1424 [inlined]
jl_f__apply at /buildworker/worker/package_linux64/build/src/builtins.c:426
#103 at ./distributed/process_messages.jl:264 [inlined]
run_work_thunk at ./distributed/process_messages.jl:56
run_work_thunk at ./distributed/process_messages.jl:65 [inlined]
#96 at ./event.jl:73
unknown function (ip: 0x7f0486f4d41f)
jl_call_fptr_internal at /buildworker/worker/package_linux64/build/src/julia_internal.h:339 [inlined]
jl_call_method_internal at /buildworker/worker/package_linux64/build/src/julia_internal.h:358 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:1926
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1424 [inlined]
start_task at /buildworker/worker/package_linux64/build/src/task.c:267
unknown function (ip: 0xffffffffffffffff)
Allocations: 2497544 (Pool: 2496326; Big: 1218); GC: 3
Worker 2 terminated.ERROR:
LoadError: ProcessExitedException()ERROR (unhandled task failure): read: connection reset by peer (ECONNRESET)
Stacktrace:
[1] try_yieldto(::Base.##296#297{Task}, ::Task) at ./event.jl:189
[2] wait() at ./event.jl:234
[3] wait(::Condition) at ./event.jl:27
[4] wait_impl(::Channel{Any}) at ./channels.jl:364
[5] wait(::Channel{Any}) at ./channels.jl:360
[6] take_buffered at ./channels.jl:319 [inlined]
[7] take!(::Channel{Any}) at ./channels.jl:317
[8] #remotecall_fetch#141(::Array{Any,1}, ::Function, ::Function, ::Base.Distributed.Worker, ::Base.Distributed.RRID, ::Vararg{Any,N} where N) at ./distributed/remotecall.jl:350
[9] remotecall_fetch(::Function, ::Base.Distributed.Worker, ::Base.Distributed.RRID, ::Vararg{Any,N} where N) at ./distributed/remotecall.jl:346
[10] #remotecall_fetch#144(::Array{Any,1}, ::Function, ::Function, ::Int64, ::Base.Distributed.RRID, ::Vararg{Any,N} where N) at ./distributed/remotecall.jl:367
[11] remotecall_fetch(::Function, ::Int64, ::Base.Distributed.RRID, ::Vararg{Any,N} where N) at ./distributed/remotecall.jl:367
[12] call_on_owner(::Function, ::Future, ::Int64, ::Vararg{Int64,N} where N) at ./distributed/remotecall.jl:440
[13] wait(::Future) at ./distributed/remotecall.jl:455
[14] sync_end() at ./task.jl:274
[15] include_from_node1(::String) at ./loading.jl:576
[16] include(::String) at ./sysimg.jl:14
while loading /p/home/browning/TestSymEngine.jl, in expression starting on line 303
julia>
Now it gets a little weird… by shear chance, I tried running it again with no changes whatsoever. The following is exactly following the above in the same REPL session with no editing:
julia> include("TestSymEngine.jl")
Running on 1 cores
A = [1, 1, 1, 1, 1, 1]
Ap = [1, 1, 1, 1, 1, 1]
B = [1, 2, 3, 4, 5, 6]
Bp = [1, 2, 3, 4, 5, 6]
C = SymEngine.Basic[r, 2*r, 3*r, 4*r, 5*r, 6*r]
Cp = [1, 2, 3, 4, 5, 6]
julia>
This is the output I would expect from the code, but what is happening that’s causing it to crash on the first pass through but then run on a second try?
Any help is MOST appreciated.