LoadError: On worker 2: Local instance of remote reference not found, occurs only with a specific function

parallel

#1

Summary

Error:

ERROR: LoadError: On worker 2:
Local instance of remote reference not found

occurs only when

  • it uses @parallel and
  • it uses function CompSens.ell1reconstruct which uses JuMP and Clp

Detail

I encountered the error "Local instance of remote reference not found" when I tried to use parallel computing. I confirmed that the non-parallel version of the program runs.
Here is the code.

# ExperimentCS.jl
# Driver script: fills `loss` with the value of
# evaluate(x, A, ell1reconstruct, ell2loss) for `trial` random vectors at each
# density in ρs, distributing the iterations over worker processes.

# multiprocessing
addprocs(Sys.CPU_CORES) # add processes
push!(LOAD_PATH, pwd()) # set current directory as load path

# load my module
@everywhere using CompSens

# JLD is used only to load or save files.
import JLD

# load my data
inputFile = "M300-N1000-t0.001"
data = JLD.load("../data/sensingmatrix/$(inputFile).jld")
# A is matrix-shaped data. The stored value is a SharedArray; a SharedArray
# restored through JLD cannot be deserialized on the workers ("Local instance
# of remote reference not found"), so copy it into a plain Array before the
# parallel loop below captures it.
A = Array(data["A"])
N = data["N"]
ρs = 0.02:0.02:0.20
trial = 3
ρl = length(ρs)
# Results are written by the workers, so this one is a SharedArray created in
# the current session.
loss = SharedArray{Float64}(trial, ρl)

# then calculate a value that depends on matrix A

# parallel (this loop raised the error while A was still the SharedArray loaded from JLD)

@sync @parallel for k in eachindex(loss)
    i, j = ind2sub(loss, k)               # linear index -> (trial, density) indices
    x = Array(sprand(Float64, N, ρs[j]))  # dense copy of a random sparse vector
    loss[i, j] = evaluate(x, A, ell1reconstruct, ell2loss)
end

# non-parallel (runnable)
#
# for k in eachindex(loss)
#     i, j = ind2sub(loss, k)
#     x = Array(sprand(Float64, N, ρs[j]))
#     loss[i, j] = evaluate(x, A, ell1reconstruct, ell2loss)
# end

The error occurs at the `@sync @parallel for` loop. Here, `evaluate`, `ell1reconstruct` and `ell2loss` belong to the module CompSens.
CompSens is located at `.` (the current directory). The CompSens module is as follows.

# CompSens.jl
# This module performs a mathematical computation.
# It depends on JuMP and Clp, which are a linear-programming modeler and solver, respectively.
# ExperimentCS.jl uses the function ell1reconstruct, which is the core of the calculation.
# The function test exists only to confirm that this module has been loaded.

module CompSens

export sparseVector
export ell1reconstruct
export ell1loss
export ell2loss
export evaluate
export test

import JuMP
import Clp

# Sparse vector generation
# Weight assigned to position i: K - i, clamped below at zero.
distFunc(i::Int64, K::Int64) = max(K - i, 0)
# Disabled alternative: distFunc(i, K) = -i + K > 0 ? 1.0 : 0

"""
    sparseVector(N::Int64, K::Int64)

Create a sparse vector.

# Arguments
- `N::Int64`: dimension (length of the returned vector).
- `K::Int64`: number of elements, with `K < N`.

# Returns
A `Float64` vector of length `N` holding the values `distFunc(i, K)` for
`i = 1:N`, in random order.

NOTE(review): `distFunc(i, K)` is nonzero only for `i < K`, so the result has
`K - 1` nonzero entries rather than `K` — confirm this is intended.
"""
function sparseVector(N::Int64, K::Int64)
    weights = zeros(N)
    weights .= distFunc.(1:N, K)
    return shuffle(weights)
end

"""
    ell1reconstruct(unknown_x, A)

Perform ell1 reconstruction.

The measurement `y = A * unknown_x` is formed first. A linear program is then
built with JuMP and solved with Clp: minimize `sum(t)` subject to
`-t[n] <= x[n] <= t[n]`, `x[n] >= 0`, and `y[m] == sum(A[m,n] * x[n])` for every
row `m`.

# Arguments
- `unknown_x`: unknown vector in R^N.
- `A`: sensing matrix in R^{M×N}.

# Returns
The value of the model variable `x` as reported by `JuMP.getvalue`. The solve
status is not inspected.

# Throws
- `ErrorException` when the number of columns of `A` differs from
  `length(unknown_x)`.
"""
function ell1reconstruct(unknown_x::Array{Float64, 1}, A::AbstractArray{Float64, 2})
    N = length(unknown_x)
    size(A, 2) == N || error("xとAのサイズが合いません")

    M = size(A, 1)
    y = A * unknown_x

    model = JuMP.Model(solver=Clp.ClpSolver())
    JuMP.@variable(model, t[1:N])
    JuMP.@variable(model, x[1:N] >= 0.0)

    JuMP.@objective(model, Min, sum(t[n] for n=1:N))

    # -t[n] <= x[n] <= t[n], written as two separate linear inequalities
    JuMP.@constraint(model, [n=1:N], -t[n] <= x[n])
    JuMP.@constraint(model, [n=1:N], x[n] <= t[n])
    # the candidate x must reproduce every measurement y[m]
    JuMP.@constraint(model, [m=1:M], y[m] == sum(A[m,n] * x[n] for n=1:N))

    JuMP.solve(model)
    return JuMP.getvalue(x)
end

"""
    ell1loss(x, y)

ell1 loss function: the 1-norm of the difference `x - y`.

The loss is taken on the difference so that it is zero exactly when `x` and
`y` agree; a norm of the pair `[x, y]` would stay positive even for a perfect
reconstruction.

# Arguments
- `x`, `y`: values to compare; `x - y` must be defined for them.

# Returns
`norm(x - y, 1)`.
"""
ell1loss(x, y) = norm(x - y, 1)

"""
    ell2loss(x, y)

ell2 loss function: the 2-norm (Euclidean norm) of the difference `x - y`.

The loss is taken on the difference so that it is zero exactly when `x` and
`y` agree; a norm of the pair `[x, y]` would stay positive even for a perfect
reconstruction.

# Arguments
- `x`, `y`: values to compare; `x - y` must be defined for them.

# Returns
`norm(x - y, 2)`.
"""
ell2loss(x, y) = norm(x - y, 2)

"""
    evaluate(x, A, alg, loss)

Evaluate a reconstruction algorithm on one input.

# Arguments
- `x::Array{Float64, 1}`: input vector, passed to `alg` and used as the
  reference for `loss`.
- `A::AbstractArray{Float64, 2}`: matrix passed to `alg` together with `x`.
- `alg`: reconstruction algorithm, called as `alg(x, A)`.
- `loss`: loss function, called as `loss(estimate, x)`.

# Returns
Whatever `loss` returns for the estimate produced by `alg`.
"""
function evaluate(x::Array{Float64, 1}, A::AbstractArray{Float64, 2}, alg, loss)
    return loss(alg(x, A), x)
end

"""
    test()

Return the constant `1.0`. Takes no arguments and has no side effects.
"""
function test()
    return 1.0
end

# Module-level statement: runs when the module body is evaluated, so this line is
# printed each time the module is loaded.
println("CompSens loaded")

end # module

I ran it like this and got the error.

> julia
               _
   _       _ _(_)_     |  A fresh approach to technical computing
  (_)     | (_) (_)    |  Documentation: https://docs.julialang.org
   _ _   _| |_  __ _   |  Type "?help" for help.
  | | | | | | |/ _` |  |
  | | |_| | | | (_| |  |  Version 0.6.2 (2017-12-13 18:08 UTC)
 _/ |\__'_|_|_|\__'_|  |  Official http://julialang.org/ release
|__/                   |  x86_64-apple-darwin14.5.0

julia> include("ExperimentCS.jl")
CompSens loaded
	From worker 5:	CompSens loaded
	From worker 2:	CompSens loaded
	From worker 3:	CompSens loaded
	From worker 4:	CompSens loaded
WARNING: type MassSpec.QMSParameter not present in workspace; reconstructing
WARNING: type MassSpec.HillInitialState not present in workspace; reconstructing
WARNING: type MassSpec.Waveform not present in workspace; interpreting array as Array{Any}
WARNING: type MassSpec.Waveform not present in workspace; reconstructing
ERROR: LoadError: On worker 2:
Local instance of remote reference not found
channel_from_id at ./distributed/remotecall.jl:144
init_loc_flds at ./sharedarray.jl:399
deserialize at ./sharedarray.jl:456
handle_deserialize at ./serialize.jl:690
deserialize_global_from_main at ./distributed/clusterserialize.jl:154
foreach at ./abstractarray.jl:1733
deserialize at ./distributed/clusterserialize.jl:56
handle_deserialize at ./serialize.jl:726
deserialize at ./serialize.jl:637
handle_deserialize at ./serialize.jl:681
deserialize at ./serialize.jl:637
deserialize_datatype at ./serialize.jl:997
handle_deserialize at ./serialize.jl:677
deserialize at ./serialize.jl:637
handle_deserialize at ./serialize.jl:684
deserialize_msg at ./distributed/messages.jl:98
message_handler_loop at ./distributed/process_messages.jl:161
process_tcp_streams at ./distributed/process_messages.jl:118
#99 at ./event.jl:73
Stacktrace:
 [1] #remotecall_fetch#141(::Array{Any,1}, ::Function, ::Function, ::Base.Distributed.Worker, ::Base.Distributed.RRID, ::Vararg{Any,N} where N) at ./distributed/remotecall.jl:354
 [2] remotecall_fetch(::Function, ::Base.Distributed.Worker, ::Base.Distributed.RRID, ::Vararg{Any,N} where N) at ./distributed/remotecall.jl:346
 [3] #remotecall_fetch#144(::Array{Any,1}, ::Function, ::Function, ::Int64, ::Base.Distributed.RRID, ::Vararg{Any,N} where N) at ./distributed/remotecall.jl:367
 [4] remotecall_fetch(::Function, ::Int64, ::Base.Distributed.RRID, ::Vararg{Any,N} where N) at ./distributed/remotecall.jl:367
 [5] call_on_owner(::Function, ::Future, ::Int64, ::Vararg{Int64,N} where N) at ./distributed/remotecall.jl:440
 [6] wait(::Future) at ./distributed/remotecall.jl:455
 [7] sync_end() at ./task.jl:274
 [8] include_from_node1(::String) at ./loading.jl:576
 [9] include(::String) at ./sysimg.jl:14
while loading /Users/myusername/calc/ExperimentCS.jl, in expression starting on line 303

When CompSens.jl is loaded, it prints “CompSens loaded”. Therefore I think the module CompSens is loaded on every worker, but the loop still fails to run.
When I changed the parallel for loop part to this:

# Diagnostic variant of the parallel loop: the body only calls test(); the
# statements that use N, ρs and A and that write to loss are commented out.
@sync @parallel for k in eachindex(loss)
    test()
    # Original loop body, disabled for this experiment:
    # i, j = ind2sub(loss, k)
    # x = Array(sprand(Float64, N, ρs[j]))
    # loss[i, j] = evaluate(x, A, ell1reconstruct, ell2loss)
end

It doesn’t cause any error.

julia> include("ExperimentCS.jl")
CompSens loaded
	From worker 4:	CompSens loaded
	From worker 5:	CompSens loaded
	From worker 3:	CompSens loaded
	From worker 2:	CompSens loaded
WARNING: type MassSpec.QMSParameter not present in workspace; reconstructing
WARNING: type MassSpec.HillInitialState not present in workspace; reconstructing
WARNING: type MassSpec.Waveform not present in workspace; interpreting array as Array{Any}
WARNING: type MassSpec.Waveform not present in workspace; reconstructing
4-element Array{Future,1}:
 Future(2, 1, 27, #NULL)
 Future(3, 1, 28, #NULL)
 Future(4, 1, 29, #NULL)
 Future(5, 1, 30, #NULL)

This error occurs only when:

  • it uses @parallel and
  • it uses CompSens.ell1reconstruct

CompSens.ell1reconstruct uses JuMP and Clp.

I don’t have the slightest idea what is wrong. Does anyone have any idea? Thanks.
(I’ll try to reduce this to a minimal reproducible example.)


#2

I found that `A`, a SharedArray loaded through JLD, causes the problem.
Converting it to an Array:

# Wrap the value loaded from JLD in Array(...) so that A is a plain Array rather
# than the loaded SharedArray.
A = Array(data["A"])

It solves the problem.