Problem using PyCall with import not defined

pycall

#1

I am running a test script for a larger program that I am working on, and this is the code:

"""
    addWorkersCluster(cluster = [111])

Add remote worker processes on the hosts `192.168.1.<n>` for every `n` in `cluster`,
but only when the session currently consists of a single process.

Each host is handed to `addprocs` as the tuple `(address, :auto)`, with `tunnel=true`
and `topology=:master_slave`. The default `[111]` is the single test machine; the full
cluster is `[102,104,105,106,107,109,110,111]`.

Returns whatever `addprocs` returns, or `nothing` when workers were already present.
"""
function addWorkersCluster(cluster = [111])
  # Workers are already attached: do not add them a second time.
  nprocs() == 1 || return nothing
  println("Adding cluster")
  machines = [("192.168.1." * string(machineNumber), :auto) for machineNumber in cluster]
  println(machines)
  return addprocs(machines, tunnel=true, topology=:master_slave)
end

# Start the worker processes first; every @everywhere definition below comes after this call.
addWorkersCluster()

# No @everywhere on the next two lines: PyCall and the `nx` binding are loaded only by the
# process that runs this script.
# NOTE(review): the @everywhere functions below reference `nx`; the workers need their own
# import of networkx for that name to resolve there.
using PyCall
@pyimport networkx as nx

# Build a grid graph with `nx.grid_2d_graph(lenghtGrid, heightGrid, periodic=false)` and
# return the placeholder vector `[1, 2, 3, 4, 5]`.
#
# `failsRange`, `failType` and `redundancyValue` are accepted but not used by this body.
# The definition is evaluated on every process through `@everywhere`; `nx` is looked up
# on whichever process executes the call.
@everywhere function createAndRunGrid(lenghtGrid::Integer, heightGrid::Integer,
                                      failsRange::StepRange{Int,Int}, failType::Integer,
                                      redundancyValue::Number)
  # The graph is created but not inspected by this test version.
  gridGraph = nx.grid_2d_graph(lenghtGrid, heightGrid, periodic=false)
  placeholderResult = [1, 2, 3, 4, 5]
  return placeholderResult
end

# Spawn one `createAndRunGrid` job per (redundancy value, repetition) pair, then fetch and
# print every result in the order the jobs were spawned.
#
# Arguments
# - lenghtGrid, heightGrid: forwarded unchanged to `createAndRunGrid`.
# - redundancyRange: collection of redundancy values; one batch of jobs per value.
# - failsRange, failType: forwarded unchanged to `createAndRunGrid`.
# - repetitionNumber: number of jobs spawned for each redundancy value.
#
# Returns `nothing`; the fetched results are only printed.
@everywhere function runTestGrid(lenghtGrid::Integer, heightGrid::Integer, redundancyRange,
                                 failsRange::StepRange{Int,Int}, repetitionNumber::Integer,
                                 failType::Integer)
  pendingResults = Future[]
  for redundancyValue in redundancyRange
    for repetitionIndex in 1:repetitionNumber
      push!(pendingResults, @spawn createAndRunGrid(lenghtGrid, heightGrid, failsRange, failType, redundancyValue))
    end
  end
  # Fetch in spawn order; each fetch blocks until that job has finished.
  for pending in pendingResults
    notServed = fetch(pending)
    println(notServed)
  end
  return nothing
end

# Parameters of the test run; each one is passed positionally to runTestGrid below.
# r   -> redundancyRange
r = 0.1:0.2:0.9
lenghtGrid = 10
heightGrid = 3
# f   -> failsRange (a StepRange{Int,Int}, as the function signatures require)
f = 1:1:3
# rep -> repetitionNumber
rep = 2
# fType -> failType
fType = 1

runTestGrid(lenghtGrid,heightGrid,r,f,rep,fType)
# Printed only after runTestGrid has returned.
println("Run grid")

and I am getting the error:

WARNING: Node state is inconsistent: node 7 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
WARNING: Node state is inconsistent: node 6 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
WARNING: Node state is inconsistent: node 3 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
WARNING: Node state is inconsistent: node 4 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
WARNING: Node state is inconsistent: node 8 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
WARNING: Node state is inconsistent: node 2 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
WARNING: Node state is inconsistent: node 9 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
WARNING: Node state is inconsistent: node 5 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
ERROR: LoadError: On worker 2:
UndefVarError: nx not defined
createAndRunGrid at teste.jl:21

Any ideas on how to solve it?


#2

This works for me:

julia> @everywhere using PyCall

julia> @everywhere pymath = pyimport("math")

julia> @everywhere pymath[:sin](3)

You need to import PyCall on all of the workers, and import the Python module on all of the workers.


#3

Using `@everywhere using PyCall`, I get the error:

WARNING: Node state is inconsistent: node 2 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
WARNING: Node state is inconsistent: node 8 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
WARNING: Node state is inconsistent: node 4 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
WARNING: Node state is inconsistent: node 9 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
WARNING: Node state is inconsistent: node 7 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
WARNING: Node state is inconsistent: node 5 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
WARNING: Node state is inconsistent: node 6 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
WARNING: Node state is inconsistent: node 3 failed to load cache from /home/lps/.julia/lib/v0.6/PyCall.ji. Got:
WARNING: can only precompile from node 1
ERROR: LoadError: On worker 2:
can only precompile from node 1

That format worked on Julia 0.6.0, but when I installed Julia on this computer I got version 0.6.2, and from what I have seen the newer version affects this syntax.


#4

It works fine for me in 0.6.2.

I’m not sure what kind of cluster system you have, but maybe you didn’t upgrade all the nodes to 0.6.2? You’d certainly get this kind of problem if you run 0.6.0 on some of the nodes and 0.6.2 on others.


#5

I am testing with 2 nodes that I am sure have Julia 0.6.2 installed.


#6

It seems to me that you may need to run `using PyCall` (using a single thread) on each machine. This will precompile the package and create the *.ji file that each node will be looking for.
Once you have done this, try your code again.


#7

How am I supposed to do that? By using @everywhere? Or by opening a terminal on each machine and running it there?


#8

Normally I thought that the .ji file is read only on the master node, and the serialized data is sent over the network to the worker nodes. (So the worker nodes do not even need access to the file system.)

You could certainly try logging into each node and doing Pkg.update(); using PyCall from a single julia process, but I didn’t think it should be necessary? Wouldn’t hurt to try this as a debugging step, however … you might see that there is some error on one of the nodes.