Error launching Slurm job: no method matching replace(::String, ::String, ::String)

Steps to reproduce:

  1. log in and request an interactive session:
    srun -A me -p free --pty bash -i
  2. cd to a network share that is accessible from every node on the cluster:
    cd /dfs5/myshare/
  3. open the Julia REPL, activate the environment, and confirm the packages:
    using Pkg; Pkg.activate("./"); Pkg.status()
  [6e4b80f9] BenchmarkTools v0.5.0
  [052768ef] CUDA v1.3.3
  [34f1f09b] ClusterManagers v0.3.2
  [92fee26a] GZip v0.5.1
  [033835bb] JLD2 v0.2.0
  [682c06a0] JSON v0.21.1
  [91a5bcdd] Plots v1.6.3
  [8ba89e20] Distributed 
  [37e2e46d] LinearAlgebra 
  [9a3f8284] Random 
  4. include the following example script from ClusterManagers.jl, where free is a valid Slurm partition:
    include("slurmtest.jl")

slurmtest.jl:
using Distributed
using ClusterManagers

# Arguments to the Slurm srun(1) command can be given as keyword
# arguments to addprocs.  The argument name and value is translated to
# a srun(1) command line argument as follows:
# 1) If the length of the argument is 1 => "-arg value",
#    e.g. t="0:1:0" => "-t 0:1:0"
# 2) If the length of the argument is > 1 => "--arg=value"
#    e.g. time="0:1:0" => "--time=0:1:0"
# 3) If the value is the empty string, it becomes a flag value,
#    e.g. exclusive="" => "--exclusive"
# 4) If the argument contains "_", they are replaced with "-",
#    e.g. mem_per_cpu=100 => "--mem-per-cpu=100"
addprocs(SlurmManager(2), partition="free", t="00:5:00")

hosts = []
pids = []
for i in workers()
	host, pid = fetch(@spawnat i (gethostname(), getpid()))
	push!(hosts, host)
	push!(pids, pid)
end

# The Slurm resource allocation is released when all the workers have
# exited
for i in workers()
	rmprocs(i)
end
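
For reference, the translation rules described in the comments above can be sketched roughly as follows (a hypothetical helper written for illustration only; kwarg_to_srun_flag is not part of ClusterManagers):

# Sketch of the documented kwarg-to-srun translation rules
function kwarg_to_srun_flag(name::Symbol, value)
    arg = replace(String(name), "_" => "-")   # rule 4: underscores become dashes
    if string(value) == ""
        return "--$arg"                       # rule 3: empty value => bare flag
    elseif length(arg) == 1
        return "-$arg $value"                 # rule 1: single-letter name
    else
        return "--$arg=$value"                # rule 2: longer name
    end
end

kwarg_to_srun_flag(:t, "00:5:00")         # "-t 00:5:00"
kwarg_to_srun_flag(:mem_per_cpu, 100)     # "--mem-per-cpu=100"
kwarg_to_srun_flag(:exclusive, "")        # "--exclusive"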

Running this, I get the following error:

Error launching Slurm job:
ERROR: LoadError: TaskFailedException:
MethodError: no method matching replace(::String, ::String, ::String)
Closest candidates are:
  replace(::String, ::Pair{#s67,B} where B where #s67<:AbstractChar; count) at strings/util.jl:421
  replace(::String, ::Pair{#s64,B} where B where #s64<:Union{Tuple{Vararg{AbstractChar,N} where N}, Set{#s61} where #s61<:AbstractChar, AbstractArray{#s62,1} where #s62<:AbstractChar}; count) at strings/util.jl:426
  replace(::String, ::Pair; count) at strings/util.jl:433
  ...
Stacktrace:
 [1] launch(::SlurmManager, ::Dict{Symbol,Any}, ::Array{WorkerConfig,1}, ::Base.GenericCondition{Base.AlwaysLockedST}) at /data/homezvol2/me/.julia/packages/ClusterManagers/7pPEP/src/slurm.jl:28
 [2] (::Distributed.var"#39#42"{SlurmManager,Dict{Symbol,Any},Array{WorkerConfig,1},Base.GenericCondition{Base.AlwaysLockedST}})() at ./task.jl:358
Stacktrace:
 [1] wait at ./task.jl:267 [inlined]
 [2] addprocs_locked(::SlurmManager; kwargs::Base.Iterators.Pairs{Symbol,String,Tuple{Symbol,Symbol},NamedTuple{(:partition, :t),Tuple{String,String}}}) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.4/Distributed/src/cluster.jl:494
 [3] addprocs(::SlurmManager; kwargs::Base.Iterators.Pairs{Symbol,String,Tuple{Symbol,Symbol},NamedTuple{(:partition, :t),Tuple{String,String}}}) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.4/Distributed/src/cluster.jl:441
 [4] top-level scope at /dfs5/myshare/Software_a1/DTMwork/demos/distributed.jl:8
 [5] include(::String) at ./client.jl:439
 [6] top-level scope at REPL[3]:1
 [7] eval(::Module, ::Any) at ./boot.jl:331
 [8] eval_user_input(::Any, ::REPL.REPLBackend) at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.4/REPL/src/REPL.jl:86
 [9] run_backend(::REPL.REPLBackend) at /data/homezvol2/me/.julia/packages/Revise/ucYAZ/src/Revise.jl:1184
 [10] top-level scope at REPL[3]:0
in expression starting at /dfs5/myshare/Software_a1/DTMwork/demos/distributed.jl:8
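
The root cause appears to be the call at slurm.jl:28 in ClusterManagers v0.3.2 (frame [1] above), which passes three strings to replace. That form was deprecated in Julia 0.7 and removed in 1.0; on Julia 1.x, replace takes a Pair (old => new). A minimal illustration in the REPL (the string "mem_per_cpu" is just an example, not necessarily what slurm.jl operates on):

replace("mem_per_cpu", "_", "-")      # MethodError: no method matching replace(::String, ::String, ::String)
replace("mem_per_cpu", "_" => "-")    # returns "mem-per-cpu" (the Pair form)

Patching that call to the Pair form locally, or updating to a ClusterManagers release that has already done so, may avoid the error; the sbatch approach below sidesteps it entirely.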

It seems we need to launch via sbatch instead and use the @everywhere macro to activate and instantiate the environment on every worker.
The following works:

sbatch_run.sh:
#!/bin/bash
#SBATCH -A <account>
#SBATCH --job-name="sbatch_clustermanagers"
#SBATCH --output="sbatch_clustermanagers.%j.%N.out"
#SBATCH --partition=<partition>
#SBATCH --nodes=2
#SBATCH --export=ALL
#SBATCH --ntasks-per-node=6
#SBATCH -t 01:00:00
export SLURM_NODEFILE=`generate_pbs_nodefile`
../../Binary/julia-1.4.2/bin/julia --machine-file $SLURM_NODEFILE /dfs5/myshare/distributed.jl
sbatch_run.jl (the Julia script invoked above as /dfs5/myshare/distributed.jl):
using Distributed   # for @everywhere, workers(), @spawnat, rmprocs
@everywhere using Pkg
@everywhere Pkg.activate("./")
@everywhere Pkg.instantiate()
using ClusterManagers

hosts = []
pids = []
for i in workers()
	host, pid = fetch(@spawnat i (gethostname(), getpid()))
	push!(hosts, host)
	push!(pids, pid)
end

# The Slurm resource allocation is released when all the workers have
# exited
for i in workers()
	rmprocs(i)
end
println("\n hosts are: \n")
display(hosts)
println("\n pids are: \n")
display(pids)
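
The job is then submitted from the login node (with the account and partition placeholders filled in):

sbatch sbatch_run.sh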

Output:

Activating environment at `/dfs5/myshare/Project.toml`
      From worker 3:     Activating environment at `/dfs5/mypart/Project.toml`
      From worker 9:     Activating environment at `/dfs5/myshare/Project.toml`
      From worker 13:    Activating environment at `/dfs5/myshare/Project.toml`
      From worker 10:    Activating environment at `/dfs5/myshare/Project.toml`
      From worker 11:    Activating environment at `/dfs5/myshare/Project.toml`
      From worker 12:    Activating environment at `/dfs5/myshare/Project.toml`
      From worker 2:     Activating environment at `/dfs5/myshare/Project.toml`
      From worker 4:     Activating environment at `/dfs5/myshare/Project.toml`
      From worker 8:     Activating environment at `/dfs5/myshare/Project.toml`
      From worker 6:     Activating environment at `/dfs5/myshare/Project.toml`
      From worker 5:     Activating environment at `/dfs5/myshare/Project.toml`
      From worker 7:     Activating environment at `/dfs5/myshare/Project.toml`

 hosts are: 

12-element Array{Any,1}:
 "hpc3-14-25"
 "hpc3-14-16"
 "hpc3-14-25"
 "hpc3-14-25"
 "hpc3-14-25"
 "hpc3-14-25"
 "hpc3-14-25"
 "hpc3-14-16"
 "hpc3-14-16"
 "hpc3-14-16"
 "hpc3-14-16"
 "hpc3-14-16"
 pids are: 

12-element Array{Any,1}:
 18023
  6674
 18082
 18083
 18084
 18085
 18086
  6736
  6737
  6738
  6739
  6740