I prefer to keep the distributed setup separate from the actual code.
`@everywhere` is considered evil xD.
So, as an example, I often have a file called `setup.jl`,
which can be more or less complex:
using Distributed
using ClusterManagers
# Usage:
# - Set `export JULIA_PROJECT=`pwd``
# Select the cluster manager and the number of threads per worker.
# Inside a Slurm job (detected via SLURM_JOB_ID) one worker is requested per
# Slurm task; otherwise two workers are started on the local machine.
if haskey(ENV, "SLURM_JOB_ID")
    jobid = ENV["SLURM_JOB_ID"]
    ntasks = parse(Int, ENV["SLURM_NTASKS"])
    # Slurm only exports SLURM_CPUS_PER_TASK when --cpus-per-task was requested,
    # so fall back to one thread per worker instead of failing with a KeyError.
    cpus_per_task = parse(Int, get(ENV, "SLURM_CPUS_PER_TASK", "1"))
    @info "Running on Slurm cluster" jobid ntasks cpus_per_task
    manager = SlurmManager(ntasks)
else
    ntasks = 2
    # Split the available CPU threads evenly between the local workers, but never
    # go below one: div(1, 2) == 0 and a thread count of 0 is rejected by julia.
    cpus_per_task = max(1, div(Sys.CPU_THREADS, ntasks))
    @info "Running locally" ntasks
    manager = Distributed.LocalManager(ntasks, false)
end
# Push out anything already logged to stderr before the workers are started.
flush(stderr)
# Launch workers, each running with `cpus_per_task` threads.
# Every element of `exeflags` is handed to the worker as exactly one command-line
# argument, so the thread count is passed in the single-token `--threads=N` form
# rather than as "-t N" (one argument with an embedded space).
addprocs(manager; exeflags = ["--threads=$cpus_per_task"])
# Configure logging identically on every process.
@everywhere begin
    import Dates
    using Logging, LoggingExtras

    # Format of the timestamp that is prepended to every log message.
    const date_format = "HH:MM:SS"

    # Wrap `logger` so that records below `Logging.Info` are dropped and every
    # message is prefixed with the current time and the id of the emitting process.
    # Returns the wrapped logger.
    function dagger_logger(logger)
        logger = MinLevelLogger(logger, Logging.Info)
        logger = TransformerLogger(logger) do log
            timestamp = Dates.format(Dates.now(), date_format)
            merge(log, (; message = "$timestamp ($(myid())) $(log.message)"))
        end
        return logger
    end

    # Set the global logger: a flushing FileLogger when stderr is an IOStream,
    # a ConsoleLogger otherwise. The sink is routed through `dagger_logger`;
    # previously the helper was defined but never applied, so log lines carried
    # neither a timestamp nor the process id.
    if stderr isa IOStream
        FileLogger(stderr; always_flush = true)
    else
        ConsoleLogger(stderr)
    end |> dagger_logger |> global_logger
end
# Log basic runtime information on every process.
@everywhere begin
    # Every process except the one with id 1 announces its thread count.
    myid() == 1 || @info "Worker started" Base.Threads.nthreads()
    # Record the active project and the system image path, then log both.
    project = Base.active_project()
    sysimg = unsafe_string(Base.JLOptions().image_file)
    @info "Environment" sysimg project
end
# Evaluate code.jl on every process so its definitions are available everywhere.
@everywhere include("code.jl")
`code.jl`
then contains the actual code definitions.
I also have a `driver.jl`,
which holds the code that runs on the primary process to manage the computation.
Finally, I have a Slurm script:
#!/bin/bash
# Batch job for the primary Julia process: setup.jl is loaded first (-L),
# then driver.jl is run.
# Begin SLURM Directives
#SBATCH --job-name=Example
#SBATCH --time=1:00:00
#SBATCH --mem=0
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=16

# Clear the environment from any previously loaded modules
module purge &> /dev/null
module load julia

# Use the current working directory as the active Julia project
export JULIA_PROJECT="$(pwd)"

# Report which host the primary process runs on
HOSTNAME="$(hostname)"
printf 'Primary runs on %s\n' "${HOSTNAME}"

julia -L setup.jl driver.jl