Hi there,
I’ve recently added support for multi-threading to CounterfactualExplanations.jl. This is my first time using MPI, so there are probably many things that can be improved here (any thoughts are very much welcome).
Although things have mostly been running smoothly, I have recently run into errors that I think are related to serialization and `MPI.gather`. I can no longer reproduce this on my personal device, so here is the error message returned on our HPC:
Progress (multi-threaded counterfactual computation): 100% Time: 0:00:34 ( 2.02 s/it)
ERROR: LoadError: invalid redefinition of constant ##425
Stacktrace:
[1] deserialize_typename(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, number::UInt64)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:1323
[2] deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, #unused#::Type{Core.TypeName})
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:1285
[3] handle_deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, b::Int32)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:962
[4] deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}})
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:816
[5] handle_deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, b::Int32)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:873
[6] deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}})
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:816
[7] deserialize_datatype(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, full::Bool)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:1409
[8] handle_deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, b::Int32)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:869
[9] deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}})
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:816
[10] deserialize_datatype(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, full::Bool)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:1409
[11] handle_deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, b::Int32)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:869
[12] deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}})
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:816
[13] deserialize_datatype(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, full::Bool)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:1409
[14] handle_deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, b::Int32)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:869
[15] deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}})
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:816
[16] handle_deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, b::Int32)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:884
[17] deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, t::DataType)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:1499
[18] handle_deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, b::Int32)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:880
[19] deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, t::DataType)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:1487
[20] handle_deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, b::Int32)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:885
[21] deserialize_fillarray!(A::Vector{AbstractCounterfactualExplanation}, s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}})
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:1267
[22] deserialize_array(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}})
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:1259
[23] handle_deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, b::Int32)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:867
[24] deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}})
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:816
[25] handle_deserialize(s::Serializer{Base.GenericIOBuffer{SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true}}}, b::Int32)
@ Serialization /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:922
[26] deserialize
@ /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:816 [inlined]
[27] deserialize
@ /scratch/paltmeyer/.julia/juliaup/julia-1.9.3+0.x64.linux.gnu/share/julia/stdlib/v1.9/Serialization/src/Serialization.jl:803 [inlined]
[28] deserialize(x::SubArray{UInt8, 1, Vector{UInt8}, Tuple{UnitRange{Int64}}, true})
@ MPI ~/.julia/packages/MPI/RL21q/src/MPI.jl:17
[29] #115
@ ./abstractarray.jl:0 [inlined]
[30] iterate
@ ./generator.jl:47 [inlined]
[31] collect_to!(dest::Vector{Vector{AbstractCounterfactualExplanation}}, itr::Base.Generator{Base.Iterators.Zip{Tuple{Vector{Int32}, Vector{Int32}}}, MPI.var"#115#116"{MPI.VBuffer{Vector{UInt8}}}}, offs::Int64, st::Tuple{Int64, Int64})
@ Base ./array.jl:840
[32] collect_to_with_first!(dest::Vector{Vector{AbstractCounterfactualExplanation}}, v1::Vector{AbstractCounterfactualExplanation}, itr::Base.Generator{Base.Iterators.Zip{Tuple{Vector{Int32}, Vector{Int32}}}, MPI.var"#115#116"{MPI.VBuffer{Vector{UInt8}}}}, st::Tuple{Int64, Int64})
@ Base ./array.jl:818
[33] collect(itr::Base.Generator{Base.Iterators.Zip{Tuple{Vector{Int32}, Vector{Int32}}}, MPI.var"#115#116"{MPI.VBuffer{Vector{UInt8}}}})
@ Base ./array.jl:792
[34] gather(obj::Vector{AbstractCounterfactualExplanation}, comm::MPI.Comm; root::Int64)
@ MPI ~/.julia/packages/MPI/RL21q/src/collective.jl:366
[35] gather(obj::Vector{AbstractCounterfactualExplanation}, comm::MPI.Comm)
@ MPI ~/.julia/packages/MPI/RL21q/src/collective.jl:352
[36] parallelize(::MPIExt.MPIParallelizer, ::typeof(generate_counterfactual), ::Vector{Matrix{Float32}}, ::Vararg{Any}; verbose::Bool, kwargs::Base.Pairs{Symbol, Symbol, Tuple{Symbol, Symbol}, NamedTuple{(:initialization, :converge_when), Tuple{Symbol, Symbol}}})
@ MPIExt ~/.julia/packages/CounterfactualExplanations/AWgmA/ext/MPIExt.jl:122
[37] benchmark(data::CounterfactualData; models::Dict{String, ECCCo.ConformalModel}, generators::Dict{String, CounterfactualExplanations.Generators.GradientBasedGenerator}, measure::Vector{Function}, n_individuals::Int64, suppress_training::Bool, factual::Int64, target::Int64, store_ce::Bool, parallelizer::MPIExt.MPIParallelizer, dataname::String, verbose::Bool, kwrgs::Base.Pairs{Symbol, Symbol, Tuple{Symbol, Symbol}, NamedTuple{(:initialization, :converge_when), Tuple{Symbol, Symbol}}})
@ CounterfactualExplanations.Evaluation ~/.julia/packages/CounterfactualExplanations/AWgmA/src/evaluation/benchmark.jl:245
[38] run_benchmark(exper::Experiment, model_dict::Dict{String, ECCCo.ConformalModel})
@ Main ~/code/ECCCo.jl/experiments/benchmarking/benchmarking.jl:80
[39] benchmark!(outcome::ExperimentOutcome, exper::Experiment)
@ Main ~/code/ECCCo.jl/experiments/experiment.jl:69
[40] run_experiment(exper::Experiment; save_output::Bool, only_models::Bool)
@ Main ~/code/ECCCo.jl/experiments/experiment.jl:95
[41] run_experiment
@ ~/code/ECCCo.jl/experiments/experiment.jl:79 [inlined]
[42] #run_experiment#5
@ ~/code/ECCCo.jl/experiments/experiment.jl:123 [inlined]
[43] top-level scope
@ ~/code/ECCCo.jl/experiments/gmsc.jl:16
[44] include(fname::String)
@ Base.MainInclude ./client.jl:478
[45] top-level scope
@ ~/code/ECCCo.jl/experiments/run_experiments.jl:35
In particular, it seems that the error arises right here. I have changed that file slightly in this recent commit but I’d be surprised if this does the trick.
Since the main error message says `invalid redefinition of constant ##425`, I should mention that in upstream steps of this workflow I define a set of constant variables that are just global parameters for the experiments I am running. They are not being explicitly overwritten anywhere in the code though, so I doubt that’s related.
Any help would be much appreciated.