Package Versions:
Julia - v1.8.3
Flux - v0.13.9
Hyperopt - v0.5.6
I’m attempting to use the Hyperopt.jl package to help tune the layers and number of nodes in a neural network. I would like to be able to use their implementation of Hyperband to do this, but I’ve run into a very odd stumbling block while trying to get it to run. I’ve reduced the problem to just involve tuning the number of units in two hidden layers - and that works fine. The problem seems to be if I add just one more layer to the function, or if I try to add a method for selecting the number of layers to use as well, which results in anything beyond the first two variables to be marked as “undefined”.
Here is the code for training the neural network using Flux:
function trainModel(traindata, train_l, units; epochs=1)
# Format architecture
n_inputs = size(traindata, 2);
n_outputs = 2;
units = units[1:n_layers];
layers = [n_inputs, n_outputs]
for (i, item) in enumerate(units)
insert!(layers, 1 + i, item)
end
model = generate_neural_network(layers);
# Prepare parameters for training
L(x,y) = sum(Flux.logitcrossentropy(model(traindata'), train_l));
data = Flux.Data.DataLoader((traindata', train_l), batchsize=100, shuffle=true);
ps = Flux.params(model) # contains references to arrays in model
opt = Flux.Adam(0.01) # will store optimiser momentum, etc.
for i in 1:epochs
Flux.train!(L, ps, data, opt) # The training step
end
# Evaluate the model
#acc, recall, specificity, gmean = evaluateModel(Ntestdata, Ptestdata, model)
return model
end
# Takes in list of layers (including input size and output size) and generates architecture
function generate_neural_network(layer_sizes::Array{Int})
layers = []
output_size = layer_sizes[end]
for i in 2:length(layer_sizes) - 1
push!(layers, Dense(layer_sizes[i - 1], layer_sizes[i]))
end
push!(layers, Dense(layer_sizes[end - 1], output_size, identity)) #Gives Identity activation for the last layer
return Chain(layers...)
end
Here is the code for evaluating the neural network. :
using MLJBase:confusion_matrix
function evaluateModel(testdata, test_l, model) # Pass in the model
prediction = softmax(model(testdata'));
ŷ=Flux.onecold(prediction,0:1); # One cold encoding does the opposite of one hot apparently.
# Accuracy
acc = (sum(test_l .== ŷ) / length(test_l))*100
# The confusion matrix takes in the one-cold encoded predictions and the regular vector lables.
confusematrix = confusion_matrix(ŷ, test_l)
tn = confusematrix[1,1];
tp = confusematrix[2,2];
fp = confusematrix[2,1];
fn = confusematrix[1,2];
recall = tp/(tp+fn);
specificity = tn/(tn+fp);
gmean = sqrt(recall*specificity);
return acc, recall, specificity, gmean
end
Here is the objective function to be optimized using Hyperband. I’m attempting to optimize the geometric mean of specificity and sensitivity:
# The objective function for returning gmean
function objectiveVar(traindata, train_l, testdata, test_l, resources, units)
model = trainModel(traindata, train_l, units; epochs=resources);
acc, recall, specificity, gmean = evaluateModel(testdata, test_l, model);
return gmean
end
And finally, the code for actually running Hyperband:
hohb = @hyperopt for resources=50, sampler=Hyperband(R=50, η=3, inner=RandomSampler()),
units1 = rand(1:10, 1000),
units2 = rand(1:10, 1000)
if state !== nothing
x0 = state;
else
x0 = [units1, units2];
end
gmean = objectiveVar(traindata, train_l, testdata, test_l, resources, [units1, units2]); # will return GMean
gmean, (units1, units2)
end
This version works fine - it tunes the number of nodes and returns the maximized gmean. Adding just one node seems to result in the “not defined” error:
hohb = @hyperopt for resources=50, sampler=Hyperband(R=50, η=3, inner=RandomSampler()),
units1 = rand(1:10, 1000),
units2 = rand(1:10, 1000),
units3 = rand(1:10, 1000)
if state !== nothing
x0 = state;
else
x0 = [units1, units2, units3];
end
gmean = objectiveVar(traindata, train_l, testdata, test_l, resources, [units1, units2, units3]); # will return GMean
gmean, (units1, units2, units3)
end
UndefVarError: units3 not defined
Stacktrace:
[1] macro expansion
@ .\In[91]:10 [inlined]
[2] var"#277###hyperopt_objective#406"(resources::Float64, state::Tuple{Int64, Int64, Int64})
@ Main C:\Users\Admin\.julia\packages\Hyperopt\1AdYO\src\Hyperopt.jl:151
[3] #28
@ C:\Users\Admin\.julia\packages\Hyperopt\1AdYO\src\samplers.jl:148 [inlined]
[4] iterate
@ .\generator.jl:47 [inlined]
[5] _collect(c::Vector{Tuple{Int64, Int64, Int64}}, itr::Base.Generator{Vector{Tuple{Int64, Int64, Int64}}, Hyperopt.var"#28#31"{var"##277###hyperopt_objective#406", Float64}}, #unused#::Base.EltypeUnknown, isz::Base.HasShape{1})
@ Base .\array.jl:807
[6] collect_similar(cont::Vector{Tuple{Int64, Int64, Int64}}, itr::Base.Generator{Vector{Tuple{Int64, Int64, Int64}}, Hyperopt.var"#28#31"{var"##277###hyperopt_objective#406", Float64}})
@ Base .\array.jl:716
[7] map(f::Function, A::Vector{Tuple{Int64, Int64, Int64}})
@ Base .\abstractarray.jl:2933
[8] successive_halving(ho::Hyperoptimizer{Hyperband, var"##277###hyperopt_objective#406"}, n::Int64, r::Float64, s::Int64; threads::Bool)
@ Hyperopt C:\Users\Admin\.julia\packages\Hyperopt\1AdYO\src\samplers.jl:148
[9] hyperband(ho::Hyperoptimizer{Hyperband, var"##277###hyperopt_objective#406"}; threads::Bool)
@ Hyperopt C:\Users\Admin\.julia\packages\Hyperopt\1AdYO\src\samplers.jl:125
[10] hyperband
@ C:\Users\Admin\.julia\packages\Hyperopt\1AdYO\src\samplers.jl:109 [inlined]
[11] optimize(ho::Hyperoptimizer{Hyperband, var"##277###hyperopt_objective#406"})
@ Hyperopt C:\Users\Admin\.julia\packages\Hyperopt\1AdYO\src\samplers.jl:98
[12] top-level scope
@ C:\Users\Admin\.julia\packages\Hyperopt\1AdYO\src\Hyperopt.jl:193
[13] eval
@ .\boot.jl:368 [inlined]
[14] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
@ Base .\loading.jl:1428
I’m not sure why this would be an issue - I could see it if I changed the syntax pretty heavily, but all I did was add a variable. I am also new to Julia however, and might be misunderstanding something basic.