using RDatasets, DataFrames, JLBoost, MLJ
using LossFunctions: L2DistLoss

# Load the Boston housing data and split the row indices 70/30 into train/test.
d = dataset("MASS", "Boston")
train, test = partition(eachindex(d[:, 1]), 0.7, rng=333)

target = :MedV
# NOTE(review): this assumes `names(d)` returns Symbols; if it returns Strings,
# `target` will not be removed from the feature list — confirm for your DataFrames version.
features = setdiff(names(d), [target])
# NOTE(review): `warm_start` has one entry per row of `d`, but the model below is
# fit on `dt` (training rows only) — confirm this is what `jlboost` expects.
warm_start = fill(0.0, nrow(d))

dt = d[train, :]
dh = d[test, :]
yt = d[train, target]
yh = d[test, target]
y = d[!, target]

loss = L2DistLoss()

# Candidate values for each hyper-parameter. The leading value in each union
# (0.3, 0, 0, 6) is presumably the package default — TODO confirm.
g_η = 0.3 ∪ range(0, 1, length=3)
g_λ = 0 ∪ range(0, 1, length=3)
g_γ = 0 ∪ range(0, 1, length=3)
g_md = 6 ∪ (1:10)

# Full Cartesian grid; each element is a tuple (η, λ, γ, max_depth).
G = Iterators.product(g_η, g_λ, g_γ, g_md)

sc = Float64[]  # test-set RMS score of each grid point
p = []          # hyper-parameter tuple of each grid point, in the same order as `sc`

@time for g in G
    η, λ, γ, md = g
    m = jlboost(dt, target, features, warm_start, loss;
                eta = η, lambda = λ, gamma = γ, max_depth = md)
    ŷ = predict(m, dh)
    push!(sc, rms(ŷ, yh))
    push!(p, (η, λ, γ, md))
end

# Best score and every hyper-parameter combination that attains it.
best = minimum(sc)
p[findall(==(best), sc)]
This does grid search over the entire grid G.
Q1: How can I create a new grid, G1, which is a random subset of G with 30 elements?
However, I also want G1 to include the combination of all the default hyper-parameters.
Q2: does anyone know all the options currently available in Julia for tuning hyper-parameters?
Currently the only package tagged "hyper-parameter optimization" in the package listing (https://pkg.julialang.org/docs/) is @baggepinnen’s Hyperopt.jl. It looks promising, but I can’t load it because it requires CMake, which isn’t building right now.