
Here is my code to train all 49 regression models to predict house prices in Boston.
@tlienart helped w/ training all MLJ models
@mthelm85 helped w/ one_hot_encode

Functions to clean data, load & train all models:

Code

#load packages (Statistics provides mean(), used in the custom R² below)
using MLJ, RDatasets, TableView, DataFrames, Statistics
################################################################################
#Helper functions: one_hot_encode / AZ(X) / load_m / train_m
#one_hot_encode: replace every column with 0/1 indicator columns, one per level.
function one_hot_encode(d::DataFrame)
    encoded = DataFrame()
    for col in names(d), val in unique(d[!, col])
        lab = string(col) * "_" * string(val)
        encoded[!, Symbol(lab) ] = ifelse.(d[!, col] .== val, 1, 0)
    end
    return encoded
end
#AZ: one-hot encode the non-numeric columns of X & coerce Count columns to Continuous.
function AZ(X)
    sch = schema(X)
    tn  = [Int, Float16, Float32, Float64]
    vs  = Symbol[]                       # names of the non-numeric columns
    for (name, type) in zip(sch.names, sch.types)
        if type ∉ tn
            push!(vs, name)
        end
    end
    Xd    = DataFrame(X)
    X_ohe = one_hot_encode(Xd[:, vs])
    Xd    = hcat(X_ohe, select(Xd, Not(vs)))
    Xd    = coerce(Xd, autotype(Xd, :discrete_to_continuous))
    return Xd
end
#load_m: load the code for every model in `model_list` & return the model names.
@inline function load_m(model_list)
    model_names = Vector{String}(undef, length(model_list))
    @inbounds for (i, model) in enumerate(model_list)
        load(model.name, pkg=model.package_name, verbosity=0)
        model_names[i] = model.name
    end
    return model_names
end
#train_m: train one model & return [score, training time in seconds].
#NOTE: if we do target engineering we need to transform y back (invtrans) to compare scores.
@inline function train_m(m::String, X, y, train, test, pr, meas; invtrans=identity)
    t1 = time_ns()
    println(m)
    # A couple of boosting models need more rounds than their defaults.
    if m == "XGBoostRegressor"
        mdl = eval(Meta.parse("$(m)(num_round=500)"))
    elseif m == "EvoTreeRegressor"
        mdl = eval(Meta.parse("$(m)(nrounds=1500)"))
    else
        mdl = eval(Meta.parse("$(m)()"))
    end
    mach = machine(mdl, X, y)
    fit!(mach, rows=train, verbosity=0)
    ŷ = pr(mach, rows=test)
    ŷ = invtrans.(ŷ)
    y = invtrans.(y)
    if meas == "Rsq"
        # Custom out-of-sample R² (in %), with the training-sample mean of y as the baseline.
        ê = ŷ .- y[test]
        ẽ = mean(y[train]) .- y[test]
        s = (1 - (ê'ê)/(ẽ'ẽ)) * 100
    elseif meas == rmsl
        s = meas(abs.(ŷ), abs.(y[test]))   # abs.() so rmsl is defined (used for Ames)
    else
        s = meas(ŷ, y[test])
    end
    t2 = time_ns()
    return [round(s, sigdigits=5), round((t2 - t1)/1.0e9, sigdigits=5)]
end

Applied to the Boston housing data:

Code

X, y = @load_boston;
train, test = partition(eachindex(y), .7, rng=333);
X = AZ(X)
m_match = models(matching(X, y), x -> x.prediction_type == :deterministic);
m_names = load_m(m_match);

# 1. Levels: y on X.
sc = [train_m(m, X, y, train, test, predict, rms) for m in m_names]
sc = hcat(sc...)';
showtable(hcat(m_names[sortperm(sc[:,1])], sc[sortperm(sc[:,1]), :]))

# 2. Log target: log(y) on X, predictions transformed back with exp before scoring.
sc = [train_m(m, X, log.(y), train, test, predict, rms, invtrans=exp) for m in m_names]
sc = hcat(sc...)';
showtable(hcat(m_names[sortperm(sc[:,1])], sc[sortperm(sc[:,1]), :]))

# 3. Log features: y on log.(X .+ 1).
sc = [train_m(m, log.(X .+ 1), y, train, test, predict, rms) for m in m_names]
sc = hcat(sc...)';
showtable(hcat(m_names[sortperm(sc[:,1])], sc[sortperm(sc[:,1]), :]))

# 4. Log-log: log(y) on log.(X .+ 1), predictions transformed back with exp.
sc = [train_m(m, log.(X .+ 1), log.(y), train, test, predict, rms, invtrans=exp) for m in m_names]
sc = hcat(sc...)';
showtable(hcat(m_names[sortperm(sc[:,1])], sc[sortperm(sc[:,1]), :]))

All 49 models can be trained in seconds & the out-of-sample scores are competitive!

Note that a lot more can be done:

  1. Moar Models (not currently in MLJ):
    MLJFlux.jl (WIP)
    @xiaodai’s JLBoost.jl, @joshday’s SparseRegression.jl, @rakeshvar’s AnyBoost.jl
    Also the MLJ Roadmap mentions: Turing.jl, Gen.jl, Soss.jl
  2. HP tuning (I currently use each model's default HPs):
    MLJTuning.jl (WIP) looks promising (see the TunedModel sketch after this list).
    I’d love to use @baggepinnen’s Hyperopt.jl to automatically tune all models w/ Bayesian optimization.
  3. Ensembling:
    MLJ has nice options for ensembles that I’d like to automate for a large number of models (see the EnsembleModel sketch after this list).
    In addition, @ppalmes’s AutoMLPipeline.jl is amazing (see discussion).
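
To illustrate point 2, here is a minimal sketch (not part of the script above) of grid-tuning one model with MLJ's TunedModel. The model, hyperparameter & range are only illustrative; X, y, train, test are as defined above.

# Tuning sketch: DecisionTreeRegressor is assumed to be loaded already.
tree  = DecisionTreeRegressor()
r     = range(tree, :min_samples_split, lower=2, upper=20)
tuned = TunedModel(model=tree, tuning=Grid(resolution=10),
                   resampling=CV(nfolds=5), range=r, measure=rms)
mach  = machine(tuned, X, y)
fit!(mach, rows=train, verbosity=0)
rms(predict(mach, rows=test), y[test])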
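
And for point 3, a sketch of bagging one model with MLJ's EnsembleModel; the parameters are illustrative, and depending on the MLJ version the atomic-model keyword is `atom` or `model`.

# Ensembling sketch: bag 100 decision trees on 80% subsamples
# (assumes the older `atom=` keyword; newer MLJ releases use `model=`).
forest = EnsembleModel(atom=DecisionTreeRegressor(), n=100, bagging_fraction=0.8)
mach   = machine(forest, X, y)
fit!(mach, rows=train, verbosity=0)
rms(predict(mach, rows=test), y[test])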

In addition to the Boston data, I’ve run all models on:
Regression: Ames Iowa housing/Diamonds/King County Housing
Classification: Crabs/Iris/Titanic/Pima
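
For the classification sets the same helpers carry over; only the prediction function and the measure change. A sketch for Iris (illustrative, results not shown):

# Classification sketch: probabilistic models, scored by misclassification rate.
X, y = @load_iris;
train, test = partition(eachindex(y), .7, rng=333);
m_match = models(matching(X, y), x -> x.prediction_type == :probabilistic);
m_names = load_m(m_match);
sc = [train_m(m, X, y, train, test, predict_mode, misclassification_rate) for m in m_names]
sc = hcat(sc...)';
showtable(hcat(m_names[sortperm(sc[:,1])], sc[sortperm(sc[:,1]), :]))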

MLJ community: please let me know if this works for you or if you have any feedback.

Paulito, would it be possible to use your package to automatically train every model on the Boston housing data? And consider some stacked ensembles of the set of models?
