I am trying to build a function that runs in parallel. It returns no output when run in parallel, but works fine without the parallel loops. Being new to Julia, I am sure I am doing something wrong here. The function also throws an error when assigning the num_round variable (num_round = modelparam[:num_round]) near the end of the code. Any help in fixing the code is much appreciated.
using Distributed
using Printf, Statistics, DecisionTree, StatsBase
using ScikitLearn.CrossValidation: KFold
using Random
Random.seed!(123);
using MLBase, XGBoost
using ScikitLearn: @sk_import
@sk_import metrics: (recall_score, roc_auc_score, accuracy_score, classification_report)
using MacroTools
# Turn a `key: value` (or `key => value`, `key = value`) pair into a
# `"key" => value` expression for the Dict constructor.
function prockey(key)
    if @capture(key, (a_ : b_) | (a_ => b_) | (a_ = b_))
        return :($(string(a)) => $b)
    end
    error("Invalid json key syntax $key")
end

# Recursively rewrite `{...}` literals into `Dict{String,Any}(...)` calls.
function procmap(d)
    if @capture(d, f_(xs__))
        return :($f($(map(procmap, xs)...)))
    elseif !@capture(d, {xs__})
        return d
    else
        return :($(Dict{String, Any})($(map(prockey, xs)...)))
    end
end

macro json(ex)
    esc(MacroTools.prewalk(procmap, ex))
end
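(For context, @json just lets me build and print nested Dict{String,Any}s with a JSON-like literal syntax; the values here are only an illustration:)

@json {alg: "xgboost", params: {eta: 0.1, num_round: 10}}
# -> Dict{String,Any}("alg" => "xgboost", "params" => Dict{String,Any}("eta" => 0.1, "num_round" => 10))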
@everywhere using XGBoost, MLBase, StatsBase
@everywhere begin
    function train_model(train_x, train_y, param_grid; n_iters=1, n_splits=3)
        modelscore::Float64 = 0.0
        score = 0.0
        cv_score = Array{Float64,1}(undef, n_splits)
        modelparam = Dict{Symbol,Any}()
        sampler(a) = StatsBase.sample(a)
        xgparam = Dict{Symbol,Any}()
        n = size(train_y, 1)
        k_fold = KFold(n, n_folds=n_splits)
        # Random search: draw one candidate parameter set per iteration.
        @sync @distributed for i in 1:n_iters
            for key in keys(param_grid)
                xgparam[key] = sampler(param_grid[key])
            end
            num_round = xgparam[:num_round]
            j = 1
            # Cross-validate the candidate parameter set.
            @sync @distributed for (train_index, test_index) in k_fold
                X_train, X_test = train_x[train_index, :], train_x[test_index, :]
                y_train, y_test = train_y[train_index], train_y[test_index]
                model = xgboost(X_train, num_round, label=y_train, param=xgparam,
                                silent=1, objective="binary:logistic")
                y_pred = Array{Int64,1}(map(val -> round(val), XGBoost.predict(model, X_test)))
                # cv_score[j] = recall_score(y_test, y_pred)
                cv_score[j] = roc_auc_score(y_test, y_pred)
                j = j + 1
            end
            score = StatsBase.mean(cv_score)
            # Keep the best parameter set seen so far.
            if score > modelscore
                modelscore = score
                modelparam = xgparam
            end
        end
        # Fit the final model:
        num_round = 10 # modelparam[:num_round] is what I want here, but it throws an error
        model = xgboost(train_x, num_round, label=train_y, param=modelparam,
                        silent=1, objective="binary:logistic")
        return (model, modelscore, modelparam)
    end
end
param_grid = Dict{Symbol,Any}(:colsample_bytree => (0.7:0.0001:1.0),
                              :eta => (0.02:0.0002:0.2),
                              :max_depth => (2:2:30),
                              :subsample => (0.7:0.001:1.0),
                              :num_round => (10:5:100));
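(sampler above just draws one random value from each range, so one candidate parameter set looks like this; the draw shown is only an example:)

StatsBase.sample(10:5:100)                               # one random draw, e.g. 45
Dict(k => StatsBase.sample(v) for (k, v) in param_grid)  # one candidate parameter set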
model, best_score, best_param = train_model(X, y, param_grid, n_iters=10, n_splits=5); # first call to compile
# model, best_score, best_param = train_model(X, y, param_grid, n_iters=50, n_splits=10);
@printf("Best Score : %.4f", best_score)
@json(best_param)
Output:

Best Score : 0.0000
Dict{Symbol,Any} with 0 entries
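To narrow it down, I tried this stripped-down version (the names are just for illustration), and writes to variables captured by @sync @distributed are indeed lost on the calling process once workers exist:

using Distributed
addprocs(2)

function lost_updates(n)
    best = 0.0          # captured by the closure below
    scores = zeros(n)   # plain Array, not a SharedArray
    @sync @distributed for i in 1:n
        scores[i] = Float64(i)   # mutates a copy on the worker
        best = Float64(i)        # rebinds a worker-local variable
    end
    return best, scores
end

lost_updates(4)   # returns (0.0, [0.0, 0.0, 0.0, 0.0]) with workers added

I suspect the same thing happens to cv_score, modelscore, and modelparam above, which would explain why modelparam stays empty and modelparam[:num_round] errors, but I don't know the idiomatic way to collect the results back on the master process (SharedArrays? pmap? a @distributed reducer?).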