Hi guys, hope you are all well.
I am having an issue with multiple dispatch and the DataFrame type from DataFrames.jl.
Below is my code…
mutable struct RandomForestClassifier{P} <: Classifier
predict::P
storedata::Result
function RandomForestClassifier(X::Array, Y::Array, rng = Random.GLOBAL_RNG; max_depth = 6,
min_node_records = 1,
n_features_per_node = Int(floor(sqrt(size(X, 2)))), n_trees = 100, cuda = false)
if cuda == true
X = CuArray(X)
Y = CuArray(Y)
end
storedata = fit(TREECLASS(), X, Y, rng, max_depth, min_node_records,
Int(floor(sqrt(size(X, 2)))), n_trees)
predict(xt::Array) = rf_predict(storedata, xt)
new{typeof(predict)}(predict, storedata)
end
function RandomForestClassifier(X::DataFrame, Y::Array, rng = Random.GLOBAL_RNG;
max_depth = 6,
min_node_records = 1,
weights = NoWeights(Dict()), cuda = false,
n_features_per_node = Int(floor(sqrt(size(X, 2)))),
n_trees = 100)
classifiers = []
treec = 0
n_features = size(X)[1]
divamount = n_trees / n_features
if cuda == true
Y = CuArray(Y)
end
for data in eachcol(X)
if cuda == true
data = CuArray(data)
end
mdl = RandomForestClassifier(data, Y, n_trees = divamount)
push!(classifiers, mdl)
end
predict(xt) = _compare_predCat(classifiers, xt)
new{typeof(predict)}(predict, result)
end
end
The issue I am facing is that when this constructor is called with the Array type, everything works as it should. However, whenever I try to use it with the DataFrame type, I always seem to get a methoderror. I even tried changing the type to the type that Julia was saying I was passing (I was passing a DataFrame, as proven with typeof()) and when I try that, the method error just changes into saying no method matching and then some other obscure type. It is weird to me that typeof() tells me it is of DataFrame type, but then I get
" closest candidates are RandomForestClassifier(::Array, ::Array) or RandomForestClassifier(::DataFrame, ::Array).
Is this a bug? It seems like DataFrames has been acting a bit weird since 0.24, the columns seem to be stored in some sort of PooledDataArray instead now, anyway… Which I am sure is a sub-type of AbstractArray… I am not really sure why it is impossible to pass a DataFrame here.