julia> using SymbolicRegression

julia> function myloss(tree, dataset, options)
           @show tree
       end
myloss (generic function with 2 methods)

julia> options = SymbolicRegression.Options(
           binary_operators=[+, *, SymbolicRegression.div, -],
           npopulations=1,
           complexity_of_constants=99999,
           loss_function=myloss,
       );

julia> X = reshape([1, 2, 4, 5, 10, 25], 6, 1); y = [94.0]
1-element Vector{Float64}:
 94.0

julia> hof = EquationSearch(X, y; options=options, niterations=30)
tree = 94.0
ERROR: MethodError: Cannot `convert` an object of type Node{Float64} to an object of type Float64
The error itself makes sense, but the tree that was shown, tree = 94.0, is a constant-only solution, which I thought the large complexity_of_constants would have prohibited. Any hints?
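The MethodError comes from the loss function itself: a custom loss_function must return a scalar loss (of the dataset's loss type L), but @show returns its argument, so this myloss returns the Node. Also, a large complexity_of_constants makes constant-heavy expressions unattractive during selection, but it does not stop the search from generating and evaluating them, which is why the @show still fires on a constant tree. A minimal sketch of a loss that only prints the tree (myloss_debug is a hypothetical name; the signature matches the one used later in this thread):

using SymbolicRegression

# Sketch: print the candidate tree, but return an actual scalar loss
# so that SymbolicRegression can keep searching.
function myloss_debug(tree, dataset::Dataset{T,L}, options) where {T,L}
    @show tree
    return zero(L)  # dummy loss; replace with a real objective
end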
using SymbolicRegression

# Count how often each binary operator (indexed by its position in
# binary_operators) appears in the tree:
function count_binary_operators(tree::Node)::Dict{Int,Int}
    if tree.degree == 0
        Dict{Int,Int}()
    elseif tree.degree == 1
        count_binary_operators(tree.l)
    else
        left = count_binary_operators(tree.l)
        right = count_binary_operators(tree.r)
        # Merge the counts from both subtrees:
        for (k, v) in right
            left[k] = get(left, k, 0) + v
        end
        # Add the count for this node's operator:
        left[tree.op] = get(left, tree.op, 0) + 1
        left
    end
end
function myloss(tree, dataset::Dataset{T,L}, options) where {T,L}
    prediction, completed = eval_tree_array(tree, dataset.X, options)
    !completed && return L(Inf)
    loss = sum(abs2, prediction .- dataset.y)
    counts = count_binary_operators(tree)
    # Penalize any operator that is not used exactly once:
    penalty = L(0)
    for op in 1:4
        if !haskey(counts, op) || counts[op] != 1
            penalty += L(1)
        end
    end
    return loss + penalty
end
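# To see what the penalty does in isolation, here is a hypothetical,
# purely illustrative check (operators are numbered by their position in
# binary_operators): `+` (op 1) used twice and `-` (op 4) never used means
# two operators deviate from "exactly once", so the penalty is 2:
demo_counts = Dict(1 => 2, 2 => 1, 3 => 1)
sum(op -> (!haskey(demo_counts, op) || demo_counts[op] != 1) ? 1.0 : 0.0, 1:4)  # == 2.0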
options = Options(;
    binary_operators=[+, *, /, -],
    complexity_of_constants=100,
    loss_function=myloss,
)

EquationSearch([1 2 4 5 10 25]', [94.0]; options)
This gave me the output:

(((x5 * x5) - (x1 + x4)) / x1)

Is that correct? Or is another constraint needed?
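For the values in question (x1 = 1, x4 = 5, x5 = 10, i.e. the 1st, 4th, and 5th of the six input numbers), the expression does evaluate to 94; a quick standalone check:

x1, x4, x5 = 1, 5, 10
(x5 * x5 - (x1 + x4)) / x1  # == 94.0, but note x1 and x5 each appear twice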
If you also need at most one use of each integer, then here's the fix:
using SymbolicRegression

# Merge two count Dicts in place (adds r's counts into l):
function combine_counts(l, r)
    for (k, v) in r
        l[k] = get(l, k, 0) + v
    end
    l
end

# Count the operators and features used in the tree:
function get_counts(tree::Node)
    if tree.degree == 0
        Dict{Int,Int}(), (tree.constant ? Dict{Int,Int}() : Dict(tree.feature => 1))
    elseif tree.degree == 1
        get_counts(tree.l)
    else
        left_ops, left_features = get_counts(tree.l)
        right_ops, right_features = get_counts(tree.r)
        # Merge counts from both subtrees:
        ops = combine_counts(left_ops, right_ops)
        features = combine_counts(left_features, right_features)
        # Add the count for this node's operator:
        ops[tree.op] = get(ops, tree.op, 0) + 1
        ops, features
    end
end

function myloss(tree, dataset::Dataset{T,L}, options) where {T,L}
    prediction, completed = eval_tree_array(tree, dataset.X, options)
    !completed && return L(Inf)
    loss = sum(abs2, prediction .- dataset.y)
    operator_counts, feature_counts = get_counts(tree)
    penalty = L(0)
    # Penalize any operator used more than once:
    for op in 1:4
        if haskey(operator_counts, op) && operator_counts[op] > 1
            penalty += L(100)
        end
    end
    # Penalize any feature (input number) used more than once:
    for feature in 1:size(dataset.X, 1)
        if haskey(feature_counts, feature) && feature_counts[feature] > 1
            penalty += L(100)
        end
    end
    return loss + penalty
end

options = Options(;
    binary_operators=[+, *, /, -],
    complexity_of_constants=100,
    loss_function=myloss,
)

X = [1 2 4 5 10 25]
y = [94.0]

EquationSearch(X', y;
    options,
    niterations=1000,
    varMap=[string(x) for x in [X...]],
)
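As a quick standalone sanity check of the merging logic (plain Dicts, no tree required; purely illustrative):

combine_counts(Dict(1 => 1), Dict(1 => 2, 3 => 1))  # returns Dict(1 => 3, 3 => 1)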
At one point I needed a separate SymbolicRegression.div to trigger a separate printing method, but that isn't needed anymore, so I removed it. (Options automatically converts (/) => SymbolicRegression.div during construction.)