I am trying to optimize the way my package SymbolicRegression.jl (the pure-Julia version of PySR) calls user-passed operators.
The user passes a list of operators like so:
# The position of each operator in this list is the index used to refer to it.
SymbolicRegression.Options(; binary_operators=[plus, mult])
Inside this package, an equation is stored as a binary tree. Each node in the tree has an index which corresponds to an operator. For example, a node with index=1 refers to the `plus` operator, and a node with index=2 refers to the `mult` operator (in this case).
Without going into too many details, I evaluate operators on some data by calling a function and passing the index of the operator.
This function looks like this:
# Apply the `i`-th user-supplied binary operator element-wise over the first
# `clen` entries, overwriting `x` in place: x[k] = op(x[k], y[k]).
@inline function BINOP!(x::Array{Float32, 1}, y::Array{Float32, 1}, i::Int, clen::Int, options::Options)
    # Fetch the operator once, before the loop.
    # NOTE(review): whether the concrete type of `binop` is inferable depends on
    # how `Options.binops` is declared; if it is not, each call in the loop is
    # presumably dispatched dynamically -- confirm with @code_warntype.
    binop = options.binops[i]
    @inbounds @simd for k in 1:clen
        x[k] = binop(x[k], y[k])
    end
    return nothing
end
So, it selects the corresponding operator from the user-defined list, and then calls it.
However, this way of selecting operators seems to be inefficient compared to having a pre-defined function like so:
# Hard-coded variant: index 1 selects `plus`, every other index selects `mult`.
# The first `clen` entries of `x` are overwritten in place.
@inline function BINOP!(x::Array{Float32, 1}, y::Array{Float32, 1}, i::Int, clen::Int)
    if i != 1
        # Any index other than 1 falls through to `mult`.
        @inbounds @simd for k in 1:clen
            x[k] = mult(x[k], y[k])
        end
        return nothing
    end

    @inbounds @simd for k in 1:clen
        x[k] = plus(x[k], y[k])
    end
    return nothing
end
Something about having the operators fixed inside the function, which branches over them in an if statement, seems to make the compiler happy, and this way of doing it gets a speedup. (Note: the `Options` argument is an immutable struct, so that argument should be fixed for all of the computation.)
The Python front-end for my package meta-programs this if-statement by literally printing a string to a file, which is why this Python frontend/Julia backend seems to be faster than the pure-Julia backend.
So, I’ve tried to do the same thing in Julia but have not had luck. I am trying to meta-program this if-statement from inside Julia at runtime, since the user is free to pass any list of functions they desire.
Here is what I have so far:
"""
    constructBinaryOpEvaluator(binary_operators)

Return, as a `String`, the source code of an anonymous function
`(x, y, i, clen) -> ...` that branches on the operator index `i` and applies
the selected operator element-wise over the first `clen` entries, writing the
result into `x`.

The generated chain is `if i == 1 ... elseif i == k ... else ... end`: the last
operator is the unconditional `else` branch, so any out-of-range `i` selects
it. With no operators the generated function body is just `nothing`.

Each operator is written into the source as `string(op)`, so that text must be
a callable name that resolves in the module where the string is later
evaluated.

# Arguments
- `binary_operators`: collection of two-argument callables.

# Returns
- `String`: source text suitable for `Meta.parse`.
"""
function constructBinaryOpEvaluator(binary_operators)
    signature = "(x::Array{Float32, 1}, y::Array{Float32, 1}, i::Int, clen::Int) -> begin"
    if isempty(binary_operators)
        return string(signature, "\nnothing\nend")
    end

    nops = length(binary_operators)
    # Collect the fragments and join once instead of growing a string with `*=`.
    pieces = String[]
    for (k, op) in enumerate(binary_operators)
        header = if k == 1
            "if i == 1"
        elseif k < nops
            "elseif i == $k"
        else
            # Last operator of two or more: catch-all branch.
            "else"
        end
        push!(pieces, header, "\n", _binop_loop_source(op))
    end
    push!(pieces, "end\n")  # closes the if/elseif/else chain

    return string(signature, "\n", join(pieces), "\nend")
end

# Source text of the element-wise loop for one operator; shared by every branch.
function _binop_loop_source(op)
    return string(
        "@inbounds @simd for j=1:clen\n",
        "x[j] = ", string(op), "(x[j], y[j])\n",
        "end\n",
    )
end
This returns the correct function string that I am looking for (I tried to do this with `quote ... end`, but I couldn’t seem to define incomplete expressions to be concatenated). However, when I try to evaluate it as a closure function with the following code:
# Example operators; their names are what gets written into the generated source.
function plus(x, y)
    return x + y
end

function mult(x, y)
    return x * y
end

binary_operators = [plus, mult]

# Build the evaluator source, then parse and evaluate it into a callable.
func_string = constructBinaryOpEvaluator(binary_operators)
func = eval(Meta.parse(func_string))

x = randn(Float32, 5)
println(x)
# Direct call of the freshly evaluated closure, with operator index 1.
# NOTE(review): Base.invokelatest(func, ...) is the documented way to call a
# method that is newer than the caller's world age.
func(x, x, 1, length(x))
println(x)
I get this as an error:
Float32[0.53156346, 0.023917956, 0.15273008, -0.33916265, 0.7506737]
ERROR: LoadError: MethodError: no method matching (::var"#1#2")(::Array{Float32,1}, ::Array{Float32,1}, ::Int64, ::Int64)
The applicable method may be too new: running in world age 27803, while current world is 27804.
Closest candidates are:
#1(::Array{Float32,1}, ::Array{Float32,1}, ::Int64, ::Int64) at none:1 (method too new to be called from this world context.)
So I’m not sure how to move forward - I don’t see a reason why this shouldn’t work, since I’m running `eval` on code that I know would have worked if I had just typed it out literally.
Any idea what to do?
Thanks,
Miles