struct Preproc <: Function
layers::Vector
end
function (p::Preproc)(df::DataFrame, ids=nothing)
df = copy(df)
ids = isnothing(ids) ? range(1, length(p.layers), step=1) : ids
if length(ids) == 1
transform!(df, p.layers[ids[1]])
else
for layer in p.layers[ids]
transform!(df, layer)
end
end
return df
end
I wish to boxcox transform my target variable while storing its power parameter to apply later to novel observatios, so I’ve used BoxCoxTrans and defined the following:
struct BoxCox{T} <: Function
λ::T
end
BoxCox(x::AbstractVector) = BoxCox(BoxCoxTrans.lambda(x).value)
function (m::BoxCox)(x::Union{Real,Missing})
return BoxCoxTrans.transform(x, m.λ)
end
However, when I push and “fit” with
function build_preproc(df)
df_fit = copy(df)
preproc = Preproc([])
push!(preproc.layers, :wait => BoxCox => :wait)
df_fit = preproc(df_fit)
return preproc
end
df I get this on every cell
instead of the transformed values and I just cannot, for the life of me, get it to return the transformed variable.
I don’t really understand the math(?) being done here, but let me ask if I’m reading this right. By transformed value, you mean the expression BoxCoxTrans.transform(x, m.λ) returned by the (m::BoxCox) method right? So the issue is that you’re reaching the callable BoxCox{Float64} instances and need that extra step of calling them, given some input for x::Union{Real,Missing}?
My hunch is that the fix has to be in the :wait => BoxCox => :wait part. I’ve only used DataFrames rarely to limited degrees, so hopefully someone who knows it better can help you out. Maybe it would help them if you told them what the :wait data is (or a simulated example data), and what inputs you expect BoxCox and BoxCox{Float64} to be called on.
Indeed, I’ve tried variations of :wait => BoxCox => :wait such as :wait => ByRow(BoxCox) => :wait which I think maybe got me a step closer since this is what I’m now getting
while before the 0.25... coincides with the BoxCox lambda.
Also, :wait => (w -> BoxCox) => :wait gets me
On the other hand :wait => (w -> BoxCox(w)) => :wait returns the same as :wait => BoxCox => :wait
As for the interpretation of :wait it’s a slightly skewed normally distributed waiting time represented as a <::Number. Hope that helps.
Looks like you don’t apply your functor after creating it, and end up with the same object in the result. I never use DataFrames and not familiar with how functors play with their custom transformation syntax, but for regular arrays the transformation would look like this:
wait = ... # your vector of numbers
b = BoxCox(wait)
wait = b.(wait) # vector of transformed values