I am trying to closely follow the StatsModels.jl documentation
to get a function term working as syntactic sugar for defining a survival response in regression models. The issue is that I get a lot of type instabilities… I managed to reduce the problem to the following MWE:
using StatsModels, StatsAPI, DataFrames
"""
    SurvTerm{TimeT, StatusT} <: AbstractTerm

Formula term pairing a time term `T` with a status term `Δ`.

Both fields are parametrically typed, so the same struct can hold the raw terms
taken from a `Surv(...)` call in a formula as well as the terms obtained once a
schema has been applied to them.
"""
struct SurvTerm{TimeT, StatusT} <: AbstractTerm
    T::TimeT
    Δ::StatusT
end
"""
    Surv(T::Real, Δ::Bool)

Return the pair `(T, Δ)` unchanged.

Inside a formula the call is not evaluated directly: it is captured as a
`FunctionTerm{typeof(Surv)}` and handled by the `apply_schema` method defined
for that type. `T` accepts any `Real` (not only `Float64`) so that integer or
single-precision times are valid as well.
"""
Surv(T::Real, Δ::Bool) = (T, Δ)
# Turn a `Surv(...)` function term into a `SurvTerm` built from the call's
# arguments, then delegate to the `SurvTerm` method of `apply_schema`, which
# resolves those arguments against `sch`.
function StatsModels.apply_schema(
    t::FunctionTerm{typeof(Surv)}, sch::StatsModels.Schema, Mod::Type{<:Any}
)
    unresolved = SurvTerm(t.args...)
    return apply_schema(unresolved, sch, Mod)
end
# Resolve both components of a `SurvTerm` against the schema and return a new
# `SurvTerm` wrapping the resolved terms.
#
# Throws an `ArgumentError` when either resolved component is not a
# `ContinuousTerm`.
function StatsModels.apply_schema(
    t::SurvTerm{X,Y}, sch::StatsModels.Schema, Mod::Type{<:Any}
) where {X,Y}
    T = apply_schema(t.T, sch, Mod)
    Δ = apply_schema(t.Δ, sch, Mod)
    isa(T, ContinuousTerm) ||
        throw(ArgumentError("Surv only works with continuous terms (got $T)"))
    # The guard requires a `ContinuousTerm`, so the message must say "continuous";
    # the previous wording ("discrete terms") contradicted the check it reported on.
    isa(Δ, ContinuousTerm) ||
        throw(ArgumentError(
            "Surv requires the status indicator to be a continuous term (got $Δ)"
        ))
    return SurvTerm(T, Δ)
end
# Generate the model columns of a `SurvTerm`: the columns produced by the time
# term and by the status term, concatenated horizontally in that order.
function StatsModels.modelcols(t::SurvTerm, d::NamedTuple)
    return hcat(modelcols(t.T, d), modelcols(t.Δ, d))
end
# Try it out on a synthetic data set with 1000 rows: `time` is minus the log of a
# uniform draw, `status` is true when a uniform draw is <= 0.8, and `sex` is
# :male when a uniform draw is <= 0.5 and :female otherwise.
df = DataFrame(;
    time = map(u -> -log(u), rand(1000)),
    status = rand(1000) .<= 0.8,
    sex = [u <= 0.5 ? :male : :female for u in rand(1000)],
)

# NOTE(review): outside the REPL, `@code_warntype` needs `using InteractiveUtils`.
# NOTE(review): when given a macro call, `@code_warntype` presumably inspects the
# macro's own expander method rather than the value the macro produces — confirm
# before reading this output as an instability of the formula itself.
@code_warntype @formula(Surv(time,status)~sex) # reported as already unstable
@code_warntype schema(df) # reported as stable
# NOTE(review): the concrete term types returned here presumably depend on the
# runtime contents of the schema, so they may not be inferable from argument
# types alone — TODO confirm against the StatsModels internals.
@code_warntype apply_schema(@formula(Surv(time,status)~sex), schema(df)) # reported as very unstable
Can you help me understand what I missed?