I implemented R’s aov
function in julia some time ago. It can handle repeated measures, also when the data is unbalanced. So far the interface is ugly and I didn’t publish it, but I can do so if there is some interest.
Here is my source code, if you want to give it a try.
aov code
using StatsModels, LinearAlgebra, Distributions
toterm(s1::Term, s2::Term) = InteractionTerm((s1, s2))
toterm(s1::InteractionTerm, s2::Term) = InteractionTerm((s1.terms..., s2))
toterm(s1::Term, s2::InteractionTerm) = InteractionTerm((s1, s2.terms...))
toterm(x::Tuple, s2::Term) = map(t -> toterm(t, s2), x)
toterm(s1::Term, x::Tuple) = map(t -> toterm(s1, t), x)
function predictor_product(predictors)
init, rest = Iterators.peel(predictors)
reduce((x, y) -> x + y + toterm(x, y), term.(rest), init = term(init))
end
function errorterm(ef, nested)
terms = term(1) + term(ef)
if length(nested) > 0
nestedterms = predictor_product(nested)
terms += toterm(term(ef), nestedterms)
end
terms
end
name(t::AbstractTerm) = "$(t.sym)"
name(t::InteractionTerm) = join(name.(t.terms), " & ")
name(t::InterceptTerm) = "Intercept"
pivoted_asgn(assign, qrp) = assign[qrp.p[1:rank(qrp.R)]]
function mqr(f::FormulaTerm, data; contrasts = Dict{Symbol, Any}())
mf = ModelFrame(f, data; contrasts)
mm = ModelMatrix(mf)
qrp = qr(mm.m, Val(true))
(qrp = qrp, mf = mf, mm = mm)
end
function aov(f::FormulaTerm, data; return_locals = false)
qrp, mf, mm = mqr(f, data)
aov(qrp, pivoted_asgn(mm.assign, qrp), mf.f.rhs.terms,
getproperty(data, f.lhs.sym); return_locals)
end
aov(l, y) = aov(l..., y)
function aov(qrp, asgn, terms, y; return_locals = false)
effects = qrp.Q' * y
r = rank(qrp.R)
SS = Float64[]
names = String[]
DF = Int[]
for i in union(asgn)
n = name(terms[i])
n == "Intercept" && continue
idxs = findall(asgn .== i)
push!(SS, sum(abs2, @view(effects[idxs])))
push!(names, n)
push!(DF, length(idxs))
end
push!(SS, sum(abs2, @view(effects[r+1:end])))
push!(names, "Residuals")
push!(DF, length(effects) - r)
MSS = SS ./ DF
F = MSS ./ MSS[end]
F[end] = NaN
DFres = DF[end]
result = DataFrame(names = names, DF = DF, SS = SS, MSS = MSS, F = F,
p = @.(ccdf(FDist(DF, DFres), F)))
if return_locals
result, (qrp = qrp, asgn = asgn, terms = terms)
else
result
end
end
function aoverror(f::FormulaTerm, e, nested::T, data; return_locals = false) where T
nested = T <: Symbol ? [nested] : nested
ef = errorterm(e, nested)
efit = mqr(f.lhs ~ ef, data,
contrasts = Dict(e => HelmertCoding(),
[t => HelmertCoding() for t in nested]...))
efit.mm.assign = efit.mm.assign[efit.qrp.p[1:rank(efit.qrp.R)]]
mf = ModelFrame(f, data)
mm = ModelMatrix(mf)
ys = efit.qrp.Q' * getproperty(data, f.lhs.sym)
append!(efit.mm.assign,
fill(maximum(efit.mm.assign) + 1, length(ys) - length(efit.mm.assign)))
Xs = efit.qrp.Q' * mm.m
rows = [findall(efit.mm.assign .== i)
for i in Iterators.drop(sort!(union(efit.mm.assign)), 1)]
result = [inner(Xs, ys, row, mm.assign, mf.f.rhs.terms; return_locals)
for row in rows]
if return_locals
first.(result), (Q = efit.qrp.Q,
locals_t = [(rest..., row)
for (row, rest) in zip(rows, last.(result))])
else
result
end
end
function aoverror(l, y)
ys = l.Q' * y
[aov(qrp, asgn, terms, ys[rows]) for (qrp, asgn, terms, rows) in l.locals_t]
end
function inner(X, y, rows, asgn, terms; return_locals = false)
T = X[rows, :]
cols = findall(reshape(sum(abs2, T, dims = 1), :) .> 1e-5)
qrp = qr(T[:, cols], Val(true))
aov(qrp, pivoted_asgn(asgn[cols], qrp), terms, y[rows]; return_locals)
end
Applied to your example above the result is
julia> aoverror(@formula(value ~ a * b), :id, [], df)
2-element Vector{DataFrame}:
1×6 DataFrame
Row │ names DF SS MSS F p
│ String Int64 Float64 Float64 Float64 Float64
─────┼────────────────────────────────────────────────────────
1 │ Residuals 1 0.653333 0.653333 NaN NaN
4×6 DataFrame
Row │ names DF SS MSS F p
│ String Int64 Float64 Float64 Float64 Float64
─────┼───────────────────────────────────────────────────────────────────────
1 │ b 1 6.93335e-33 6.93335e-33 7.93895e-32 1.0
2 │ a 2 0.121667 0.0608333 0.696565 0.54093
3 │ a & b 2 0.105 0.0525 0.601145 0.583505
4 │ Residuals 5 0.436667 0.0873333 NaN NaN