Ok i do have the following code:
# The goal of this file is to compute the k_statistics polynomials ad given by
# https://discourse.julialang.org/t/help-needed-translate-10-lines-from-maple-to-julia/60650
# and
# https://iris.unito.it/retrieve/handle/2318/1561388/399488/FASTpolykaysMAPLE.pdf
# also see
# Di Nardo E., G. Guarino, D. Senato (2007), A new method for fast computing unbiased estimators of cumulants. In press Statistics and Computing. http://www.springer.com/statistics/computational/journal/11222 (download from http://www.unibas.it/utenti/dinardo/lavori.html)
import Combinatorics
import Polynomials
import StatsBase
import Random
import TypedPolynomials
function makeTab_ss(N::T) where T<:Integer
part = Combinatorics.integer_partitions(N)
dict = Dict{Int64, Int64}()
Nf = factorial(N)
result = Array{Tuple{Rational{Int64}, Vector{Int64}}}(undef,0)
for y in part
denom = T(1)
dict = StatsBase.countmap(y)
for (val,numoccur) in dict
denom *= factorial(val)^numoccur * factorial(numoccur)
end
push!(result,(Nf//denom,y))
end
return result
end
function Stirling_triangle(N::T) where T<:Integer
arr = zeros(T,N,N)
for i in 1:N
for j in 1:i
arr[i,j] = Combinatorics.stirlings2(i,j)
end
end
return arr
end
function makeK_s(N::T) where T<:Integer
result = Array{Polynomials.Polynomial{Float64}}(undef,0)
Stirling2 = Stirling_triangle(N)
for i in 1:N
rez = zeros(i+1)
for j in 1:i
rez[j+1] = Combinatorics.stirlings2(i,j) * (-1)^(j-1) * factorial(j-1)
end
push!(result,Polynomials.Polynomial(rez))
end
return result
end
function expand_eval(v,u)
result = Array{eltype(u)}(undef,0)
for (number,array) in v
rez = number * prod(u[i] for i in array)::eltype(u)
push!(result,rez)
end
return result
end
function fd(j,k)
Polynomials.fromroots([i+k for i in 0:(j-k)])
end
# checks from the note in the source:
@assert fd(3,0) == Polynomials.Polynomial([0,-6,11,-6,1])
@assert fd(3,1) == Polynomials.Polynomial([-6,11,-6,1])
@assert fd(3,2) == Polynomials.Polynomial([6,-5,1])
function build_u2(N)
result = []
for i in 1:N
rez = (-1)^(i-1)*factorial(i-1)*fd(N-1,i)
push!(result,rez)
end
return result
end
function compute_k_representation(n::T,k_idx) where T
v = makeTab_ss(k_idx)
u = makeK_s(k_idx)
v2 = expand_eval(v,u)
poly = zero(eltype(u))
u2 = build_u2(k_idx)
common_denominator = prod(n - x for x in 0:(k_idx-1))
factors = []
for j in 1:size(v2,1)
poly = sum((v2[j].coeffs[i+1]*u2[i]) for i in 1:(length(v2[j].coeffs)-1))
numerator = poly(n)
push!(factors,numerator/common_denominator)
end
which_S = [v[i][2] for i in 1:length(v)]
return (factors,which_S)
end
function compute_k(N,S)
m = length(S)
result = []
for k in 1:m
repr = compute_k_representation(N,k)
rez = sum(repr[1][i] * prod([S[j] for j in repr[2][i]]) for i in 1:size(repr[1],1))
push!(result,rez)
end
return result
end
# Simple experiment:
# Vary the number of observations N, and the mean and variance of the normal random vector.
# The first cumulant should be the mean,
# the second should be the variance
# and all others should be 0 since it's a normal.
# the approximation should get better and better as you increase the number of pointS.
mean = 0
var = 1
m=7
N_obs = 100000
Random.seed!(123)
data = mean .+ sqrt(var) .* randn(N_obs)
S_obs = [sum(data .^ i) for i in 1:m]
cumulants_direct = compute_k(big.(N_obs),big.(S_obs)) # directly without the poynomials.
# As a simple check, we can verify that the polynomials holds :
TypedPolynomials.@polyvar N
TypedPolynomials.@polyvar S[1:7] ##########" put m here in a fixed way, as the infrastructure does not allow for variables.
result = compute_k(N,S);
@assert result[1] == S[1]/N
@assert result[2] == (N * S[2] - S[1]^2)/(N*(N-1))
@assert result[3] == (2S[1]^3 - 3N*S[1]*S[2] +N^2 * S[3]) / (N*(N-1)*(N-2))
@assert result[4] == (-6*S[1]^4 + 12 * N * S[1]^2 * S[2] - 3 * N * (N-1) * S[2]^2 - 4 * N * (N+1) * S[1] * S[3] + N^2*(N+1)*S[4])/(N*(N-1)*(N-2)*(N-3))
cumulants_poly = [result[i](N=>big.(N_obs),S=>big.(S_obs)) for i in 1:m]
display([cumulants_direct cumulants_poly])
The code is clearly suboptimal, however with the introduction of TypedPolynomials, i was able to check that it is correct.
Now the question is :
- Is it possible to compute once and for all the polynomials, for any N and S, and store them to disk somewhere in a package that i can load easily ?
- Is it possible to do the same thing (pre-compute the polynomials) for a given N, on the fly ?
The use case is that i might compute the value of these polynomials a lot of time, for varying S and for fixed N.