Continuing my experiments from another thread, following @theogf's suggestion, I have some code using AugmentedGaussianProcesses.jl (AGP). With a single predictor I can fit:
using AugmentedGaussianProcesses
const AGP = AugmentedGaussianProcesses
using Distributions
using CairoMakie
using ThreadsX
using DataFrames
using StatsModels
using GLM
N = 1000
σ = 1.0
df = let
    a = randn(N)
    b = randn(N)
    c = randn(N)
    ϵ = σ * randn(N)
    x = [a b c]
    t = 5rand(N)
    β = [0.1, 0.2, 0.3]
    y = x * β .+ sin.(t) .+ ϵ
    sort(DataFrame(; a, b, c, t, y), :t)
end
lm(@formula(y ~ a + b + c + sin(t)), df)
fig = Figure(resolution=(1000,2000))
Axis(fig[1,1])
scatter!(df.t, df.y, markersize=2)
lines!(df.t, sin.(df.t))
fig
##
Ms = [4, 8, 16,];
kernel = SqExponentialKernel() + PeriodicKernel()
@time models = ThreadsX.map(Ms) do num_inducing
    m = SVGP(
        reshape(df.t, (nrow(df), 1)),
        df.y,                  # First arguments are the input and output
        kernel,                # Kernel
        GaussianLikelihood(σ), # Likelihood used
        AnalyticVI(),          # Inference used to solve the problem
        num_inducing;          # Number of inducing points used
        optimiser=false,       # Keep kernel parameters fixed
        Zoptimiser=false,      # Keep inducing point locations fixed
    )
    @time train!(m, 100) # Train the model for 100 iterations
    m
end
##
@time for (i, (m, model)) in enumerate(zip(Ms, models))
    Axis(fig[i, 1], title=string(m))
    n_grid = 40
    mins = minimum(df.t)
    maxs = maximum(df.t)
    x_grid = range(mins, maxs; length=n_grid)
    y_grid, sig_y_grid = proba_y(model, reshape(x_grid, :, 1))
    band!(x_grid, y_grid .- 2 * sqrt.(sig_y_grid), y_grid .+ 2 * sqrt.(sig_y_grid))
    scatter!(df.t, df.y, markersize=4, strokewidth=0, color=:black)
    scatter!.(vec(model.f[1].Z), Ref(zeros(dim(model.f[1]))); color=:orange, markersize=4, strokewidth=0)
end
fig
I would like to know if it is possible to fit with multiple predictors. I don't see anything about this in the AGP docs, so I just guessed. Currently I get:
ERROR: LoadError: DimensionMismatch("Dimensions of the inputs are not matching : a = 2, b = 2, r = 1")
Stacktrace:
[1] _evaluate(::KernelFunctions.Sinus{Float64}, ::SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int64}},Int64},true}, ::SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int64}},Int64},true})
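If I read the stacktrace right, my guess (not verified against the AGP internals) is that the mismatch comes from PeriodicKernel: its default period parameter is a length-1 vector, so its Sinus metric only accepts 1-dimensional inputs, while the input matrix now has two columns (t and a; full script below). A minimal reproduction of what I think is happening, using KernelFunctions directly:

using KernelFunctions
k = PeriodicKernel()                  # default period vector has length 1
kernelmatrix(k, RowVecs(randn(5, 1))) # 1-D inputs: works
kernelmatrix(k, RowVecs(randn(5, 2))) # 2-D inputs: DimensionMismatch, a = 2, b = 2, r = 1

The full script that produces the error: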
using AugmentedGaussianProcesses
const AGP = AugmentedGaussianProcesses
using Distributions
using CairoMakie
using ThreadsX
using DataFrames
using StatsModels
using GLM
N = 1000
σ = 1.0
df = let
    a = randn(N)
    b = randn(N)
    c = randn(N)
    ϵ = σ * randn(N)
    x = [a b c]
    t = 5rand(N)
    β = [0.1, 0.2, 0.3]
    y = x * β .+ sin.(t) .+ ϵ
    sort(DataFrame(; a, b, c, t, y), :t)
end
lm(@formula(y ~ a + b + c + sin(t)), df)
fig = Figure(resolution=(1000,2000))
Axis(fig[1,1])
scatter!(df.t, df.y, markersize=2)
lines!(df.t, sin.(df.t))
fig
##
Ms = [4, 8, 16,];
kernel = SqExponentialKernel() * PeriodicKernel()
X = Matrix(df[:, [:t, :a]])
@time models = map(Ms) do num_inducing
    m = SVGP(
        X,
        df.y,                  # First arguments are the input and output
        kernel,                # Kernel
        GaussianLikelihood(σ), # Likelihood used
        AnalyticVI(),          # Inference used to solve the problem
        num_inducing;          # Number of inducing points used
        optimiser=false,       # Keep kernel parameters fixed
        Zoptimiser=false,      # Keep inducing point locations fixed
    )
    @time train!(m, 100) # Train the model for 100 iterations
    m
end
##
@time for (i, (m, model)) in enumerate(zip(Ms, models))
    Axis(fig[i, 1], title=string(m))
    n_grid = 40
    mins = minimum(df.t)
    maxs = maximum(df.t)
    x_grid = range(mins, maxs; length=n_grid)
    y_grid, sig_y_grid = proba_y(model, reshape(x_grid, :, 1))
    band!(x_grid, y_grid .- 2 * sqrt.(sig_y_grid), y_grid .+ 2 * sqrt.(sig_y_grid))
    scatter!(df.t, df.y, markersize=4, strokewidth=0, color=:black)
    scatter!.(vec(model.f[1].Z), Ref(zeros(dim(model.f[1]))); color=:orange, markersize=4, strokewidth=0)
end
fig
I think I want something like $y = \beta_1 a + \beta_2 b + \beta_3 c + \text{GP}(t)$. If I understand right, it's possible to express the linear terms as GPs too, using linear kernels.
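To make the question concrete, here is the kind of construction I imagine, based on my reading of the KernelFunctions docs; the column order and whether AGP's SVGP accepts a kernel built this way are my assumptions, and I haven't tested it:

# Sketch only: assumes columns 1–3 of X hold a, b, c and column 4 holds t.
using KernelFunctions
X = Matrix(df[:, [:a, :b, :c, :t]])
k_lin = LinearKernel() ∘ SelectTransform([1, 2, 3])                       # linear part in a, b, c
k_t   = (SqExponentialKernel() * PeriodicKernel()) ∘ SelectTransform([4]) # GP(t) part
kernel = k_lin + k_t

Is something like this the intended way to handle multiple predictors in AGP, or is there a more idiomatic approach?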