Sampling from a KDE object

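Here’s a minimal demo that wraps a KernelDensity.jl estimate so it can be converted to a Distributions.jl mixture model and sampled from: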

using Distributions: Distributions
using KernelDensity: KernelDensity

# Like KernelDensity.UnivariateKDE, but also stores the data and the (kernel) distribution
# needed to build a Distributions-like object.
#
# Type parameters (declared in the order D, R, K, which differs from the field order):
#   D - type of the stored data vector (an AbstractVector of Reals)
#   R - type of the wrapped KernelDensity.UnivariateKDE
#   K - type of the kernel distribution (a Distributions.UnivariateDistribution)
struct UnivariateKDE{
    D<:AbstractVector{<:Real},
    R<:KernelDensity.UnivariateKDE,
    K<:Distributions.UnivariateDistribution,
}
    # The data vector the estimate is associated with.
    data::D
    # The kernel distribution.
    dist::K
    # The wrapped KernelDensity.jl estimate.
    kde::R
end

# Fit a univariate kernel density estimate to `data`, keeping the pieces needed to turn
# the result into a Distributions-like object later.
#
# Keywords:
#   bandwidth - kernel bandwidth; defaults to `KernelDensity.default_bandwidth(data)`.
#   kernel    - kernel family handed to `KernelDensity.kernel_dist`; defaults to
#               `Distributions.Normal`. The name is module-qualified because only the
#               module name `Distributions` is imported at the top of the file, so a bare
#               `Normal` would only resolve if something else had brought it into scope.
#   kwargs... - forwarded unchanged to `KernelDensity.kde`.
#
# Returns a `UnivariateKDE` holding `data`, the kernel distribution, and the fitted KDE.
function kde(
    data::AbstractVector{<:Real};
    bandwidth=KernelDensity.default_bandwidth(data),
    kernel=Distributions.Normal,
    kwargs...
)
    dist = KernelDensity.kernel_dist(kernel, bandwidth)
    k = KernelDensity.kde(data, dist; kwargs...)
    return UnivariateKDE(data, dist, k)
end

# A KDE is a uniform mixture of kernels: one copy of the kernel distribution per data
# point, each shifted to sit on that point. Build exactly that mixture.
function Base.convert(::Type{<:Distributions.Distribution}, kde::UnivariateKDE)
    kernel = kde.dist
    # `x + kernel` translates the kernel distribution by the data point `x`.
    shifted = map(x -> x + kernel, kde.data)
    return Distributions.MixtureModel(shifted)
end

using Distributions, StatsPlots

# Ground truth: a two-component Gaussian mixture, and a sample drawn from it.
dist = MixtureModel([Normal(0, 1), Normal(3, 3)])
data = rand(dist, 1_000)

# Fit the KDE wrapper and convert it into a mixture distribution.
k = kde(data)
kde_dist = convert(Distributions.Distribution, k)

# Overlay, on one plot: the true density, the KernelDensity.jl estimate, the density of
# the mixture form of the KDE, and a density estimate of samples drawn from that mixture.
p = plot(dist; components=false, label="true dist", lw=2)
plot!(p, k.kde; label="KDE.jl dist", lw=2)
plot!(p, kde_dist; components=false, label="KDE mixture", lw=1)
density!(p, rand(kde_dist, 100_000); lw=2, label="KDE mixture rand")

3 Likes