As far as I understand it, these continuous analogs are formed by finding a continuous distribution whose CDF takes the same values as the CDF of the discrete version at all points of the discrete support. In that case, here are some basic implementations to get you started:
using StatsFuns, Distributions, ADTypes, FiniteDiff
import DifferentiationInterface as DI
# Continuous analog of the Poisson distribution: a continuous distribution whose
# CDF agrees with the Poisson(λ) CDF at every nonnegative integer.
# `ad_type` selects the DifferentiationInterface backend used to differentiate
# the CDF when computing pdf/logpdf.
struct ContinuousPoisson{T<:Real,B<:ADTypes.AbstractADType} <: ContinuousUnivariateDistribution
λ::T        # rate parameter of the underlying Poisson
ad_type::B  # AD backend for pdf/logpdf via derivative of the CDF
end
# Convenience constructor: default to finite differences (see the note at the
# end of the file on why other AD backends may not work on these CDFs).
ContinuousPoisson(λ) = ContinuousPoisson(λ, ADTypes.AutoFiniteDiff())
# The discrete distribution whose CDF this continuous analog interpolates.
_discretize(d::ContinuousPoisson) = Poisson(d.λ)

# Support is [0, ∞), mirroring the discrete Poisson.
Base.minimum(::ContinuousPoisson) = 0
Base.maximum(::ContinuousPoisson) = Inf

# P(X ≤ x): the upper regularized incomplete gamma with shape x + 1 and unit
# scale, evaluated at λ, matches the Poisson CDF at integer x.
function Distributions.cdf(d::ContinuousPoisson, x::Real)
    return StatsFuns.gammaccdf(x + 1, 1, d.λ)
end

# Logarithm of the CDF, computed directly for numerical accuracy.
Distributions.logcdf(d::ContinuousPoisson, x::Real) = StatsFuns.gammalogccdf(x + 1, 1, d.λ)
# Continuous analog of the Binomial distribution: a continuous distribution
# whose CDF agrees with the Binomial(n, p) CDF at every integer in 0:n.
# `ad_type` selects the DifferentiationInterface backend used to differentiate
# the CDF when computing pdf/logpdf.
struct ContinuousBinomial{T<:Real,B<:ADTypes.AbstractADType} <: ContinuousUnivariateDistribution
n::Int      # number of trials of the underlying Binomial
p::T        # success probability
ad_type::B  # AD backend for pdf/logpdf via derivative of the CDF
end
# Convenience constructor: default to finite differences (see the note at the
# end of the file on why other AD backends may not work on these CDFs).
ContinuousBinomial(n, p) = ContinuousBinomial(n, p, ADTypes.AutoFiniteDiff())
# The discrete distribution whose CDF this continuous analog interpolates.
_discretize(d::ContinuousBinomial) = Binomial(d.n, d.p)

# Support is [0, n], mirroring the discrete Binomial.
Base.minimum(::ContinuousBinomial) = 0
# FIX: the original read `Base.maximum(::ContinuousBinomial) = d.n`, which
# uses `d` without binding it in the signature and therefore throws an
# UndefVarError (or silently captures an unrelated global `d`) when called.
Base.maximum(d::ContinuousBinomial) = d.n

# P(X ≤ x): the complement of the regularized incomplete beta matches the
# Binomial CDF at integer x; arguments are clamped so the edge cases near
# x = n stay inside the beta function's parameter domain.
function Distributions.cdf(d::ContinuousBinomial, x::Real)
    return StatsFuns.betaccdf(min(x + 1, d.n), max(d.n - x, 0), d.p)
end

# Logarithm of the CDF, computed directly for numerical accuracy.
function Distributions.logcdf(d::ContinuousBinomial, x::Real)
    return StatsFuns.betalogccdf(min(x + 1, d.n), max(d.n - x, 0), d.p)
end
# Continuous analog of the NegativeBinomial distribution: a continuous
# distribution whose CDF agrees with the NegativeBinomial(r, p) CDF at every
# nonnegative integer. `ad_type` selects the DifferentiationInterface backend
# used to differentiate the CDF when computing pdf/logpdf.
struct ContinuousNegativeBinomial{T<:Real,B<:ADTypes.AbstractADType} <: ContinuousUnivariateDistribution
r::Int      # number of successes of the underlying NegativeBinomial
p::T        # success probability
ad_type::B  # AD backend for pdf/logpdf via derivative of the CDF
end
# Convenience constructor: default to finite differences (see the note at the
# end of the file on why other AD backends may not work on these CDFs).
ContinuousNegativeBinomial(r, p) = ContinuousNegativeBinomial(r, p, ADTypes.AutoFiniteDiff())
# The discrete distribution whose CDF this continuous analog interpolates.
_discretize(d::ContinuousNegativeBinomial) = NegativeBinomial(d.r, d.p)

# Support is [0, ∞), mirroring the discrete NegativeBinomial.
Base.minimum(::ContinuousNegativeBinomial) = 0
Base.maximum(::ContinuousNegativeBinomial) = Inf

# P(X ≤ x): the regularized incomplete beta I_p(r, x + 1) matches the
# NegativeBinomial CDF at integer x.
Distributions.cdf(d::ContinuousNegativeBinomial, x::Real) = StatsFuns.betacdf(d.r, x + 1, d.p)

# Logarithm of the CDF, computed directly for numerical accuracy.
Distributions.logcdf(d::ContinuousNegativeBinomial, x::Real) = StatsFuns.betalogcdf(d.r, x + 1, d.p)
# shared code
# `quantile`, `pdf`, and `logpdf` are identical across the three continuous
# analogs, so generate all three methods for each type in one loop.
for D in (:ContinuousPoisson, :ContinuousBinomial, :ContinuousNegativeBinomial)
    @eval begin
        # The continuous quantile lies at most one unit below the quantile of
        # the matching discrete distribution, so bracket it there and bisect.
        function Distributions.quantile(d::$D, p::Real)
            hi = Distributions.quantile(_discretize(d), p)
            lo = hi - 1
            # NOTE: quantile_bisect is an internal function
            return Distributions.quantile_bisect(d, p, lo, hi)
        end
        # The density is the derivative of the CDF, taken with the
        # distribution's configured AD backend.
        function Distributions.pdf(d::$D, x::Real)
            return DI.derivative(Base.Fix1(Distributions.cdf, d), d.ad_type, x)
        end
        # logpdf(x) = log(cdf'(x)) = log((logcdf)'(x)) + logcdf(x), which
        # avoids differentiating the raw CDF where it may underflow.
        function Distributions.logpdf(d::$D, x::Real)
            dlogcdf = DI.derivative(Base.Fix1(Distributions.logcdf, d), d.ad_type, x)
            return log(dlogcdf) + logcdf(d, x)
        end
    end
end
And some quick plots to check that things look good:
using StatsPlots

# Overlay each continuous analog on its discrete counterpart to eyeball that
# the CDFs agree on the integers and the PDFs interpolate the PMFs.
dists = [ContinuousPoisson(5), ContinuousBinomial(10, 0.3), ContinuousNegativeBinomial(5, 0.3)]
plts = Any[]
for cont in dists
    disc = _discretize(cont)
    cdf_plot = plot(disc; func=cdf, label="discrete", title="CDF")
    plot!(cdf_plot, cont; func=cdf, label="continuous")
    pdf_plot = plot(disc; label="discrete", title="PDF")
    plot!(pdf_plot, cont; label="continuous")
    push!(plts, cdf_plot, pdf_plot)
end
# One row per distribution: CDF on the left, PDF on the right.
plot(plts...; layout=(3, 2), dpi=300)
One major limitation is that the incomplete beta and gamma functions used in these CDFs are implemented in SpecialFunctions and restricted to AbstractFloat types, so they can only be differentiated by source-to-source ADs like Tapir or Enzyme. I'm not sure whether either of those currently works on these functions, which is why the implementations above fall back to finite differences.