Distance matrix + clustering with custom distance function

Here’s what I had to do to get this working (basically copy Distances.jl’s internals and make changes):
If you have any ideas on how this could be accomplished faster or more easily, please tell me! I’m really surprised that there is no simpler way to do this… I got lucky that the metric was one I could cobble together from the ones presented in the Distances.jl documentation, but I’m still not sure what I will do if the distance function I try later is not of this type - write this whole thing again?

"""
    WeightedPeriodicMinkowski(periods, weights, p)

Metric combining a per-coordinate period, a per-coordinate weight and a Minkowski
exponent `p`.

# Fields
- `periods`: one period per coordinate (handed to `eval_op` as `Ti`).
- `weights`: one weight per coordinate (handed to `eval_op` as `wi`).
- `p`: real exponent; each coordinate term is raised to `p` and the accumulated sum is
  raised to `1/p`.

The fields are declared in the order `periods, weights, p` so that the positional
constructor matches the way the type is constructed (periods first, then weights) and the
order of the type parameters `{T, W, P}`.
"""
struct WeightedPeriodicMinkowski{T,W,P<:Real} <: Distances.UnionMetric
    periods::T
    weights::W
    p::P
end

# Parameter bundle for the metric: a 2-tuple holding the `periods` field first and the
# `weights` field second.
function Distances.parameters(wpm::WeightedPeriodicMinkowski)
    return (wpm.periods, wpm.weights)
end

# Per-coordinate contribution: the absolute difference of `ai` and `bi` is reduced modulo
# the period `Ti`, the smaller of that remainder and its complement `Ti - remainder` is
# taken, raised to the exponent `d.p`, and scaled by the weight `wi`.
@inline function Distances.eval_op(d::WeightedPeriodicMinkowski, ai, bi, Ti, wi)
    # Periodic part (modelled on PeriodicEuclidean).
    remainder = mod(abs(ai - bi), Ti)
    wrapped = min(remainder, Ti - remainder)
    # Weighted power part (modelled on WeightedMinkowski).
    return abs(wrapped)^d.p * wi
end
# Final step of the reduction: raise the accumulated value `s` to the power `1/d.p`.
@inline function Distances.eval_end(d::WeightedPeriodicMinkowski, s)
    return s^(1 / d.p)
end
# Convenience wrapper: builds a `WeightedPeriodicMinkowski` by passing `T`, `w`, `p` to the
# positional constructor in that order, then evaluates it on `a` and `b`.
function wpminkowski(a, b, w, T, p)
    metric = WeightedPeriodicMinkowski(T, w, p)
    return metric(a, b)
end
# Makes metric instances callable: `metric(a, b)` forwards to `Distances._evaluate`.
function (metric::WeightedPeriodicMinkowski)(a, b)
    return Distances._evaluate(metric, a, b)
end

# Result type for metrics whose parameters are a 2-tuple `(p1, p2)`: the type obtained by
# evaluating the metric on unit scalars of the two element types and of the element types
# of both parameters.
#
# The destructured argument is explicitly typed as a 2-tuple. An untyped `(p1, p2)` has the
# same method signature as a plain untyped fourth argument, so it would replace any
# existing generic four-argument `result_type` method for `UnionMetrics` and break metrics
# whose parameter is not a 2-tuple.
function Distances.result_type(
    dist::Distances.UnionMetrics,
    ::Type{Ta},
    ::Type{Tb},
    (p1, p2)::Tuple{Any,Any},
) where {Ta,Tb}
    unit_p1 = oneunit(eltype(p1))
    unit_p2 = oneunit(eltype(p2))
    return typeof(Distances._evaluate(dist, oneunit(Ta), oneunit(Tb), unit_p1, unit_p2))
end

# Scalar evaluation with two scalar parameters: compute the single per-coordinate term via
# `eval_op`, then apply the metric's `eval_end` to it.
function Distances._evaluate(dist::Distances.UnionMetrics, a::Number, b::Number, p1::Number, p2::Number)
    term = Distances.eval_op(dist, a, b, p1, p2)
    return Distances.eval_end(dist, term)
end
    
# Array evaluation for metrics with two array-valued parameters `(p1, p2)`.
#
# Accumulates `eval_op(d, a[i], b[i], p1[i], p2[i])` over all elements with `eval_reduce`,
# starting from `eval_start(d, a, b)`, and finishes with `eval_end`.
#
# Throws `DimensionMismatch` (when bounds checking is active) if `a`, `b`, `p1` and `p2` do
# not all have the same length. Returns `zero(result_type(d, a, b))` for empty input.
#
# All Distances.jl internals are referenced with the `Distances.` prefix so the method
# works when the package is loaded with either `using` or `import`.
Base.@propagate_inbounds function Distances._evaluate(
    d::Distances.UnionMetrics,
    a::AbstractArray,
    b::AbstractArray,
    (p1, p2)::Tuple{AbstractArray,AbstractArray},
)
    @boundscheck if length(a) != length(b)
        throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b))."))
    end
    @boundscheck if length(a) != length(p1)
        throw(DimensionMismatch("arrays have length $(length(a)) but parameter 1 has length $(length(p1))."))
    end
    @boundscheck if length(a) != length(p2)
        throw(DimensionMismatch("arrays have length $(length(a)) but parameter 2 has length $(length(p2))."))
    end
    if isempty(a)
        return zero(Distances.result_type(d, a, b))
    end
    @inbounds begin
        s = Distances.eval_start(d, a, b)
        # Shared indexing is only safe when all four arrays agree on their indices:
        # either everything is linearly indexed with identical index ranges, or all axes
        # match. Otherwise fall back to lock-step iteration.
        linear_ok = IndexStyle(a, b, p1, p2) === IndexLinear() &&
            eachindex(a) == eachindex(b) == eachindex(p1) == eachindex(p2)
        if linear_ok || axes(a) == axes(b) == axes(p1) == axes(p2)
            @simd for I in eachindex(a, b, p1, p2)
                ai = a[I]
                bi = b[I]
                p1i = p1[I]
                p2i = p2[I]
                s = Distances.eval_reduce(d, s, Distances.eval_op(d, ai, bi, p1i, p2i))
            end
        else
            for (ai, bi, p1i, p2i) in zip(a, b, p1, p2)
                s = Distances.eval_reduce(d, s, Distances.eval_op(d, ai, bi, p1i, p2i))
            end
        end
        return Distances.eval_end(d, s)
    end
end

Then I am finally able to write:
Distances.pairwise(WeightedPeriodicMinkowski(repeat([Inf, 360, 180, Inf, Inf, Inf, Inf, 360],6), ones(48), 2), data["class1"] |> Tables.matrix |> transpose) and get results.

1 Like