Silhouette coefficient calculation

I made it.

Functions that changed:
  1. The euclidean distance function got updated:

    """
        euclidean(a::AbstractVector, b::AbstractArray)

    Return the Euclidean norm of the differences between the observation `a` and the
    observation(s) in `b`.

    `b` is either a single observation given as a vector, or a matrix holding one
    observation per row. For a multi-row `b` the result is `√` of the squared differences
    summed over *all* rows; it is not the sum of the per-row distances.
    """
    function euclidean(a::AbstractVector, b::AbstractArray)
        # A plain vector is one observation: make it a 1×d row. Left as a column it would
        # broadcast against the 1×d `a'` into a d×d matrix and inflate the result.
        rows = b isa AbstractVector ? reshape(b, 1, :) : b
        # Promote to floating point before subtracting and squaring so that integer
        # input cannot silently overflow.
        return √(sum(abs2, float.(a') .- float.(rows)))
    end
    
  2. All the aᵢ and the bᵢ implementations got updated:

    """
        aᵢ(data, labels, i, method::Common)

    Return the mean Euclidean distance from observation `i` (row `i` of `data`) to the
    other observations carrying the same label as `labels[i]`.

    `method` is only used for dispatch. The distance from `i` to itself (zero) is part of
    the sum, which is why the divisor is `n - 1`. When `i` is the only member of its
    cluster the result is `0.0 / 0`, i.e. `NaN`.
    """
    function aᵢ(data::AbstractArray{T}, labels::AbstractVector{S}, i::Int64, method::Common) where {T<:Real, S<:Real}
        labelᵢ = labels[i]
        same_cluster_members_idx = findall(isequal(labelᵢ), labels)
        n = length(same_cluster_members_idx)

        # One squared distance per cluster member (reduce along the feature dimension),
        # then take the root of each before summing. Taking a single root of the grand
        # total would not give a sum of distances. `float` guards integer data against overflow.
        members = view(data, same_cluster_members_idx, :)
        sq_dists = sum(abs2, float.(members) .- float.(data[i, :]'); dims=2)
        sum_dist = sum(sqrt, sq_dists)
        return sum_dist/(n-1)
    end
    
    """
        aᵢ(data, labels, i, method::Simplified, centers)

    Return `euclidean` of observation `i` (row `i` of `data`) and the center of its own
    cluster, `centers[labels[i]]`, passed as a single row. `method` is only used for dispatch.
    """
    function aᵢ(data::AbstractArray{T}, labels::AbstractVector{S}, i::Int64, method::Simplified, centers) where {T<:Real, S<:Real}
        point = data[i, :]
        own_center = centers[labels[i]]
        # The adjoint turns the center vector into a 1×d row, the layout `euclidean` expects.
        return euclidean(point, own_center')
    end
    
    """
        bᵢ(data, labels, i, method::Common, centers)

    Return the smallest mean Euclidean distance from observation `i` (row `i` of `data`)
    to the members of any cluster other than its own.

    Clusters are enumerated as the indices of `centers`; `centers` is used only for that.
    `method` is only used for dispatch. A cluster without members yields `Inf`, so it can
    never be selected as the nearest one.
    """
    function bᵢ(data::AbstractArray{T}, labels::AbstractVector{S}, i::Int64, method::Common, centers) where {T<:Real, S<:Real}
        labelᵢ = labels[i]
        # Hoisted out of the loop; `float` guards integer data against overflow.
        xᵢ = float.(data[i, :]')
        dissim_labels = [idx for idx in eachindex(centers) if idx != labelᵢ]
        mean_dist = similar(dissim_labels, Float64)
        for (idx, j) in enumerate(dissim_labels)
            related_idx = findall(isequal(j), labels)
            if isempty(related_idx)
                mean_dist[idx] = Inf
            else
                # Per-member squared distances (reduce along the feature dimension); the
                # root is taken per member before averaging, not once over the grand total.
                sq_dists = sum(abs2, float.(view(data, related_idx, :)) .- xᵢ; dims=2)
                mean_dist[idx] = sum(sqrt, sq_dists)/length(related_idx)
            end
        end

        return minimum(mean_dist)
    end
    
    """
        bᵢ(data, labels, i, method::Simplified, centers)

    Return the smallest `euclidean` distance between observation `i` (row `i` of `data`)
    and the center of any cluster other than its own.

    `centers` is indexed by cluster label and each entry is a vector. `method` is only
    used for dispatch. Throws if there is no other cluster to compare against.
    """
    function bᵢ(data::AbstractArray{T}, labels::AbstractVector{S}, i::Int64, method::Simplified, centers) where {T<:Real, S<:Real}
        labelᵢ = labels[i]
        xᵢ = data[i, :]
        # Hand each center over as a 1×d row (the same call shape the Simplified `aᵢ`
        # uses). Passing it as a plain vector would broadcast against `xᵢ'` into a d×d matrix.
        return minimum(
            euclidean(xᵢ, centers[k]') for k in eachindex(centers) if k != labelᵢ
        )
    end
    
Benchmark and results (1000 data points and 4 clusters):
# Benchmark setup: a 1000×2 random table clustered with `kmeans(df, 4)`.
using ClusterAnalysis, DataFrames, Statistics, Tables, BenchmarkTools

# NOTE(review): `rand(Int64, …)` draws from the full Int64 range; confirm the values are
# converted to floating point before distances are squared, otherwise they can overflow.
df = DataFrame(rand(Int64, 1000, 2), :auto);
model = kmeans(df, 4);

# `$` interpolates the globals so the benchmark does not measure untyped global access.
# NOTE(review): spelled `Silouhette` here; confirm it matches the exported name.
@benchmark Silouhette($df, $model)
BenchmarkTools.Trial: 1741 samples with 1 evaluation.
 Range (min … max):  2.165 ms … 13.458 ms  ┊ GC (min … max): 0.00% … 63.10%
 Time  (median):     2.393 ms              ┊ GC (median):    0.00%
 Time  (mean ± σ):   2.851 ms ±  1.478 ms  ┊ GC (mean ± σ):  9.71% ± 13.98%

  █▆▅▄▃▄▄▂▂▁▁
  ████████████▅▆▅▄▄▄▁▁▁▁▁▁▄▄▄▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▇▆▄▄▅▇▇▅▅ █
  2.16 ms      Histogram: log(frequency) by time     10.1 ms <

 Memory estimate: 1.76 MiB, allocs estimate: 33516.

And plotting the results:

# Plot the raw data: the first and second columns of `df` are passed to `scatter`.
# NOTE(review): no plotting package appears in the `using` line above; confirm which
# one is loaded. `Matrix(df)` is materialised once per argument.
plot(
    scatter(
        Matrix(df)[:, 1],
        Matrix(df)[:, 2],
   )
)

# Plot the same two columns grouped by the cluster assignment in `model.cluster`,
# with one color per group (four colors listed, matching the 4 clusters requested).
# NOTE(review): the attribute names below look like Plots.jl; if so, the series type
# is normally selected with `seriestype=:scatter`. Confirm that `kind` is honored.
plot(
        kind=:scatter,
        Matrix(df)[:, 1],
        Matrix(df)[:, 2],
        group=model.cluster,
        legend=false,
        title="K-means clustering",
        xlabel="X",
        ylabel="Y",
        markersize=4,
        markerstrokewidth=0,
        markeralpha=0.5,
        markershape=:circle,
        color=[:red :blue :green :orange],
        size=(600, 400)

)

Benchmark (100,000 data points and 4 clusters):
# Same benchmark on a 100_000×2 random table, again clustered with `kmeans(df, 4)`.
# NOTE(review): `rand(Int64, …)` spans the full Int64 range; confirm the values are
# converted to floating point before distances are squared.
df = DataFrame(rand(Int64, 100_000, 2), :auto);
model = kmeans(df, 4);

# `$` interpolates the globals so the benchmark does not measure untyped global access.
@benchmark Silouhette($df, $model)
BenchmarkTools.Trial: 17 samples with 1 evaluation.
 Range (min … max):  271.559 ms … 353.798 ms  ┊ GC (min … max): 11.00% … 9.50%
 Time  (median):     300.393 ms               ┊ GC (median):    10.40%
 Time  (mean ± σ):   305.673 ms ±  22.780 ms  ┊ GC (mean ± σ):  10.45% ± 1.08%

  ▁       █    █▁▁  ▁  █   ▁         ▁ ▁ ▁       ▁▁           ▁  
  █▁▁▁▁▁▁▁█▁▁▁▁███▁▁█▁▁█▁▁▁█▁▁▁▁▁▁▁▁▁█▁█▁█▁▁▁▁▁▁▁██▁▁▁▁▁▁▁▁▁▁▁█ ▁
  272 ms           Histogram: frequency by time          354 ms <

 Memory estimate: 177.74 MiB, allocs estimate: 3498518.