1+ module ClusteringAPI
2+
# Use the package README as the documentation of the module itself.
@doc let
    # `@__DIR__` is the directory containing this source file; the README is
    # looked up one directory above it.
    path = joinpath(dirname(@__DIR__), "README.md")
    # Declare the README as a precompilation dependency, so that editing it
    # triggers recompilation and the module docstring does not go stale.
    include_dependency(path)
    # The value of the `let` block (the README text) becomes the docstring.
    read(path, String)
end ClusteringAPI
9+
export ClusteringAlgorithm, ClusteringResults,
       cluster, cluster_number, cluster_labels

"""
    ClusteringAlgorithm

Abstract supertype of the algorithm specifications given as the first
argument of `cluster`.
"""
abstract type ClusteringAlgorithm end

"""
    ClusteringResults

Abstract supertype of the values returned by `cluster`.
Results are queried with `cluster_number` and `cluster_labels`.
"""
abstract type ClusteringResults end
15+
"""
    cluster(ca::ClusteringAlgorithm, data) → cr::ClusteringResults

Cluster input `data` according to the algorithm specified by `ca`.
All options related to the algorithm are given as keyword arguments when
constructing `ca`. The input data can be specified in two ways:

- as a (d, m) matrix, with d the dimension of the data points and m the number of
  data points (i.e., each column is a data point).
- as a length-m vector of length-d vectors (i.e., each inner vector is a data point).

The cluster labels are always the
positive integers `1:n` with `n::Int` the number of created clusters.

The output is always a subtype of `ClusteringResults`,
which always extends the following two methods:

- `cluster_number(cr)` returns `n`.
- `cluster_labels(cr)` returns `labels::Vector{Int}`, a length-m vector of labels
  mapping each data point to its cluster (`1:n`).

and always includes `ca` in the field `algorithm`.

Other algorithm-related output can be obtained as a field of the result type,
or other specific functions of the result type.
This is described in the individual algorithm implementations.

The generic method defined here is only a fallback: it throws an `ArgumentError`
stating that `cluster` has not been implemented for `typeof(ca)`.
"""
function cluster(ca::ClusteringAlgorithm, data)
    # `data` is deliberately left untyped. This way both documented input forms
    # (matrix and vector of vectors) reach this informative error instead of a
    # `MethodError`, and an implementation written as `cluster(ca::MyAlg, data)`
    # cannot become ambiguous with this fallback for matrix input.
    throw(ArgumentError("No implementation for `cluster` for $(typeof(ca))."))
end
46+
"""
    cluster_number(cr::ClusteringResults) → n::Int

Return the number of clusters created in `cr`, the output of [`cluster`](@ref).

This generic method counts the distinct entries of `cluster_labels(cr)`.
"""
function cluster_number(cr::ClusteringResults)
    labels = cluster_labels(cr)
    # A `Set` keeps each label once, so its length is the number of distinct labels.
    distinct_labels = Set(labels)
    return length(distinct_labels)
end
55+
"""
    cluster_labels(cr::ClusteringResults) → labels::Vector{Int}

Return the cluster label of each data point that was given to [`cluster`](@ref).

This generic method reads the `labels` property of `cr`.
"""
cluster_labels(cr::ClusteringResults) = cr.labels
64+
# Two helper functions that keep the code agnostic to the input data type
"""
    input_data_size(data) → (d, m)

Return the dimension `d` of the data points and the number `m` of data points.

- For a matrix this is `size(data)`.
- For a vector of vectors, `d` is the length of the first inner vector and `m`
  is the length of the outer vector. The remaining inner vectors are not
  inspected.
"""
function input_data_size(A::AbstractMatrix)
    return size(A)
end

function input_data_size(A::AbstractVector{<:AbstractVector})
    point_dimension = length(first(A))
    point_count = length(A)
    return (point_dimension, point_count)
end
73+
"""
    each_data_point(data)

Return an indexable iterator over each data point in `data`, that can be
indexed with indices `1:m`.

- For a matrix the result is `eachcol(data)`.
- For a vector of vectors the input itself is returned, without copying.

NOTE(review): whether the result of `eachcol` supports indexing depends on the
Julia version in use — confirm against the package's minimum supported version.
"""
function each_data_point(A::AbstractMatrix)
    return eachcol(A)
end

function each_data_point(A::AbstractVector{<:AbstractVector})
    return A
end
82+
83+ end
0 commit comments