phenology
diff --git a/‎CHANGELOG.rst
Lines changed: 12 additions & 0 deletions b/‎CHANGELOG.rst
Lines changed: 12 additions & 0 deletions
diff --git a/‎CITATION.cff
Lines changed: 2 additions & 2 deletions b/‎CITATION.cff
Lines changed: 2 additions & 2 deletions
diff --git a/‎README.rst
Lines changed: 20 additions & 15 deletions b/‎README.rst
Lines changed: 20 additions & 15 deletions
diff --git a/‎cgc/__version__.py
Lines changed: 1 addition & 1 deletion b/‎cgc/__version__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎cgc/coclustering.py
Lines changed: 65 additions & 37 deletions b/‎cgc/coclustering.py
Lines changed: 65 additions & 37 deletions
diff --git a/‎cgc/coclustering_dask.py
Lines changed: 24 additions & 13 deletions b/‎cgc/coclustering_dask.py
Lines changed: 24 additions & 13 deletions
diff --git a/‎cgc/coclustering_numpy.py
Lines changed: 27 additions & 14 deletions b/‎cgc/coclustering_numpy.py
Lines changed: 27 additions & 14 deletions
@@ -8,6 +8,18 @@ This project adheres to `Semantic Versioning <http://semver.org/>`_.
 [Unreleased]
 ************
 
+[0.5.0] - 2021-09-23
+********************
+
+Added
+-----
+* k-means implementation for tri-clustering
+* utility functions to calculate cluster-based averages for tri-clustering
+
+Changed
+-------
+* Best k value in k-means is now selected automatically using the Silhouette score
+
 [0.4.0] - 2021-07-29
 ********************
 
 
@@ -31,7 +31,7 @@ authors:
     family-names: Zurita-Milla
     given-names: Raul
 cff-version: "1.0.3"
-date-released: 2021-07-27
+date-released: 2021-09-23
 doi: "10.5281/zenodo.3979172"
 keywords:
   - "clustering"
@@ -42,5 +42,5 @@ keywords:
 license: "Apache-2.0"
 message: "If you use this software, please cite it using these metadata."
 title: "Clustering Geo-Data Cubes (CGC): A Clustering Tool for Geospatial Applications"
-version: "0.4.0"
+version: "0.5.0"
 ...
@@ -2,7 +2,7 @@
    :widths: 25 25
    :header-rows: 1
 
-   * - fair-software.nl recommendations
+   * - `fair-software.nl <https://fair-software.nl>`_ recommendations
      - Badges
    * - \1. Code repository
      - |GitHub Badge|
@@ -53,18 +53,16 @@
    :target: https://cgc.readthedocs.io/en/latest/?badge=latest
    :alt: Documentation Status
 
-################################################################################
 CGC: Clustering Geo-Data Cubes
-################################################################################
+==============================
 
 Clustering Geo-Data Cubes (CGC) is a Python package to perform clustering analysis for multidimensional geospatial data.
 The included tools allow the user to efficiently run tasks in parallel on local and distributed systems.
 
-
 Installation
 ------------
 
-To install cgc, do:
+To install CGC, do:
 
 .. code-block:: console
 
@@ -85,22 +83,22 @@ Run tests (including coverage) with:
 
   python setup.py test
 
-
 Documentation
-*************
+-------------
 
 The project's full documentation can be found `here <https://cgc.readthedocs.io/en/latest/>`_.
 
 Contributing
-************
+------------
 
-If you want to contribute to the development of cgc,
-have a look at the `contribution guidelines <CONTRIBUTING.rst>`_.
+If you want to contribute to the development of CGC, have a look at the `contribution guidelines`_.
+
+.. _contribution guidelines: https://github.com/phenology/cgc/tree/master/CONTRIBUTING.md
 
 License
-*******
+-------
 
-Copyright (c) 2020,
+Copyright (c) 2020-2021,
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -114,9 +112,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
+Credits
+-------
 
+The code has been developed as a collaborative effort between the `ITC, University of Twente`_ and
+`the Netherlands eScience Center`_ within the generalization of the project
+`High spatial resolution phenological modelling at continental scales`_.
 
-Credits
-*******
+.. _ITC, University of Twente: https://www.itc.nl
+.. _High spatial resolution phenological modelling at continental scales: https://www.esciencecenter.nl/projects/high-spatial-resolution-phenological-modelling-at-continental-scales/
+.. _the Netherlands eScience Center: https://www.esciencecenter.nl
 
-This package was created with `Cookiecutter <https://github.com/audreyr/cookiecutter>`_ and the `NLeSC/python-template <https://github.com/NLeSC/python-template>`_.
+This package was created with `Cookiecutter <https://github.com/audreyr/cookiecutter>`_ and the
+`NLeSC/python-template <https://github.com/NLeSC/python-template>`_.
@@ -1 +1 @@
-__version__ = '0.4.0'
+__version__ = '0.5.0'
@@ -15,19 +15,51 @@
 
 class CoclusteringResults(Results):
     """
-    Contains results and metadata of a co-clustering calculation
+    Contains results and metadata of a co-clustering calculation.
+
+    :var row_clusters: Final row cluster assignment.
+    :type row_clusters: numpy.ndarray
+    :var col_clusters: Final column cluster assignment.
+    :type col_clusters: numpy.ndarray
+    :var error: Approximation error of the co-clustering.
+    :type error: float
+    :var nruns_completed: Number of successfully completed runs.
+    :type nruns_completed: int
+    :var nruns_converged: Number of converged runs.
+    :type nruns_converged: int
     """
-    def reset(self):
-        self.row_clusters = None
-        self.col_clusters = None
-        self.error = None
-        self.nruns_completed = 0
-        self.nruns_converged = 0
+    row_clusters = None
+    col_clusters = None
+    error = None
+    nruns_completed = 0
+    nruns_converged = 0
 
 
 class Coclustering(object):
     """
-    Perform the co-clustering analysis of a 2D array
+    Perform a co-clustering analysis for a two-dimensional array.
+
+    :param Z: Data matrix for which to run the co-clustering analysis.
+    :type Z: numpy.ndarray or dask.array.Array
+    :param nclusters_row: Number of row clusters.
+    :type nclusters_row: int
+    :param nclusters_col: Number of column clusters.
+    :type nclusters_col: int
+    :param conv_threshold: Convergence threshold for the objective function.
+    :type conv_threshold: float, optional
+    :param max_iterations: Maximum number of iterations.
+    :type max_iterations: int, optional
+    :param nruns: Number of differently-initialized runs.
+    :type nruns: int, optional
+    :param epsilon: Numerical parameter, avoids zero arguments in the
+        logarithm that appears in the expression of the objective function.
+    :type epsilon: float, optional
+    :param output_filename: Name of the JSON file where to write the results.
+    :type output_filename: string, optional
+    :param row_clusters_init: Initial row cluster assignment.
+    :type row_clusters_init: numpy.ndarray or array_like, optional
+    :param col_clusters_init: Initial column cluster assignment.
+    :type col_clusters_init: numpy.ndarray or array_like, optional
     """
     def __init__(self,
                  Z,
@@ -40,20 +72,6 @@ def __init__(self,
                  output_filename='',
                  row_clusters_init=None,
                  col_clusters_init=None):
-        """
-        Initialize the object
-
-        :param Z: m x n data matrix
-        :param nclusters_row: number of row clusters
-        :param nclusters_col: number of column clusters
-        :param conv_threshold: convergence threshold for the objective function
-        :param max_iterations: maximum number of iterations
-        :param nruns: number of differently-initialized runs
-        :param epsilon: numerical parameter, avoids zero arguments in log
-        :param output_filename: name of the file where to write the clusters
-        :param row_clusters_init: initial row clusters
-        :param col_clusters_init: initial column clusters
-        """
         # Input parameters -----------------
         self.nclusters_row = nclusters_row
         self.nclusters_col = nclusters_col
@@ -80,11 +98,16 @@ def __init__(self,
 
     def run_with_dask(self, client=None, low_memory=True):
         """
-        Run the co-clustering with Dask
-
-        :param client: Dask client
-        :param low_memory: if false, all runs are submitted to the Dask cluster
-        :return: co-clustering results
+        Run the co-clustering analysis using Dask.
+
+        :param client: Dask client. If not specified, the default
+            `LocalCluster` is employed.
+        :type client: dask.distributed.Client, optional
+        :param low_memory: If False, all runs are submitted to the Dask cluster
+            (experimental feature, discouraged).
+        :type low_memory: bool, optional
+        :return: Co-clustering results.
+        :rtype: cgc.coclustering.CoclusteringResults
         """
         self.client = client if client is not None else Client()
 
@@ -101,14 +124,19 @@ def run_with_threads(self,
                          low_memory=False,
                          numba_jit=False):
         """
-        Run the co-clustering using an algorithm based on numpy + threading
-        (only suitable for local runs)
-
-        :param nthreads: number of threads
-        :param low_memory: if true, use a memory-conservative algorithm
-        :param numba_jit: if true, and low_memory is true, then use Numba
-                          just-in-time compilation to improve performance
-        :return: co-clustering results
+        Run the co-clustering using an algorithm based on Numpy plus threading
+        (only suitable for local runs).
+
+        :param nthreads: Number of threads employed to simultaneously run
+            differently-initialized co-clustering analyses.
+        :type nthreads: int, optional
+        :param low_memory: If True, use a memory-conservative algorithm.
+        :type low_memory: bool, optional
+        :param numba_jit: If True, and low_memory is True, then use Numba
+                          just-in-time compilation to improve the performance.
+        :type numba_jit: bool, optional
+        :return: Co-clustering results.
+        :rtype: cgc.coclustering.CoclusteringResults
         """
         with ThreadPoolExecutor(max_workers=nthreads) as executor:
             futures = [
@@ -143,7 +171,7 @@ def run_with_threads(self,
         return self.results
 
     def _dask_runs_memory(self):
-        """ Memory efficient Dask implementation: sequential runs """
+        """ Memory efficient Dask implementation: sequential runs. """
         for r in range(self.nruns):
             logger.info(f'Run {self.results.nruns_completed}')
             converged, niters, row, col, e = coclustering_dask.coclustering(
@@ -171,7 +199,7 @@ def _dask_runs_memory(self):
     def _dask_runs_performance(self):
         """
         Faster but memory-intensive Dask implementation: all runs are
-        simultaneosly submitted to the scheduler
+        simultaneously submitted to the scheduler (experimental, discouraged).
         """
         Z = self.client.scatter(self.Z)
         futures = [self.client.submit(
 
@@ -31,19 +31,30 @@ def coclustering(Z, nclusters_row, nclusters_col, errobj, niters, epsilon,
                  col_clusters_init=None, row_clusters_init=None,
                  run_on_worker=False):
     """
-    Run the co-clustering, Dask implementation
-
-    :param Z: m x n data matrix
-    :param nclusters_row: num row clusters
-    :param nclusters_col: number of column clusters
-    :param errobj: convergence threshold for the objective function
-    :param niters: maximum number of iterations
-    :param epsilon: numerical parameter, avoids zero arguments in log
-    :param row_clusters_init: initial row cluster assignment
-    :param col_clusters_init: initial column cluster assignment
-    :param run_on_worker: whether the function is submitted to a Dask worker
-    :return: has converged, number of iterations performed. final row and
-    column clustering, error value
+    Run the co-clustering analysis, Dask implementation.
+
+    :param Z: Data matrix for which to run the co-clustering analysis.
+    :type Z: dask.array.Array or array_like
+    :param nclusters_row: Number of row clusters.
+    :type nclusters_row: int
+    :param nclusters_col: Number of column clusters.
+    :type nclusters_col: int
+    :param errobj: Convergence threshold for the objective function.
+    :type errobj: float
+    :param niters: Maximum number of iterations.
+    :type niters: int
+    :param epsilon: Numerical parameter, avoids zero arguments in the
+        logarithm that appears in the expression of the objective function.
+    :type epsilon: float
+    :param row_clusters_init: Initial row cluster assignment.
+    :type row_clusters_init: numpy.ndarray or array_like, optional
+    :param col_clusters_init: Initial column cluster assignment.
+    :type col_clusters_init: numpy.ndarray or array_like, optional
+    :param run_on_worker: Whether the function is submitted to a Dask worker.
+    :type run_on_worker: bool, optional
+    :return: Has converged, number of iterations performed, final row and
+        column clustering, approximation error of the co-clustering.
+    :rtype: tuple
     """
     client = get_client()
 
 
@@ -113,20 +113,33 @@ def coclustering(Z,
                  row_clusters_init=None,
                  col_clusters_init=None):
     """
-    Run the co-clustering, Numpy-based implementation
-
-    :param Z: m x n data matrix
-    :param nclusters_row: number of row clusters
-    :param nclusters_col: number of column clusters
-    :param errobj: convergence threshold for the objective function
-    :param niters: maximum number of iterations
-    :param epsilon: numerical parameter, avoids zero arguments in log
-    :param low_memory: boolean, set low memory usage version
-    :param numba_jit: boolean, set numba optimized single node  version
-    :param row_clusters_init: initial row cluster assignment
-    :param col_clusters_init: initial column cluster assignment
-    :return: has converged, number of iterations performed, final row and
-    column clustering, error value
+    Run the co-clustering analysis, Numpy-based implementation.
+
+    :param Z: Data matrix for which to run the co-clustering analysis.
+    :type Z: numpy.ndarray
+    :param nclusters_row: Number of row clusters.
+    :type nclusters_row: int
+    :param nclusters_col: Number of column clusters.
+    :type nclusters_col: int
+    :param errobj: Convergence threshold for the objective function.
+    :type errobj: float, optional
+    :param niters: Maximum number of iterations.
+    :type niters: int, optional
+    :param epsilon: Numerical parameter, avoids zero arguments in the
+        logarithm that appears in the expression of the objective function.
+    :type epsilon: float, optional
+    :param low_memory: Make use of a low-memory version of the algorithm.
+    :type low_memory: bool, optional
+    :param numba_jit: Make use of Numba JIT acceleration (only if low_memory
+        is True).
+    :type numba_jit: bool, optional
+    :param row_clusters_init: Initial row cluster assignment.
+    :type row_clusters_init: numpy.ndarray or array_like, optional
+    :param col_clusters_init: Initial column cluster assignment.
+    :type col_clusters_init: numpy.ndarray or array_like, optional
+    :return: Has converged, number of iterations performed, final row and
+        column clustering, approximation error of the co-clustering.
+    :rtype: tuple
     """
     [m, n] = Z.shape
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = '0.4.0'`
	`1`	`+__version__ = '0.5.0'`