From 5e3503467caf65ac7a1fe50505475cb36f98dbc9 Mon Sep 17 00:00:00 2001
From: Matthew Middlehurst <m.middlehurst@uea.ac.uk>
Date: Wed, 13 Nov 2024 09:48:09 +0000
Subject: [PATCH 1/2] release

---
 MANIFEST.in      | 3 ---
 README.md        | 5 +++--
 pyproject.toml   | 4 ++--
 tsml/__init__.py | 2 +-
 4 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index b459cfe..28ac35a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,3 @@
-recursive-include docs *
 recursive-include tsml *.py
 recursive-include tsml/datasets *.ts
 include .coveragerc
@@ -13,5 +12,3 @@ exclude .codecov.yml
 exclude .gitattributes
 exclude .gitignore
 exclude .pre-commit-config.yaml
-exclude .readthedocs.yml
-exclude sweep.yaml
diff --git a/README.md b/README.md
index fbd08e9..7609c25 100644
--- a/README.md
+++ b/README.md
@@ -8,14 +8,15 @@
 
 # tsml-py
 
-A toolkit for in-development time series machine learning algorithms.
+A repository for in-development time series machine learning algorithms and other odd
+bits by Matthew Middlehurst.
 
 Please see [`tsml_eval`](https://github.com/time-series-machine-learning/tsml-eval) and
 [`aeon`](https://github.com/aeon-toolkit/aeon) for more developed and stable packages. This package
 is more of a sandbox for testing out new ideas and algorithms. It may contain some
 algorithms and implementations that are not available in the other toolkits.
 
-The current release of `tsml` is v0.4.0.
+The current release of `tsml` is v0.5.0.
 
 ## Installation
 
diff --git a/pyproject.toml b/pyproject.toml
index 7679260..3cb3db7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "tsml"
-version = "0.4.0"
-description = "A toolkit for time series machine learning algorithms."
+version = "0.5.0"
+description = "A development sandbox for time series machine learning algorithms."
 authors = [
     {name = "Matthew Middlehurst", email = "m.b.middlehurst@soton.ac.uk"},
 ]
diff --git a/tsml/__init__.py b/tsml/__init__.py
index da155f0..90b5545 100644
--- a/tsml/__init__.py
+++ b/tsml/__init__.py
@@ -1,3 +1,3 @@
 """tsml."""
 
-__version__ = "0.4.0"
+__version__ = "0.5.0"

From 697ab0e9a6953c6b6d82c04b471c81a5b73accc9 Mon Sep 17 00:00:00 2001
From: Matthew Middlehurst <m.middlehurst@uea.ac.uk>
Date: Wed, 13 Nov 2024 10:48:41 +0000
Subject: [PATCH 2/2] seql

---
 .github/workflows/release.yml                 |   4 +-
 pyproject.toml                                |   3 +-
 tsml/dictionary_based/__init__.py             |   9 +
 tsml/dictionary_based/_mrseql.py              | 185 ++++++++++++++++++
 .../_mrsqm.py                                 |  11 --
 tsml/shapelet_based/__init__.py               |   2 -
 6 files changed, 198 insertions(+), 16 deletions(-)
 create mode 100644 tsml/dictionary_based/__init__.py
 create mode 100644 tsml/dictionary_based/_mrseql.py
 rename tsml/{shapelet_based => dictionary_based}/_mrsqm.py (94%)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 53dbf44..01621ad 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -79,14 +79,14 @@ jobs:
         with:
           timeout_minutes: 30
           max_attempts: 3
-          command: python -m pip install "${env:WHEELNAME}[dev,all_extras,unstable_extras]"
+          command: python -m pip install "${env:WHEELNAME}[dev,all_extras]"
       - if: matrix.os != 'windows-2022'
         name: Unix install
         uses: nick-fields/retry@v3
         with:
           timeout_minutes: 30
           max_attempts: 3
-          command: python -m pip install "${{ env.WHEELNAME }}[dev,all_extras,unstable_extras]"
+          command: python -m pip install "${{ env.WHEELNAME }}[dev,all_extras]"
 
       - name: Tests
         run: python -m pytest -n logical
diff --git a/pyproject.toml b/pyproject.toml
index 3cb3db7..6a52ed1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -57,7 +57,8 @@ all_extras = [
 unstable_extras = [
     "pycatch22",
     "pyfftw>=0.12.0; python_version < '3.12'",  # requires fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html)
-    "mrsqm>=0.0.7; platform_system != 'Windows' and python_version < '3.12'",  # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html)
+    "mrsqm>=0.0.7; platform_system == 'Linux' and python_version < '3.12'",  # Linux only; requires gcc and fftw to be installed (see http://www.fftw.org/index.html)
+    "mrseql>=0.0.4,<0.1.0; platform_system == 'Linux' and python_version < '3.12'",  # Linux only; requires gcc and fftw to be installed (see http://www.fftw.org/index.html)
 ]
 dev = [
     "pre-commit",
diff --git a/tsml/dictionary_based/__init__.py b/tsml/dictionary_based/__init__.py
new file mode 100644
index 0000000..0c2ceaf
--- /dev/null
+++ b/tsml/dictionary_based/__init__.py
@@ -0,0 +1,9 @@
+"""Dictionary-based estimators."""
+
+__all__ = [
+    "MrSEQLClassifier",
+    "MrSQMClassifier",
+]
+
+from tsml.dictionary_based._mrseql import MrSEQLClassifier
+from tsml.dictionary_based._mrsqm import MrSQMClassifier
diff --git a/tsml/dictionary_based/_mrseql.py b/tsml/dictionary_based/_mrseql.py
new file mode 100644
index 0000000..9197b23
--- /dev/null
+++ b/tsml/dictionary_based/_mrseql.py
@@ -0,0 +1,185 @@
+"""Multiple Representations Sequence Learning (MrSEQL) Classifier."""
+
+from typing import List, Union
+
+import numpy as np
+import pandas as pd
+from sklearn.base import ClassifierMixin
+from sklearn.utils.multiclass import check_classification_targets
+from sklearn.utils.validation import check_is_fitted
+
+from tsml.base import BaseTimeSeriesEstimator
+from tsml.utils.validation import _check_optional_dependency
+
+
+class MrSEQLClassifier(ClassifierMixin, BaseTimeSeriesEstimator):
+    """
+    Multiple Representations Sequence Learning (MrSEQL) Classifier.
+
+    This is a wrapper for the MrSEQLClassifier algorithm from the `mrseql` package.
+    MrSEQL is not included in ``all_extras`` as it requires gcc and fftw
+    (http://www.fftw.org/index.html) to be installed for Windows and some Linux OS.
+
+    Overview: MrSEQL extends the symbolic sequence classifier (SEQL) to work with
+    multiple symbolic representations of time series, using features extracted from the
+    SAX and SFA transformations.
+
+    Parameters
+    ----------
+    seql_mode : "clf" or "fs", default="fs"
+        If "fs", trains a logistic regression model with features extracted by SEQL.
+        If "clf", builds an ensemble of SEQL models.
+    symrep : "sax" or "sfa", or ["sax", "sfa"], default="sax"
+        The symbolic features to extract from the time series.
+    custom_config : dict, default=None
+        Additional configuration for the symbolic transformations. See the original
+        package for details. If provided, ``symrep`` is ignored.
+
+    References
+    ----------
+    .. [1] Le Nguyen, Thach, et al. "Interpretable time series classification using
+        linear models and multi-resolution multi-domain symbolic representations."
+        Data mining and knowledge discovery 33 (2019): 1183-1222.
+    """
+
+    def __init__(self, seql_mode="fs", symrep="sax", custom_config=None) -> None:
+        self.seql_mode = seql_mode
+        self.symrep = symrep
+        self.custom_config = custom_config
+        # NOTE(review): presumably fails fast if ``mrseql`` is not installed; confirm.
+        _check_optional_dependency("mrseql", "mrseql", self)
+
+        super().__init__()
+
+    def fit(self, X: Union[np.ndarray, List[np.ndarray]], y: np.ndarray) -> object:
+        """Fit the estimator to training data.
+
+        Parameters
+        ----------
+        X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints)
+            The training data.
+        y : 1D np.ndarray of shape (n_instances)
+            The class labels for fitting, indices correspond to instance indices in X.
+
+        Returns
+        -------
+        self :
+            Reference to self.
+        """
+        X, y = self._validate_data(X=X, y=y, ensure_min_samples=2)
+        X = self._convert_X(X)
+
+        check_classification_targets(y)
+
+        self.n_instances_, self.n_dims_, self.series_length_ = (
+            X.shape if X.ndim == 3 else (X.shape[0], 1, X.shape[1])
+        )
+        self.classes_ = np.unique(y)
+        self.n_classes_ = self.classes_.shape[0]
+        self.class_dictionary_ = {}
+        for index, class_val in enumerate(self.classes_):
+            self.class_dictionary_[class_val] = index
+        # A single class needs no model; predict and predict_proba special-case it.
+        if self.n_classes_ == 1:
+            return self
+        # Local import of the optional dependency; it shadows this class's name here.
+        from mrseql import MrSEQLClassifier
+
+        _X = _convert_data(X)
+
+        self.clf_ = MrSEQLClassifier(
+            seql_mode=self.seql_mode,
+            symrep=self.symrep,
+            custom_config=self.custom_config,
+        )
+        self.clf_.fit(_X, y)
+
+        return self
+
+    def predict(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
+        """Predicts labels for sequences in X.
+
+        Parameters
+        ----------
+        X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints)
+            The testing data.
+
+        Returns
+        -------
+        y : array-like of shape (n_instances)
+            Predicted class labels.
+        """
+        check_is_fitted(self)
+
+        # single class seen in fit; X is unvalidated here and may be a list, so use len()
+        if self.n_classes_ == 1:
+            return np.repeat(list(self.class_dictionary_.keys()), len(X), axis=0)
+
+        X = self._validate_data(X=X, reset=False)
+        X = self._convert_X(X)
+
+        return self.clf_.predict(_convert_data(X))
+
+    def predict_proba(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
+        """Predicts label probabilities for sequences in X.
+
+        Parameters
+        ----------
+        X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints)
+            The testing data.
+
+        Returns
+        -------
+        y : array-like of shape (n_instances, n_classes_)
+            Predicted probabilities using the ordering in classes_.
+        """
+        check_is_fitted(self)
+
+        # single class seen in fit; X is unvalidated here and may be a list, so use len()
+        if self.n_classes_ == 1:
+            return np.repeat([[1]], len(X), axis=0)
+
+        X = self._validate_data(X=X, reset=False)
+        X = self._convert_X(X)
+
+        return self.clf_.predict_proba(_convert_data(X))
+
+    def _more_tags(self) -> dict:
+        return {
+            "non_deterministic": True,
+            "_xfail_checks": {"check_estimators_pickle": "External failure to pickle."},
+            "optional_dependency": True,
+        }
+
+    @classmethod
+    def get_test_params(
+        cls, parameter_set: Union[str, None] = None
+    ) -> Union[dict, List[dict]]:
+        """Return unit test parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : None or str, default=None
+            Name of the set of test parameters to return, for use in tests. If no
+            special parameters are defined for a value, will return `"default"` set.
+
+        Returns
+        -------
+        params : dict or list of dict
+            Parameters to create testing instances of the class.
+        """
+        return {}
+
+
+def _convert_data(X):
+    """Convert a 3D array to a nested DataFrame with one column per channel."""
+
+    def _nest(channel):
+        # Wrap each row in a list so ``apply`` returns one object per instance
+        # rather than expanding it; ``.str[0]`` then unwraps that list.
+        rows = pd.DataFrame(X[:, channel, :]).apply(
+            lambda row: [pd.Series(row, dtype=X.dtype)], axis=1
+        )
+        return rows.str[0].rename(str(channel))
+
+    return pd.concat([_nest(c) for c in range(X.shape[1])], axis=1)
diff --git a/tsml/shapelet_based/_mrsqm.py b/tsml/dictionary_based/_mrsqm.py
similarity index 94%
rename from tsml/shapelet_based/_mrsqm.py
rename to tsml/dictionary_based/_mrsqm.py
index b1367ba..87fe4e4 100644
--- a/tsml/shapelet_based/_mrsqm.py
+++ b/tsml/dictionary_based/_mrsqm.py
@@ -59,17 +59,6 @@ class MrSQMClassifier(ClassifierMixin, BaseTimeSeriesEstimator):
     .. [2] Nguyen, Thach Le, and Georgiana Ifrim. "MrSQM: Fast time series
         classification with symbolic representations." arXiv preprint arXiv:2109.01036
         (2021).
-
-    Examples
-    --------
-    >>> from tsml.shapelet_based import MrSQMClassifier
-    >>> from tsml.utils.testing import generate_3d_test_data
-    >>> X, y = generate_3d_test_data(n_samples=8, series_length=10, random_state=0)
-    >>> clf = MrSQMClassifier(random_state=0)  # doctest: +SKIP
-    >>> clf.fit(X, y)  # doctest: +SKIP
-    MrSQMClassifier(...)
-    >>> clf.predict(X)  # doctest: +SKIP
-    array([0, 1, 1, 0, 0, 1, 0, 1])
     """
 
     def __init__(
diff --git a/tsml/shapelet_based/__init__.py b/tsml/shapelet_based/__init__.py
index dba6f01..b36f609 100644
--- a/tsml/shapelet_based/__init__.py
+++ b/tsml/shapelet_based/__init__.py
@@ -1,12 +1,10 @@
 """Shapelet-based estimators."""
 
 __all__ = [
-    "MrSQMClassifier",
     "RandomShapeletForestClassifier",
     "RandomShapeletForestRegressor",
 ]
 
-from tsml.shapelet_based._mrsqm import MrSQMClassifier
 from tsml.shapelet_based._rsf import (
     RandomShapeletForestClassifier,
     RandomShapeletForestRegressor,