From 5e3503467caf65ac7a1fe50505475cb36f98dbc9 Mon Sep 17 00:00:00 2001 From: Matthew Middlehurst Date: Wed, 13 Nov 2024 09:48:09 +0000 Subject: [PATCH 1/2] release --- MANIFEST.in | 3 --- README.md | 5 +++-- pyproject.toml | 4 ++-- tsml/__init__.py | 2 +- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index b459cfe..28ac35a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,3 @@ -recursive-include docs * recursive-include tsml *.py recursive-include tsml/datasets *.ts include .coveragerc @@ -13,5 +12,3 @@ exclude .codecov.yml exclude .gitattributes exclude .gitignore exclude .pre-commit-config.yaml -exclude .readthedocs.yml -exclude sweep.yaml diff --git a/README.md b/README.md index fbd08e9..7609c25 100644 --- a/README.md +++ b/README.md @@ -8,14 +8,15 @@ # tsml-py -A toolkit for in-development time series machine learning algorithms. +A repository for in-development time series machine learning algorithms and other odd +bits by Matthew Middlehurst. Please see [`tsml_eval`](https://github.com/time-series-machine-learning/tsml-eval) and [`aeon`](https://github.com/aeon-toolkit/aeon) for more developed and stable packages. This package is more of a sandbox for testing out new ideas and algorithms. It may contain some algorithms and implementations that are not available in the other toolkits. -The current release of `tsml` is v0.4.0. +The current release of `tsml` is v0.5.0. ## Installation diff --git a/pyproject.toml b/pyproject.toml index 7679260..3cb3db7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta" [project] name = "tsml" -version = "0.4.0" -description = "A toolkit for time series machine learning algorithms." +version = "0.5.0" +description = "A development sandbox for time series machine learning algorithms." 
authors = [ {name = "Matthew Middlehurst", email = "m.b.middlehurst@soton.ac.uk"}, ] diff --git a/tsml/__init__.py b/tsml/__init__.py index da155f0..90b5545 100644 --- a/tsml/__init__.py +++ b/tsml/__init__.py @@ -1,3 +1,3 @@ """tsml.""" -__version__ = "0.4.0" +__version__ = "0.5.0" From 697ab0e9a6953c6b6d82c04b471c81a5b73accc9 Mon Sep 17 00:00:00 2001 From: Matthew Middlehurst Date: Wed, 13 Nov 2024 10:48:41 +0000 Subject: [PATCH 2/2] seql --- .github/workflows/release.yml | 4 +- pyproject.toml | 3 +- tsml/dictionary_based/__init__.py | 9 + tsml/dictionary_based/_mrseql.py | 185 ++++++++++++++++++ .../_mrsqm.py | 11 -- tsml/shapelet_based/__init__.py | 2 - 6 files changed, 198 insertions(+), 16 deletions(-) create mode 100644 tsml/dictionary_based/__init__.py create mode 100644 tsml/dictionary_based/_mrseql.py rename tsml/{shapelet_based => dictionary_based}/_mrsqm.py (94%) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 53dbf44..01621ad 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -79,14 +79,14 @@ jobs: with: timeout_minutes: 30 max_attempts: 3 - command: python -m pip install "${env:WHEELNAME}[dev,all_extras,unstable_extras]" + command: python -m pip install "${env:WHEELNAME}[dev,all_extras]" - if: matrix.os != 'windows-2022' name: Unix install uses: nick-fields/retry@v3 with: timeout_minutes: 30 max_attempts: 3 - command: python -m pip install "${{ env.WHEELNAME }}[dev,all_extras,unstable_extras]" + command: python -m pip install "${{ env.WHEELNAME }}[dev,all_extras]" - name: Tests run: python -m pytest -n logical diff --git a/pyproject.toml b/pyproject.toml index 3cb3db7..6a52ed1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,8 @@ all_extras = [ unstable_extras = [ "pycatch22", "pyfftw>=0.12.0; python_version < '3.12'", # requires fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html) - "mrsqm>=0.0.7; platform_system != 'Windows' and 
python_version < '3.12'", # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html) + "mrsqm>=0.0.7; platform_system == 'Linux' and python_version < '3.12'", # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html) + "mrseql>=0.0.4,<0.1.0; platform_system == 'Linux' and python_version < '3.12'", # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html) ] dev = [ "pre-commit", diff --git a/tsml/dictionary_based/__init__.py b/tsml/dictionary_based/__init__.py new file mode 100644 index 0000000..0c2ceaf --- /dev/null +++ b/tsml/dictionary_based/__init__.py @@ -0,0 +1,9 @@ +"""Dictionary-based estimators.""" + +__all__ = [ + "MrSEQLClassifier", + "MrSQMClassifier", +] + +from tsml.dictionary_based._mrseql import MrSEQLClassifier +from tsml.dictionary_based._mrsqm import MrSQMClassifier diff --git a/tsml/dictionary_based/_mrseql.py b/tsml/dictionary_based/_mrseql.py new file mode 100644 index 0000000..9197b23 --- /dev/null +++ b/tsml/dictionary_based/_mrseql.py @@ -0,0 +1,185 @@ +"""Multiple Representations Sequence Learning (MrSEQL) Classifier.""" + +from typing import List, Union + +import numpy as np +import pandas as pd +from sklearn.base import ClassifierMixin +from sklearn.utils.multiclass import check_classification_targets +from sklearn.utils.validation import check_is_fitted + +from tsml.base import BaseTimeSeriesEstimator +from tsml.utils.validation import _check_optional_dependency + + +class MrSEQLClassifier(ClassifierMixin, BaseTimeSeriesEstimator): + """ + Multiple Representations Sequence Learning (MrSEQL) Classifier. + + This is a wrapper for the MrSEQLClassifier algorithm from the `mrseql` package. + MrSEQL is not included in ``all_extras`` as it requires gcc and fftw + (http://www.fftw.org/index.html) to be installed for Windows and some Linux OS. 
+ + Overview: MrSEQL extends the symbolic sequence classifier (SEQL) to work with + multiple symbolic representations of time series, using features extracted from the + SAX and SFA transformations. + + Parameters + ---------- + seql_mode : "clf" or "fs", default="fs" + If "fs", trains a logistic regression model with features extracted by SEQL. + If "clf", builds an ensemble of SEQL models. + symrep : "sax" or "sfa", or ["sax", "sfa"], default="sax" + The symbolic features to extract from the time series. + custom_config : dict, default=None + Additional configuration for the symbolic transformations. See the original + package for details. ``symrep`` will be ignored if used. + + References + ---------- + .. [1] Le Nguyen, Thach, et al. "Interpretable time series classification using + linear models and multi-resolution multi-domain symbolic representations." + Data mining and knowledge discovery 33 (2019): 1183-1222. + """ + + def __init__(self, seql_mode="fs", symrep=("sax"), custom_config=None) -> None: + self.seql_mode = seql_mode + self.symrep = symrep + self.custom_config = custom_config + + _check_optional_dependency("mrseql", "mrseql", self) + + super().__init__() + + def fit(self, X: Union[np.ndarray, List[np.ndarray]], y: np.ndarray) -> object: + """Fit the estimator to training data. + + Parameters + ---------- + X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints) + The training data. + y : 1D np.ndarray of shape (n_instances) + The class labels for fitting, indices correspond to instance indices in X. + + Returns + ------- + self : + Reference to self. 
+ """ + X, y = self._validate_data(X=X, y=y, ensure_min_samples=2) + X = self._convert_X(X) + + check_classification_targets(y) + + self.n_instances_, self.n_dims_, self.series_length_ = ( + X.shape if X.ndim == 3 else (X.shape[0], 1, X.shape[1]) + ) + self.classes_ = np.unique(y) + self.n_classes_ = self.classes_.shape[0] + self.class_dictionary_ = {} + for index, class_val in enumerate(self.classes_): + self.class_dictionary_[class_val] = index + + if self.n_classes_ == 1: + return self + + from mrseql import MrSEQLClassifier + + _X = _convert_data(X) + + self.clf_ = MrSEQLClassifier( + seql_mode=self.seql_mode, + symrep=self.symrep, + custom_config=self.custom_config, + ) + self.clf_.fit(_X, y) + + return self + + def predict(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray: + """Predicts labels for sequences in X. + + Parameters + ---------- + X : 3D np.array of shape (n_instances, n_channels, n_timepoints) + The testing data. + + Returns + ------- + y : array-like of shape (n_instances) + Predicted class labels. + """ + check_is_fitted(self) + + # treat case of single class seen in fit + if self.n_classes_ == 1: + return np.repeat(list(self.class_dictionary_.keys()), X.shape[0], axis=0) + + X = self._validate_data(X=X, reset=False) + X = self._convert_X(X) + + return self.clf_.predict(_convert_data(X)) + + def predict_proba(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray: + """Predicts label probabilities for sequences in X. + + Parameters + ---------- + X : 3D np.array of shape (n_instances, n_channels, n_timepoints) + The testing data. + + Returns + ------- + y : array-like of shape (n_instances, n_classes_) + Predicted probabilities using the ordering in classes_. 
+ """ + check_is_fitted(self) + + # treat case of single class seen in fit + if self.n_classes_ == 1: + return np.repeat([[1]], X.shape[0], axis=0) + + X = self._validate_data(X=X, reset=False) + X = self._convert_X(X) + + return self.clf_.predict_proba(_convert_data(X)) + + def _more_tags(self) -> dict: + return { + "non_deterministic": True, + "_xfail_checks": {"check_estimators_pickle": "External failure to pickle."}, + "optional_dependency": True, + } + + @classmethod + def get_test_params( + cls, parameter_set: Union[str, None] = None + ) -> Union[dict, List[dict]]: + """Return unit test parameter settings for the estimator. + + Parameters + ---------- + parameter_set : None or str, default=None + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict + Parameters to create testing instances of the class. + """ + return {} + + +def _convert_data(X): + column_list = [] + for i in range(X.shape[1]): + nested_column = ( + pd.DataFrame(X[:, i, :]) + .apply(lambda x: [pd.Series(x, dtype=X.dtype)], axis=1) + .str[0] + .rename(str(i)) + ) + column_list.append(nested_column) + df = pd.concat(column_list, axis=1) + return df diff --git a/tsml/shapelet_based/_mrsqm.py b/tsml/dictionary_based/_mrsqm.py similarity index 94% rename from tsml/shapelet_based/_mrsqm.py rename to tsml/dictionary_based/_mrsqm.py index b1367ba..87fe4e4 100644 --- a/tsml/shapelet_based/_mrsqm.py +++ b/tsml/dictionary_based/_mrsqm.py @@ -59,17 +59,6 @@ class MrSQMClassifier(ClassifierMixin, BaseTimeSeriesEstimator): .. [2] Nguyen, Thach Le, and Georgiana Ifrim. "MrSQM: Fast time series classification with symbolic representations." arXiv preprint arXiv:2109.01036 (2021). 
- - Examples - -------- - >>> from tsml.shapelet_based import MrSQMClassifier - >>> from tsml.utils.testing import generate_3d_test_data - >>> X, y = generate_3d_test_data(n_samples=8, series_length=10, random_state=0) - >>> clf = MrSQMClassifier(random_state=0) # doctest: +SKIP - >>> clf.fit(X, y) # doctest: +SKIP - MrSQMClassifier(...) - >>> clf.predict(X) # doctest: +SKIP - array([0, 1, 1, 0, 0, 1, 0, 1]) """ def __init__( diff --git a/tsml/shapelet_based/__init__.py b/tsml/shapelet_based/__init__.py index dba6f01..b36f609 100644 --- a/tsml/shapelet_based/__init__.py +++ b/tsml/shapelet_based/__init__.py @@ -1,12 +1,10 @@ """Shapelet-based estimators.""" __all__ = [ - "MrSQMClassifier", "RandomShapeletForestClassifier", "RandomShapeletForestRegressor", ] -from tsml.shapelet_based._mrsqm import MrSQMClassifier from tsml.shapelet_based._rsf import ( RandomShapeletForestClassifier, RandomShapeletForestRegressor,