Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,14 @@ jobs:
with:
timeout_minutes: 30
max_attempts: 3
command: python -m pip install "${env:WHEELNAME}[dev,all_extras,unstable_extras]"
command: python -m pip install "${env:WHEELNAME}[dev,all_extras]"
- if: matrix.os != 'windows-2022'
name: Unix install
uses: nick-fields/retry@v3
with:
timeout_minutes: 30
max_attempts: 3
command: python -m pip install "${{ env.WHEELNAME }}[dev,all_extras,unstable_extras]"
command: python -m pip install "${{ env.WHEELNAME }}[dev,all_extras]"

- name: Tests
run: python -m pytest -n logical
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ all_extras = [
unstable_extras = [
"pycatch22",
"pyfftw>=0.12.0; python_version < '3.12'", # requires fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html)
"mrsqm>=0.0.7; platform_system != 'Windows' and python_version < '3.12'", # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html)
"mrsqm>=0.0.7; platform_system == 'Linux' and python_version < '3.12'", # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html)
"mrseql>=0.0.4,<0.1.0; platform_system == 'Linux' and python_version < '3.12'", # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html)
]
dev = [
"pre-commit",
Expand Down
9 changes: 9 additions & 0 deletions tsml/dictionary_based/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""Dictionary-based estimators."""

# Public names re-exported by this package; each entry is imported below from
# its private implementation module.
__all__ = [
"MrSEQLClassifier",
"MrSQMClassifier",
]

from tsml.dictionary_based._mrseql import MrSEQLClassifier
from tsml.dictionary_based._mrsqm import MrSQMClassifier
185 changes: 185 additions & 0 deletions tsml/dictionary_based/_mrseql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
"""Multiple Representations Sequence Learning (MrSEQL) Classifier."""

from typing import List, Union

import numpy as np
import pandas as pd
from sklearn.base import ClassifierMixin
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_is_fitted

from tsml.base import BaseTimeSeriesEstimator
from tsml.utils.validation import _check_optional_dependency


class MrSEQLClassifier(ClassifierMixin, BaseTimeSeriesEstimator):
    """
    Multiple Representations Sequence Learning (MrSEQL) Classifier.

    This is a wrapper for the MrSEQLClassifier algorithm from the `mrseql` package.
    MrSEQL is not included in ``all_extras`` as it requires gcc and fftw
    (http://www.fftw.org/index.html) to be installed for Windows and some Linux OS.

    Overview: MrSEQL extends the symbolic sequence classifier (SEQL) to work with
    multiple symbolic representations of time series, using features extracted from the
    SAX and SFA transformations.

    Parameters
    ----------
    seql_mode : "clf" or "fs", default="fs".
        If "fs", trains a logistic regression model with features extracted by SEQL.
        If "clf", builds an ensemble of SEQL models
    symrep : "sax" or "sfa", or ["sax", "sfa"], default = "sax"
        The symbolic features to extract from the time series.
    custom_config : dict, default=None
        Additional configuration for the symbolic transformations. See the original
        package for details. ``symrep`` will be ignored if used.

    References
    ----------
    .. [1] Le Nguyen, Thach, et al. "Interpretable time series classification using
        linear models and multi-resolution multi-domain symbolic representations."
        Data mining and knowledge discovery 33 (2019): 1183-1222.
    """

    # NOTE: the default used to be spelled ``("sax")``, which is the plain string
    # "sax" (no trailing comma, so not a tuple). It is written as a string here so
    # the signature matches the documented default; the value is unchanged.
    def __init__(self, seql_mode="fs", symrep="sax", custom_config=None) -> None:
        self.seql_mode = seql_mode
        self.symrep = symrep
        self.custom_config = custom_config

        # Fail early, at construction time, if the optional package is missing.
        _check_optional_dependency("mrseql", "mrseql", self)

        super().__init__()

    def fit(self, X: Union[np.ndarray, List[np.ndarray]], y: np.ndarray) -> object:
        """Fit the estimator to training data.

        Parameters
        ----------
        X : 3D np.ndarray of shape (n_instances, n_channels, n_timepoints)
            The training data.
        y : 1D np.ndarray of shape (n_instances)
            The class labels for fitting, indices correspond to instance indices in X

        Returns
        -------
        self :
            Reference to self.
        """
        X, y = self._validate_data(X=X, y=y, ensure_min_samples=2)
        X = self._convert_X(X)

        check_classification_targets(y)

        self.n_instances_, self.n_dims_, self.series_length_ = (
            X.shape if X.ndim == 3 else (X.shape[0], 1, X.shape[1])
        )
        self.classes_ = np.unique(y)
        self.n_classes_ = self.classes_.shape[0]
        self.class_dictionary_ = {
            class_val: index for index, class_val in enumerate(self.classes_)
        }

        # With a single class there is nothing to learn; predict/predict_proba
        # short-circuit on ``n_classes_`` and never touch ``clf_``.
        if self.n_classes_ == 1:
            return self

        # Imported lazily so the module can be imported without the optional
        # dependency installed.
        from mrseql import MrSEQLClassifier

        _X = _convert_data(X)

        self.clf_ = MrSEQLClassifier(
            seql_mode=self.seql_mode,
            symrep=self.symrep,
            custom_config=self.custom_config,
        )
        self.clf_.fit(_X, y)

        return self

    def predict(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
        """Predicts labels for sequences in X.

        Parameters
        ----------
        X : 3D np.array of shape (n_instances, n_channels, n_timepoints)
            The testing data.

        Returns
        -------
        y : array-like of shape (n_instances)
            Predicted class labels.
        """
        check_is_fitted(self)

        # Validate and convert before the single-class shortcut: the shortcut
        # needs ``X.shape``, which a list input does not have, and invalid input
        # should be rejected regardless of how many classes were seen in fit.
        X = self._validate_data(X=X, reset=False)
        X = self._convert_X(X)

        # treat case of single class seen in fit
        if self.n_classes_ == 1:
            return np.repeat(self.classes_, X.shape[0], axis=0)

        return self.clf_.predict(_convert_data(X))

    def predict_proba(self, X: Union[np.ndarray, List[np.ndarray]]) -> np.ndarray:
        """Predicts labels probabilities for sequences in X.

        Parameters
        ----------
        X : 3D np.array of shape (n_instances, n_channels, n_timepoints)
            The testing data.

        Returns
        -------
        y : array-like of shape (n_instances, n_classes_)
            Predicted probabilities using the ordering in classes_.
        """
        check_is_fitted(self)

        # Same ordering as ``predict``: validate first so the shortcut below can
        # rely on ``X.shape`` being available.
        X = self._validate_data(X=X, reset=False)
        X = self._convert_X(X)

        # treat case of single class seen in fit
        if self.n_classes_ == 1:
            return np.repeat([[1]], X.shape[0], axis=0)

        return self.clf_.predict_proba(_convert_data(X))

    def _more_tags(self) -> dict:
        return {
            "non_deterministic": True,
            "_xfail_checks": {"check_estimators_pickle": "External failure to pickle."},
            "optional_dependency": True,
        }

    @classmethod
    def get_test_params(
        cls, parameter_set: Union[str, None] = None
    ) -> Union[dict, List[dict]]:
        """Return unit test parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : None or str, default=None
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.

        Returns
        -------
        params : dict or list of dict
            Parameters to create testing instances of the class.
        """
        # No overrides: the estimator is tested with its constructor defaults.
        return {}


def _convert_data(X):
column_list = []
for i in range(X.shape[1]):
nested_column = (
pd.DataFrame(X[:, i, :])
.apply(lambda x: [pd.Series(x, dtype=X.dtype)], axis=1)
.str[0]
.rename(str(i))
)
column_list.append(nested_column)
df = pd.concat(column_list, axis=1)
return df
11 changes: 0 additions & 11 deletions tsml/shapelet_based/_mrsqm.py → tsml/dictionary_based/_mrsqm.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,17 +59,6 @@ class MrSQMClassifier(ClassifierMixin, BaseTimeSeriesEstimator):
.. [2] Nguyen, Thach Le, and Georgiana Ifrim. "MrSQM: Fast time series
classification with symbolic representations." arXiv preprint arXiv:2109.01036
(2021).

Examples
--------
>>> from tsml.shapelet_based import MrSQMClassifier
>>> from tsml.utils.testing import generate_3d_test_data
>>> X, y = generate_3d_test_data(n_samples=8, series_length=10, random_state=0)
>>> clf = MrSQMClassifier(random_state=0) # doctest: +SKIP
>>> clf.fit(X, y) # doctest: +SKIP
MrSQMClassifier(...)
>>> clf.predict(X) # doctest: +SKIP
array([0, 1, 1, 0, 0, 1, 0, 1])
"""

def __init__(
Expand Down
2 changes: 0 additions & 2 deletions tsml/shapelet_based/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
"""Shapelet-based estimators."""

__all__ = [
"MrSQMClassifier",
"RandomShapeletForestClassifier",
"RandomShapeletForestRegressor",
]

from tsml.shapelet_based._mrsqm import MrSQMClassifier
from tsml.shapelet_based._rsf import (
RandomShapeletForestClassifier,
RandomShapeletForestRegressor,
Expand Down
Loading