diff --git a/skore/src/skore/sklearn/_estimator/feature_importance_accessor.py b/skore/src/skore/sklearn/_estimator/feature_importance_accessor.py
index 7d55f86c9..0e3a4b300 100644
--- a/skore/src/skore/sklearn/_estimator/feature_importance_accessor.py
+++ b/skore/src/skore/sklearn/_estimator/feature_importance_accessor.py
@@ -203,10 +203,24 @@ def coefficients(self) -> pd.DataFrame:
             if isinstance(parent_estimator, Pipeline)
             else parent_estimator
         )
-        intercept = np.atleast_2d(estimator.intercept_)
-        coef = np.atleast_2d(estimator.coef_)
-
-        data = np.concatenate([intercept, coef.T])
+        try:
+            intercept = np.atleast_2d(estimator.intercept_)
+        except AttributeError:
+            # SGDOneClassSVM does not expose `intercept_`
+            intercept = None
+
+        try:
+            coef = np.atleast_2d(estimator.coef_)
+        except AttributeError:
+            # TransformedTargetRegressor() does not expose `coef_`
+            coef = np.atleast_2d(estimator.regressor_.coef_)
+
+        if intercept is None:
+            data = coef.T
+            index = list(feature_names)
+        else:
+            data = np.concatenate([intercept, coef.T])
+            index = ["Intercept"] + list(feature_names)
 
         if data.shape[1] == 1:
             columns = ["Coefficient"]
@@ -217,7 +231,7 @@
 
         df = pd.DataFrame(
             data=data,
-            index=["Intercept"] + list(feature_names),
+            index=index,
             columns=columns,
         )
 
diff --git a/skore/src/skore/utils/_accessor.py b/skore/src/skore/utils/_accessor.py
index 6f3e63995..7a5d8bf28 100644
--- a/skore/src/skore/utils/_accessor.py
+++ b/skore/src/skore/utils/_accessor.py
@@ -59,6 +59,12 @@ def check(accessor: Any) -> bool:
         )
         if hasattr(estimator, "coef_"):
             return True
+        try:  # e.g. TransformedTargetRegressor()
+            if hasattr(estimator.regressor_, "coef_"):
+                return True
+        except AttributeError as msg:
+            if "object has no attribute 'regressor_'" not in str(msg):
+                raise
         raise AttributeError(
             f"Estimator {parent_estimator} is not a supported estimator by "
             "the function called."
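Illustrative note (not part of the patch): a minimal sketch of the behaviour the fallback above enables, going through the public accessor the same way the updated tests below do. The expected 5-row, no-`Intercept` output for `TransformedTargetRegressor` is taken from the updated `test_all_sklearn_estimators` further down.

```python
# Sketch only: exercises the new fallback path via the public accessor,
# mirroring the updated test below. Assumes skore and scikit-learn are installed.
from sklearn.compose import TransformedTargetRegressor
from sklearn.datasets import make_regression
from skore import EstimatorReport

X, y = make_regression(n_features=5, random_state=42)
estimator = TransformedTargetRegressor().fit(X, y)

report = EstimatorReport(estimator)
df = report.feature_importance.coefficients()

# TransformedTargetRegressor exposes `coef_` only on its fitted `regressor_`
# and has no `intercept_` on the wrapper, so the frame has one row per
# feature and no "Intercept" row (shape (5, 1) here).
print(df)
```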
diff --git a/skore/tests/unit/sklearn/estimator/feature_importance/conftest.py b/skore/tests/unit/sklearn/estimator/feature_importance/conftest.py
index 7358404fb..2f4012910 100644
--- a/skore/tests/unit/sklearn/estimator/feature_importance/conftest.py
+++ b/skore/tests/unit/sklearn/estimator/feature_importance/conftest.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pytest
 from sklearn.datasets import make_classification, make_regression
 
@@ -7,6 +8,12 @@ def regression_data():
     return make_regression(n_features=5, random_state=42)
 
 
+@pytest.fixture
+def positive_regression_data():
+    X, y = make_regression(n_features=5, random_state=42)
+    return X, np.abs(y) + 0.1
+
+
 @pytest.fixture
 def classification_data():
     return make_classification(n_features=5, random_state=42)
diff --git a/skore/tests/unit/sklearn/estimator/feature_importance/test_coefficients.py b/skore/tests/unit/sklearn/estimator/feature_importance/test_coefficients.py
index 17f39f25d..0f00f97a8 100644
--- a/skore/tests/unit/sklearn/estimator/feature_importance/test_coefficients.py
+++ b/skore/tests/unit/sklearn/estimator/feature_importance/test_coefficients.py
@@ -7,6 +7,7 @@
 from sklearn.pipeline import Pipeline, make_pipeline
 from sklearn.preprocessing import StandardScaler
 from skore import EstimatorReport
+from skore.externals._sklearn_compat import get_tags
 
 
 @pytest.mark.parametrize(
@@ -119,29 +120,25 @@ def test_estimator_report_coefficients_pandas_dataframe(estimator):
     [
         pytest.param(sklearn.svm.NuSVC(kernel="linear"), id="NuSVC"),
         pytest.param(sklearn.svm.NuSVR(kernel="linear"), id="NuSVR"),
-        # pytest.param(sklearn.svm.OneClassSVM(), id="OneClassSVM"),
         pytest.param(sklearn.svm.SVC(kernel="linear"), id="SVC"),
         pytest.param(sklearn.svm.SVR(kernel="linear"), id="SVR"),
         pytest.param(sklearn.svm.LinearSVC(), id="LinearSVC"),
         pytest.param(sklearn.svm.LinearSVR(), id="LinearSVR"),
-        # pytest.param(sklearn.cross_decomposition.CCA(), id="CCA"),
-        # pytest.param(sklearn.cross_decomposition.PLSCanonical(), id="PLSCanonical"),
         pytest.param(sklearn.cross_decomposition.PLSRegression(), id="PLSRegression"),
         pytest.param(
             sklearn.discriminant_analysis.LinearDiscriminantAnalysis(),
             id="LinearDiscriminantAnalysis",
         ),
-        # pytest.param(
-        #     sklearn.compose.TransformedTargetRegressor(),
-        #     id="TransformedTargetRegressor",
-        # ),
+        pytest.param(
+            sklearn.compose.TransformedTargetRegressor(),
+            id="TransformedTargetRegressor",
+        ),
         pytest.param(sklearn.linear_model.ElasticNet(), id="ElasticNet"),
-        pytest.param(sklearn.linear_model.Lasso(), id="Lasso"),
         pytest.param(sklearn.linear_model.ARDRegression(), id="ARDRegression"),
         pytest.param(sklearn.linear_model.BayesianRidge(), id="BayesianRidge"),
         pytest.param(sklearn.linear_model.ElasticNet(), id="ElasticNet"),
         pytest.param(sklearn.linear_model.ElasticNetCV(), id="ElasticNetCV"),
-        # pytest.param(sklearn.linear_model.GammaRegressor(), id="GammaRegressor"),
+        pytest.param(sklearn.linear_model.GammaRegressor(), id="GammaRegressor"),
        pytest.param(sklearn.linear_model.HuberRegressor(), id="HuberRegressor"),
         pytest.param(sklearn.linear_model.Lars(), id="Lars"),
         pytest.param(sklearn.linear_model.LarsCV(), id="LarsCV"),
@@ -157,14 +154,6 @@ def test_estimator_report_coefficients_pandas_dataframe(estimator):
         pytest.param(
             sklearn.linear_model.LogisticRegressionCV(), id="LogisticRegressionCV"
         ),
-        # pytest.param(
-        #     sklearn.linear_model.MultiTaskElasticNet(), id="MultiTaskElasticNet"
-        # ),
-        # pytest.param(
-        #     sklearn.linear_model.MultiTaskElasticNetCV(), id="MultiTaskElasticNetCV"
-        # ),
-        # pytest.param(sklearn.linear_model.MultiTaskLasso(), id="MultiTaskLasso"),
-        # pytest.param(sklearn.linear_model.MultiTaskLassoCV(), id="MultiTaskLassoCV"),
         pytest.param(
             sklearn.linear_model.OrthogonalMatchingPursuit(),
             id="OrthogonalMatchingPursuit",
         ),
@@ -182,37 +171,60 @@
             id="PassiveAggressiveRegressor",
         ),
         pytest.param(sklearn.linear_model.Perceptron(), id="Perceptron"),
-        # pytest.param(sklearn.linear_model.PoissonRegressor(), id="PoissonRegressor"),
+        pytest.param(sklearn.linear_model.PoissonRegressor(), id="PoissonRegressor"),
         pytest.param(sklearn.linear_model.QuantileRegressor(), id="QuantileRegressor"),
         pytest.param(sklearn.linear_model.Ridge(), id="Ridge"),
         pytest.param(sklearn.linear_model.RidgeClassifier(), id="RidgeClassifier"),
         pytest.param(sklearn.linear_model.RidgeClassifierCV(), id="RidgeClassifierCV"),
         pytest.param(sklearn.linear_model.RidgeCV(), id="RidgeCV"),
         pytest.param(sklearn.linear_model.SGDClassifier(), id="SGDClassifier"),
-        # pytest.param(sklearn.linear_model.SGDOneClassSVM(), id="SGDOneClassSVM"),
         pytest.param(sklearn.linear_model.SGDRegressor(), id="SGDRegressor"),
         pytest.param(sklearn.linear_model.TheilSenRegressor(), id="TheilSenRegressor"),
         pytest.param(sklearn.linear_model.TweedieRegressor(), id="TweedieRegressor"),
+        # The following models will be tested in the future, once the
+        # `EstimatorReport` has metrics specific to them:
+        #
+        # 1. multi-task
+        # pytest.param(
+        #     sklearn.linear_model.MultiTaskElasticNet(), id="MultiTaskElasticNet"
+        # ),
+        # pytest.param(
+        #     sklearn.linear_model.MultiTaskElasticNetCV(), id="MultiTaskElasticNetCV"
+        # ),
+        # pytest.param(sklearn.linear_model.MultiTaskLasso(), id="MultiTaskLasso"),
+        # pytest.param(sklearn.linear_model.MultiTaskLassoCV(), id="MultiTaskLassoCV"),
+        # 2. cross_decomposition
+        # pytest.param(sklearn.cross_decomposition.CCA(), id="CCA"),
+        # pytest.param(sklearn.cross_decomposition.PLSCanonical(), id="PLSCanonical"),
+        # 3. outlier detectors
+        # pytest.param(sklearn.linear_model.SGDOneClassSVM(), id="SGDOneClassSVM"),
+        # pytest.param(sklearn.svm.OneClassSVM(kernel="linear"), id="OneClassSVM"),
     ],
 )
 def test_all_sklearn_estimators(
-    request, estimator, regression_data, classification_data
+    request,
+    estimator,
+    regression_data,
+    positive_regression_data,
+    classification_data,
 ):
     """Check that `coefficients` is supported for every sklearn estimator."""
     if is_classifier(estimator):
         X, y = classification_data
     elif is_regressor(estimator):
-        X, y = regression_data
+        if get_tags(estimator).target_tags.positive_only:
+            X, y = positive_regression_data
+        else:
+            X, y = regression_data
     else:
-        raise Exception("Estimator is neither a classifier nor a regressor")
+        raise Exception("Estimator not in ['classifier', 'regressor']")
 
     estimator.fit(X, y)
 
     report = EstimatorReport(estimator)
     result = report.feature_importance.coefficients()
 
-    assert result.shape == (6, 1)
-    assert result.index.tolist() == [
+    rows = [
         "Intercept",
         "Feature #0",
         "Feature #1",
@@ -220,6 +232,12 @@ def test_all_sklearn_estimators(
         "Feature #3",
         "Feature #4",
     ]
+    if result.shape == (5, 1):  # for TransformedTargetRegressor()
+        assert rows[1:] == result.index.tolist()
+    else:
+        assert result.shape == (6, 1)
+        assert rows == result.index.tolist()
+
     assert result.columns.tolist() == ["Coefficient"]
diff --git a/skore/tests/unit/utils/test_accessors.py b/skore/tests/unit/utils/test_accessors.py
index 98d7c8429..dd0ee7aec 100644
--- a/skore/tests/unit/utils/test_accessors.py
+++ b/skore/tests/unit/utils/test_accessors.py
@@ -82,6 +82,10 @@ class Estimator:
         def __init__(self):
             self.coef_ = 0
 
+    class MetaEstimator:
+        def __init__(self):
+            self.regressor_ = Estimator()
+
     parent = MockParent(Estimator())
     accessor = MockAccessor(parent)
 
@@ -92,6 +96,11 @@ def __init__(self):
 
     assert _check_has_coef()(accessor)
 
+    parent = MockParent(MetaEstimator())
+    accessor = MockAccessor(parent)
+
+    assert _check_has_coef()(accessor)
+
     parent = MockParent(estimator="hello")
     accessor = MockAccessor(parent)
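Illustrative note (not part of the patch): a small sketch of the tag check that drives the `positive_regression_data` branch in `test_all_sklearn_estimators`. `GammaRegressor` is used only as an example of an estimator whose target tag is positive-only, and `Ridge` as a counter-example; the compat `get_tags` helper is the one imported in the test above.

```python
# Sketch only: shows how the positive-only target tag selects the fitting target,
# the same way the updated test does.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import GammaRegressor, Ridge
from skore.externals._sklearn_compat import get_tags

X, y = make_regression(n_features=5, random_state=42)

for estimator in (GammaRegressor(), Ridge()):
    positive_only = get_tags(estimator).target_tags.positive_only
    # Gamma-type regressors only accept strictly positive targets, so shift the
    # synthetic target the same way the `positive_regression_data` fixture does;
    # plain Ridge gets the raw target.
    y_fit = np.abs(y) + 0.1 if positive_only else y
    estimator.fit(X, y_fit)
    print(type(estimator).__name__, "positive_only:", positive_only)
```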