Skip to content

test: Include all estimators (with coef_) in test_all_sklearn_estimators #1575

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Apr 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions skore/src/skore/sklearn/_estimator/feature_importance_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,10 +203,24 @@ def coefficients(self) -> pd.DataFrame:
if isinstance(parent_estimator, Pipeline)
else parent_estimator
)
intercept = np.atleast_2d(estimator.intercept_)
coef = np.atleast_2d(estimator.coef_)

data = np.concatenate([intercept, coef.T])
try:
intercept = np.atleast_2d(estimator.intercept_)
except AttributeError:
# SGDOneClassSVM does not expose `intercept_`
intercept = None

try:
coef = np.atleast_2d(estimator.coef_)
except AttributeError:
# TransformedTargetRegressor() does not expose `coef_`
coef = np.atleast_2d(estimator.regressor_.coef_)

if intercept is None:
data = coef.T
index = list(feature_names)
else:
data = np.concatenate([intercept, coef.T])
index = ["Intercept"] + list(feature_names)

if data.shape[1] == 1:
columns = ["Coefficient"]
Expand All @@ -217,7 +231,7 @@ def coefficients(self) -> pd.DataFrame:

df = pd.DataFrame(
data=data,
index=["Intercept"] + list(feature_names),
index=index,
columns=columns,
)

Expand Down
6 changes: 6 additions & 0 deletions skore/src/skore/utils/_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ def check(accessor: Any) -> bool:
)
if hasattr(estimator, "coef_"):
return True
try: # e.g. TransformedTargetRegressor()
if hasattr(estimator.regressor_, "coef_"):
return True
except AttributeError as msg:
if "object has no attribute 'regressor_'" not in str(msg):
raise
raise AttributeError(
f"Estimator {parent_estimator} is not a supported estimator by "
"the function called."
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import pytest
from sklearn.datasets import make_classification, make_regression

Expand All @@ -7,6 +8,12 @@ def regression_data():
return make_regression(n_features=5, random_state=42)


@pytest.fixture
def positive_regression_data():
    """Return a 5-feature regression dataset with a strictly positive target.

    The target of the generated dataset is replaced by its absolute value
    shifted by 0.1, so every target value is at least 0.1.
    """
    features, target = make_regression(n_features=5, random_state=42)
    positive_target = np.abs(target) + 0.1
    return features, positive_target


@pytest.fixture
def classification_data():
    """Return a 5-feature classification dataset generated with a fixed seed."""
    dataset = make_classification(n_features=5, random_state=42)
    return dataset
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler
from skore import EstimatorReport
from skore.externals._sklearn_compat import get_tags


@pytest.mark.parametrize(
Expand Down Expand Up @@ -119,29 +120,25 @@ def test_estimator_report_coefficients_pandas_dataframe(estimator):
[
pytest.param(sklearn.svm.NuSVC(kernel="linear"), id="NuSVC"),
pytest.param(sklearn.svm.NuSVR(kernel="linear"), id="NuSVR"),
# pytest.param(sklearn.svm.OneClassSVM(), id="OneClassSVM"),
pytest.param(sklearn.svm.SVC(kernel="linear"), id="SVC"),
pytest.param(sklearn.svm.SVR(kernel="linear"), id="SVR"),
pytest.param(sklearn.svm.LinearSVC(), id="LinearSVC"),
pytest.param(sklearn.svm.LinearSVR(), id="LinearSVR"),
# pytest.param(sklearn.cross_decomposition.CCA(), id="CCA"),
# pytest.param(sklearn.cross_decomposition.PLSCanonical(), id="PLSCanonical"),
pytest.param(sklearn.cross_decomposition.PLSRegression(), id="PLSRegression"),
pytest.param(
sklearn.discriminant_analysis.LinearDiscriminantAnalysis(),
id="LinearDiscriminantAnalysis",
),
# pytest.param(
# sklearn.compose.TransformedTargetRegressor(),
# id="TransformedTargetRegressor",
# ),
pytest.param(
sklearn.compose.TransformedTargetRegressor(),
id="TransformedTargetRegressor",
),
pytest.param(sklearn.linear_model.ElasticNet(), id="ElasticNet"),
pytest.param(sklearn.linear_model.Lasso(), id="Lasso"),
pytest.param(sklearn.linear_model.ARDRegression(), id="ARDRegression"),
pytest.param(sklearn.linear_model.BayesianRidge(), id="BayesianRidge"),
pytest.param(sklearn.linear_model.ElasticNet(), id="ElasticNet"),
pytest.param(sklearn.linear_model.ElasticNetCV(), id="ElasticNetCV"),
# pytest.param(sklearn.linear_model.GammaRegressor(), id="GammaRegressor"),
pytest.param(sklearn.linear_model.GammaRegressor(), id="GammaRegressor"),
pytest.param(sklearn.linear_model.HuberRegressor(), id="HuberRegressor"),
pytest.param(sklearn.linear_model.Lars(), id="Lars"),
pytest.param(sklearn.linear_model.LarsCV(), id="LarsCV"),
Expand All @@ -157,14 +154,6 @@ def test_estimator_report_coefficients_pandas_dataframe(estimator):
pytest.param(
sklearn.linear_model.LogisticRegressionCV(), id="LogisticRegressionCV"
),
# pytest.param(
# sklearn.linear_model.MultiTaskElasticNet(), id="MultiTaskElasticNet"
# ),
# pytest.param(
# sklearn.linear_model.MultiTaskElasticNetCV(), id="MultiTaskElasticNetCV"
# ),
# pytest.param(sklearn.linear_model.MultiTaskLasso(), id="MultiTaskLasso"),
# pytest.param(sklearn.linear_model.MultiTaskLassoCV(), id="MultiTaskLassoCV"),
pytest.param(
sklearn.linear_model.OrthogonalMatchingPursuit(),
id="OrthogonalMatchingPursuit",
Expand All @@ -182,44 +171,73 @@ def test_estimator_report_coefficients_pandas_dataframe(estimator):
id="PassiveAggressiveRegressor",
),
pytest.param(sklearn.linear_model.Perceptron(), id="Perceptron"),
# pytest.param(sklearn.linear_model.PoissonRegressor(), id="PoissonRegressor"),
pytest.param(sklearn.linear_model.PoissonRegressor(), id="PoissonRegressor"),
pytest.param(sklearn.linear_model.QuantileRegressor(), id="QuantileRegressor"),
pytest.param(sklearn.linear_model.Ridge(), id="Ridge"),
pytest.param(sklearn.linear_model.RidgeClassifier(), id="RidgeClassifier"),
pytest.param(sklearn.linear_model.RidgeClassifierCV(), id="RidgeClassifierCV"),
pytest.param(sklearn.linear_model.RidgeCV(), id="RidgeCV"),
pytest.param(sklearn.linear_model.SGDClassifier(), id="SGDClassifier"),
# pytest.param(sklearn.linear_model.SGDOneClassSVM(), id="SGDOneClassSVM"),
pytest.param(sklearn.linear_model.SGDRegressor(), id="SGDRegressor"),
pytest.param(sklearn.linear_model.TheilSenRegressor(), id="TheilSenRegressor"),
pytest.param(sklearn.linear_model.TweedieRegressor(), id="TweedieRegressor"),
# The following models will be tested in the future, once `EstimatorReport`
# has metrics specific to these models:
#
# 1. multi-task
# pytest.param(
# sklearn.linear_model.MultiTaskElasticNet(), id="MultiTaskElasticNet"
# ),
# pytest.param(
# sklearn.linear_model.MultiTaskElasticNetCV(), id="MultiTaskElasticNetCV"
# ),
# pytest.param(sklearn.linear_model.MultiTaskLasso(), id="MultiTaskLasso"),
# pytest.param(sklearn.linear_model.MultiTaskLassoCV(), id="MultiTaskLassoCV"),
# 2. cross_decomposition
# pytest.param(sklearn.cross_decomposition.CCA(), id="CCA"),
# pytest.param(sklearn.cross_decomposition.PLSCanonical(), id="PLSCanonical"),
# 3. outlier detectors
# pytest.param(sklearn.linear_model.SGDOneClassSVM(), id="SGDOneClassSVM"),
# pytest.param(sklearn.svm.OneClassSVM(kernel="linear"), id="OneClassSVM"),
],
)
def test_all_sklearn_estimators(
request, estimator, regression_data, classification_data
request,
estimator,
regression_data,
positive_regression_data,
classification_data,
):
"""Check that `coefficients` is supported for every sklearn estimator."""
if is_classifier(estimator):
X, y = classification_data
elif is_regressor(estimator):
X, y = regression_data
if get_tags(estimator).target_tags.positive_only:
X, y = positive_regression_data
else:
X, y = regression_data
else:
raise Exception("Estimator is neither a classifier nor a regressor")
raise Exception("Estimator not in ['classifier', 'regressor']")

estimator.fit(X, y)

report = EstimatorReport(estimator)
result = report.feature_importance.coefficients()

assert result.shape == (6, 1)
assert result.index.tolist() == [
rows = [
"Intercept",
"Feature #0",
"Feature #1",
"Feature #2",
"Feature #3",
"Feature #4",
]
if result.shape == (5, 1): # for TransformedTargetRegressor()
assert rows[1:] == result.index.tolist()
else:
assert result.shape == (6, 1)
assert rows == result.index.tolist()

assert result.columns.tolist() == ["Coefficient"]


Expand Down
9 changes: 9 additions & 0 deletions skore/tests/unit/utils/test_accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ class Estimator:
def __init__(self):
self.coef_ = 0

class MetaEstimator:
    """Mock meta-estimator that has no `coef_` of its own.

    It only holds an inner `Estimator` instance under `regressor_`.
    """

    def __init__(self):
        wrapped = Estimator()
        self.regressor_ = wrapped

parent = MockParent(Estimator())
accessor = MockAccessor(parent)

Expand All @@ -92,6 +96,11 @@ def __init__(self):

assert _check_has_coef()(accessor)

parent = MockParent(MetaEstimator())
accessor = MockAccessor(parent)

assert _check_has_coef()(accessor)

parent = MockParent(estimator="hello")
accessor = MockAccessor(parent)

Expand Down