Skip to content

Commit b072ab0

Browse files
added future warning for target encoder parameters
1 parent 9598507 commit b072ab0

File tree

2 files changed

+12
-4
lines changed

2 files changed

+12
-4
lines changed

category_encoders/one_hot.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,6 @@ def generate_mapping(self):
238238
if self.handle_missing == 'return_nan':
239239
base_df.loc[-2] = np.nan
240240
elif self.handle_missing == 'value':
241-
print(base_df.info())
242241
base_df.loc[-2] = 0
243242

244243
mapping.append({'col': col, 'mapping': base_df})

category_encoders/target_encoder.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Target Encoder"""
2+
import warnings
23
import numpy as np
34
import pandas as pd
45
from sklearn.base import BaseEstimator
@@ -35,10 +36,12 @@ class TargetEncoder(BaseEstimator, util.TransformerWithTargetMixin):
3536
handle_unknown: str
3637
options are 'error', 'return_nan' and 'value'; defaults to 'value', which returns the target mean.
3738
min_samples_leaf: int
38-
minimum samples to take category average into account.
39+
For regularization, a weighted average of the category mean and the global mean is taken. The weight is
40+
an S-shaped curve between 0 and 1 with the number of samples for a category on the x-axis.
41+
The curve reaches 0.5 at min_samples_leaf (parameter k in the original paper).
3942
smoothing: float
4043
smoothing effect to balance categorical average vs prior. Higher value means stronger regularization.
41-
The value must be strictly bigger than 0.
44+
The value must be strictly bigger than 0. Higher values mean a flatter S-curve (see min_samples_leaf).
4245
4346
Example
4447
-------
@@ -88,7 +91,13 @@ def __init__(self, verbose=0, cols=None, drop_invariant=False, return_df=True, h
8891
self.cols = cols
8992
self.ordinal_encoder = None
9093
self.min_samples_leaf = min_samples_leaf
91-
self.smoothing = float(smoothing) # Make smoothing a float so that python 2 does not treat as integer division
94+
if min_samples_leaf == 1:
95+
warnings.warn("Default parameter min_samples_leaf will change in version 2.6."
96+
"See https://github.com/scikit-learn-contrib/category_encoders/issues/327")
97+
self.smoothing = smoothing
98+
if min_samples_leaf == 1.0:
99+
warnings.warn("Default parameter smoothing will change in version 2.6."
100+
"See https://github.com/scikit-learn-contrib/category_encoders/issues/327")
92101
self._dim = None
93102
self.mapping = None
94103
self.handle_unknown = handle_unknown

0 commit comments

Comments
 (0)