Skip to content

Commit aacba86

Browse files
authored
Merge pull request #1 from nlddfn/hotfix/debug
Hotfix/debug
2 parents 8a1c1a5 + f9009f4 commit aacba86

8 files changed

+23
-26
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# ML Kit: a wrapper around scikit-learn
22
## Getting Started
3+
Clone the repo and set up a virtual environment. Tox expects Python 3.7; nevertheless, the code is also tested with Python 3.6. Should `make test` fail, please change tox.ini to match your version of Python.
34

45
* `make test`
56

mlkit/BaseData.py

+4
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ def __init__(self, df, logger, target=None):
4848
# Initiate logging
4949
self.log = logger
5050

51+
# Initiate class attributes
52+
self.classes = None
53+
self.nclas = None
54+
5155
def EDA(self, what=['basic']):
5256
"""Exploratory data analysis. Max(20000 samples)"""
5357
eda_frac = min(1, 20000 / self._X.shape[0])

mlkit/BaseModel.py

+4
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,10 @@ def threshold_optimization_by_metric(self, metric, model):
128128
return self.ROC_AUC(model)
129129
elif metric == 'Precision_Recall':
130130
return self.Precision_Recall(model)
131+
elif type(metric) == float:
132+
assert 0 < metric < 1., \
133+
'If the value for metric is numerical, it should be between 0 and 1'
134+
return {0: metric}
131135
else:
132136
score_lst = []
133137
thr_lst = np.linspace(.01, .99, 50, endpoint=True)

mlkit/BaseSupervised.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from sklearn.model_selection import GridSearchCV
66

77
from mlkit.BasePreProcessing import BasePreProcessing
8-
from mlkit.base_model_utils import matthews_binary
98

109

1110
class BaseClassifier(BasePreProcessing):
@@ -17,7 +16,7 @@ def base_classifier(self,
1716
grid=None,
1817
fit_params={},
1918
cv_params={},
20-
metric=matthews_binary,
19+
threshold_metric=0.5,
2120
return_prob=False,
2221
owr=False,
2322
model_bin=None):
@@ -88,7 +87,7 @@ def base_classifier(self,
8887
# If binary, use ROC to set model.class_threshold
8988
if self.data.nclas == 2:
9089
best_est.class_threshold = self.threshold_optimization_by_metric(
91-
metric=metric, model=best_est)
90+
metric=threshold_metric, model=best_est)
9291
self.log(
9392
'Classification threshold is {}'.format(
9493
round(best_est.class_threshold[0], 2)))

mlkit/Pipe.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def run(self, data):
143143

144144
pred_lst = []
145145
# Create ensemble data obj
146-
for i, pipe in enumerate(self.pipe_list):
146+
for pipe in self.pipe_list:
147147
data, model = pipe.run(data)
148148
pred = data.get_prediction(model)
149149
pred_lst.append(pred)
@@ -154,7 +154,7 @@ def run(self, data):
154154

155155
# Create Data obj for final estimator
156156
ens_data = Data(df=ens_df,
157-
log_path=getattr(data, 'log'),
157+
logger=getattr(data, 'log'),
158158
target=getattr(data, '_target'))
159159
setattr(ens_data, 'classes', data.classes)
160160
setattr(ens_data, 'nclas', data.nclas)
@@ -185,7 +185,7 @@ def predict(self, data):
185185
ens_df.columns = list(range(ens_df.shape[1]))
186186

187187
ens_data = Data(df=ens_df,
188-
log_path=getattr(data, 'model_path'),
188+
logger=getattr(data, 'log'),
189189
target=getattr(data, '_target'))
190190
pred = self.ens_pip.predict(ens_data)
191191

requirements.txt

+6-6
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
category-encoders==1.3.0
22
dill==0.2.8.2
33
flake8==3.7.7
4-
imbalanced-learn==0.4.3
5-
numpy==1.15.2
6-
pandas==0.23.4
4+
imbalanced-learn>=0.4.3
5+
numpy>=1.15.2
6+
pandas>=0.23.4
77
pytest==3.6.2
88
pytest-cov==2.5.1
99
pytest-mock==1.10.4
1010
pytest-runner==4.4
1111
setuptools-scm==3.2.0
12-
scikit-learn==0.20.1
12+
scikit-learn>=0.20.1
1313
scikit-optimize==0.5.2
14-
scipy==1.1.0
15-
xgboost==0.80
14+
scipy>=1.1.0
15+
xgboost>=0.80

tests/test_data.py

+1-12
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11

2-
from mlkit.BaseData import Data, Validator
2+
from mlkit.BaseData import Data
33
from tests.test_base import TestBase
44

55

@@ -16,17 +16,6 @@ def test_basics(self, setup_model):
1616
what=['basic', 'missing', 'remarkable', 'target_agg', 'target_imbalance']
1717
)
1818

19-
def test_validation(self, setup_data):
20-
# First introduce fake data and model
21-
data = self.data_mock_clas
22-
23-
v = Validator(
24-
data_obj=data,
25-
model_path='examples',
26-
n_samples=100
27-
)
28-
v.run_validation(test=data)
29-
3019
def test_mock_models(self, setup_model):
3120
# Classification
3221
data_clas = self.data_mock_clas

tox.ini

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[tox]
2-
envlist = py36,flake8
2+
envlist = py37,flake8
33

4-
[testenv:py36]
4+
[testenv:py37]
55
deps =
66
-r requirements.txt
77
coverage

0 commit comments

Comments
 (0)