Skip to content

Commit

Permalink
Merge pull request #1 from azuur/feautre/update-feature-engineering
Browse files Browse the repository at this point in the history
Update feature engineering
  • Loading branch information
azuur authored Jan 16, 2024
2 parents 88f3f97 + c0a0456 commit eac407d
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 6 deletions.
20 changes: 17 additions & 3 deletions ml_pipelines/common/feature_eng.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,23 @@
import numpy as np
import pandas as pd
from pydantic import BaseModel


def transform_features(data: pd.DataFrame):
class FeatureEngineeringParams(BaseModel):
    """Fitted constants consumed by ``transform_features``.

    Instances are produced by ``fit_feature_transform`` and hold the values
    that are subtracted from the transformed ``X1`` and ``X2`` columns.
    """

    # Mean of exp(X1) over the data the transform was fitted on.
    x1_exp_mean: float
    # Mean of log(X2) over the data the transform was fitted on.
    x2_log_mean: float


def transform_features(
    data: pd.DataFrame, feature_eng_params: "FeatureEngineeringParams"
):
    """Return a copy of ``data`` with centred ``X1`` and ``X2`` features.

    ``X1`` is replaced by ``exp(X1) - feature_eng_params.x1_exp_mean`` and
    ``X2`` by ``log(X2) - feature_eng_params.x2_log_mean``. Each column is
    transformed exactly once; the earlier un-parameterised statements
    (``exp(X1) - 1`` and ``log(X2)``) must not run before these, otherwise
    the columns would be exponentiated / logged twice.

    Args:
        data: Frame containing at least the columns ``X1`` and ``X2``.
        feature_eng_params: Object exposing ``x1_exp_mean`` and
            ``x2_log_mean``, e.g. the result of ``fit_feature_transform``.

    Returns:
        A new DataFrame; the caller's frame is left unmodified.
    """
    # Work on a copy so the raw frame can still be written out unchanged.
    data = data.copy()
    data["X1"] = np.exp(data["X1"]) - feature_eng_params.x1_exp_mean
    data["X2"] = np.log(data["X2"]) - feature_eng_params.x2_log_mean
    return data


def fit_feature_transform(data: pd.DataFrame):
    """Fit the centring constants for ``transform_features`` on ``data``.

    Computes the mean of ``exp(X1)`` and the mean of ``log(X2)`` over the
    given frame and wraps them in a ``FeatureEngineeringParams`` instance.
    """
    exp_of_x1 = np.exp(data["X1"])
    log_of_x2 = np.log(data["X2"])
    return FeatureEngineeringParams(
        x1_exp_mean=exp_of_x1.mean(),
        x2_log_mean=log_of_x2.mean(),
    )
9 changes: 6 additions & 3 deletions ml_pipelines/train_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import pandas as pd

from ml_pipelines.common.feature_eng import transform_features
from ml_pipelines.common.feature_eng import fit_feature_transform, transform_features
from ml_pipelines.train.train import save_model, split_data, train_model

# Input
data = pd.read_csv("data.csv")

raw_train_data, raw_test_data = split_data(data, random_state=3397)

# Fit the feature-engineering constants on the training split only, then
# apply the same constants to both splits. transform_features requires the
# fitted params, so the former one-argument calls are not kept.
feature_eng_params = fit_feature_transform(raw_train_data)
train_data = transform_features(raw_train_data, feature_eng_params)
test_data = transform_features(raw_test_data, feature_eng_params)
model = train_model(train_data=train_data)

# Outputs
Expand All @@ -17,3 +18,5 @@
raw_test_data.to_csv("raw_test_data.csv", index=False)
train_data.to_csv("train_data.csv", index=False)
test_data.to_csv("test_data.csv", index=False)

# Persist the fitted feature-engineering constants next to the data so the
# same transform can be re-applied later.
# NOTE(review): `.json()` is the pydantic v1 spelling; pydantic v2 deprecates
# it in favour of `model_dump_json()` - confirm which version is pinned.
with open("feature_eng_params.json", "w", encoding="utf-8") as f:
    f.write(feature_eng_params.json())

0 comments on commit eac407d

Please sign in to comment.