-
Notifications
You must be signed in to change notification settings - Fork 108
Open
Description
Hello,
The kernel keeps crashing after stage 2. Could you please advise on how to resolve this issue?
I am using a laptop with 16 GB of RAM and an Intel i7 CPU.
Thank you!
import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from datetime import timedelta
from openfe import OpenFE, transform
# Start dates of the evaluation (test) periods, listed in chronological order.
# Each entry is parsed into a pandas Timestamp so it can be compared against
# datetime columns and combined with timedelta offsets.
test_dates = [
    pd.to_datetime(day)
    for day in (
        '2023-01-05',
        '2023-02-08',
        '2023-03-07',
        '2023-04-11',
        '2023-05-07',
        '2023-06-08',
        '2023-07-09',
        '2023-08-08',
        '2023-09-06',
        '2024-01-10',
        '2024-02-11',
        '2024-03-08',
        '2024-04-06',
    )
]
# Collects one summary row (period bounds + RMSE) per evaluated test period.
results = []

# LightGBM training parameters. They do not depend on the test period, so the
# dict is built once instead of being rebuilt on every loop iteration.
#
# The number of boosting rounds is intentionally NOT set in this dict.
# Supplying 'n_estimators' here *and* num_boost_round to lgb.train() is
# contradictory: 'n_estimators' is an alias of 'num_iterations', and LightGBM
# uses the value found in params (emitting a warning) while ignoring the
# num_boost_round argument.
params = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'learning_rate': 0.05,
    'num_leaves': 31,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'bagging_freq': 10,
    'verbose': 0,
}

# Single source of truth for the number of boosting iterations. 200 is the
# value that was effectively used when params carried 'n_estimators': 200
# alongside num_boost_round=100.
num_boost_round = 200

# For every test period: split train/test by time, run OpenFE feature
# engineering, train a LightGBM model and score it on the test window.
# NOTE(review): OpenFE fit/transform is repeated for every period and is
# presumably the most memory-hungry step here -- confirm against the reported
# kernel crash before tuning n_jobs or the number of generated features.
for test_start_date in test_dates:
    # The test window covers 90 days, with an inclusive end timestamp.
    test_end_date = test_start_date + timedelta(days=90) - timedelta(seconds=1)

    # Train on everything up to one second before the test window starts;
    # test on the rows whose 'tm' falls inside the window.
    train = df[df['tm'] <= test_start_date - timedelta(seconds=1)]
    test = df[(df['tm'] >= test_start_date) & (df['tm'] <= test_end_date)]

    # Skip periods for which either split has no rows.
    if train.empty or test.empty:
        print(f"Skipping period {test_start_date} to {test_end_date} due to insufficient data.")
        continue

    # Features (X) and target (y).
    x_train = train[var]
    y_train = train['demand']
    x_test = test[var]
    y_test = test['demand']

    # Generate candidate features with OpenFE on the training split, then
    # apply the same generated features to both train and test.
    ofe = OpenFE()
    features = ofe.fit(data=x_train, label=y_train, n_jobs=4)
    x_train, x_test = transform(x_train, x_test, features, n_jobs=4)

    # Build the LightGBM dataset and train the model.
    train_data = lgb.Dataset(x_train, label=y_train)
    model_lgb = lgb.train(params, train_data, num_boost_round=num_boost_round)

    # Predict on the test window and compute RMSE.
    preds_lgb = model_lgb.predict(x_test)
    rmse = np.sqrt(mean_squared_error(y_test, preds_lgb))

    # Record the result for this period.
    results.append({
        'Test Start Date': test_start_date,
        'Test End Date': test_end_date,
        'RMSE': rmse
    })
    print(f"Period {test_start_date} to {test_end_date} - RMSE: {rmse}")

# Convert the collected per-period results into a DataFrame.
results_df = pd.DataFrame(results)
# Bare expression: displays the frame when run as the last line of a notebook cell.
results_df
Metadata
Metadata
Assignees
Labels
No labels