diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2612866 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +mlflow.db \ No newline at end of file diff --git a/data/drift_report.html b/data/drift_report.html new file mode 100644 index 0000000..56cf6e6 --- /dev/null +++ b/data/drift_report.html @@ -0,0 +1,46 @@ + + + + + + + + +
Loading...
+ + + +
diff --git a/data/iris_RandomForestClassifier.py b/data/iris_RandomForestClassifier.py
index f5f668b..2300c5c 100644
--- a/data/iris_RandomForestClassifier.py
+++ b/data/iris_RandomForestClassifier.py
@@ -1,16 +1,106 @@
+"""Iris RandomForest baseline with simulated feature drift, tracked in MLflow.
+
+The script trains and logs a baseline model, shifts one feature of the test
+split, builds an Evidently data-drift report, and attaches the report and the
+accuracy on the drifted data to the same MLflow run.
+
+Expects an MLflow tracking server at http://127.0.0.1:5000.
+"""
+
 import mlflow
+import mlflow.sklearn
+import numpy as np
+import pandas as pd
 from sklearn.datasets import load_iris
 from sklearn.model_selection import train_test_split
 from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import accuracy_score
+from evidently.report import Report
+from evidently.metric_preset import DataDriftPreset
+import warnings
+warnings.filterwarnings('ignore')
+
+mlflow.set_tracking_uri("http://127.0.0.1:5000")
+mlflow.set_experiment("Iris_Classification_Drift_Detection")
+
+print(f"Tracking URI: {mlflow.get_tracking_uri()}")
+
+# Autolog params and metrics, but not the model: it is logged explicitly
+# below, and logging it twice stores two model copies under the same run.
+mlflow.autolog(log_models=False)
 
 # Load data and prep
+print("\nStep 1: Loading data and preparing for training...")
 iris_data = load_iris(as_frame=True)
 df = iris_data.frame
 X = df.drop(columns=["target"])
 y = df["target"]
 
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# Train and log the baseline model with MLflow
+print("\nStep 2: Training and logging baseline model...")
+with mlflow.start_run(run_name="iris_rf_baseline") as run:
+    # Train model
+    model = RandomForestClassifier(n_estimators=100, random_state=42)
+    model.fit(X_train, y_train)
+
+    # Calculate and log metrics on the held-out split
+    y_pred = model.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    mlflow.log_metric("accuracy", accuracy)
+
+    # Log the model once; the input example lets MLflow infer a signature
+    mlflow.sklearn.log_model(model, "random_forest_model", input_example=X_train.head())
+
+    # Log training data as reference for drift detection
+    mlflow.log_text(X_train.to_csv(index=False), "reference_data.csv")
+
+    # Keep the run id so later steps can attach results to this same run
+    run_id = run.info.run_id
+    print(f"Model trained with accuracy: {accuracy: .4f}")
+    print(f"Mlflow Run ID: {run_id}")
+
+# Simulate feature drift
+print("\nStep 3: Simulating feature drift...")
+X_drifted = X_test.copy()
+# Shift sepal length with seeded Gaussian noise so the drift is reproducible
+rng = np.random.default_rng(42)
+X_drifted["sepal length (cm)"] += rng.normal(loc=2.0, scale=0.3, size=len(X_drifted))
+
+# Detect drift using Evidently
+print("\nStep 4: Detecting drift with Evidently... ")
+report = Report(metrics=[DataDriftPreset()])
+report.run(reference_data=X_train, current_data=X_drifted)
+
+# Save report as HTML
+report.save_html("drift_report.html")
+
+# Log drift report to MLflow, reopening the baseline run
+print('\nStep 5: Logging drift report to Mlflow...')
+with mlflow.start_run(run_id=run_id):
+    mlflow.log_artifact("drift_report.html", "drift_reports")
+
+# Print drift summary
+result = report.as_dict()
+n_drifted_features = result['metrics'][0]['result']['number_of_drifted_columns']
+n_features = result['metrics'][0]['result']['number_of_columns']
+
+print("\nDrift Detection Result:")
+print(f"Features analyzed: {n_features}")
+print(f"Features drifted: {n_drifted_features}")
+print(f"Drift percentage: {(n_drifted_features / n_features) * 100: .2f}%")
+
+# Check model performance on drifted data (labels are unchanged)
+print("\nStep 6: Evaluating model on drifted data...")
+y_pred_drifted = model.predict(X_drifted)
+accuracy_drifted = accuracy_score(y_test, y_pred_drifted)
+
+print(f"\nModel accuracy on drifted data: {accuracy_drifted: .4f}")
+
+# Log drifted performance to the same MLflow run
+with mlflow.start_run(run_id=run_id):
+    mlflow.log_metric("accuracy_drifted", accuracy_drifted)
 
-# Train
-model = RandomForestClassifier()
-model.fit(X_train, y_train)
\ No newline at end of file
+print("\nProcess completed! Check Mlflow UI for results.")
+print("Run 'mlflow ui --backend-store-uri sqlite:///mlflow.db' to view the results in MLflow UI")
\ No newline at end of file
diff --git a/mlartifacts/1/9dc9885b8c604676a5c0ee7d39e932f2/artifacts/drift_reports/drift_report.html b/mlartifacts/1/9dc9885b8c604676a5c0ee7d39e932f2/artifacts/drift_reports/drift_report.html
new file mode 100644
index 0000000..56cf6e6
--- /dev/null
+++ b/mlartifacts/1/9dc9885b8c604676a5c0ee7d39e932f2/artifacts/drift_reports/drift_report.html
@@ -0,0 +1,46 @@
+ + + + + + + + +
Loading...
+ + + + diff --git a/mlartifacts/1/9dc9885b8c604676a5c0ee7d39e932f2/artifacts/estimator.html b/mlartifacts/1/9dc9885b8c604676a5c0ee7d39e932f2/artifacts/estimator.html new file mode 100644 index 0000000..d3d1a9e --- /dev/null +++ b/mlartifacts/1/9dc9885b8c604676a5c0ee7d39e932f2/artifacts/estimator.html @@ -0,0 +1,730 @@ + + + + + + + +
RandomForestClassifier(random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
+ + + \ No newline at end of file diff --git a/mlartifacts/1/9dc9885b8c604676a5c0ee7d39e932f2/artifacts/reference_data.csv b/mlartifacts/1/9dc9885b8c604676a5c0ee7d39e932f2/artifacts/reference_data.csv new file mode 100644 index 0000000..57d51c7 --- /dev/null +++ b/mlartifacts/1/9dc9885b8c604676a5c0ee7d39e932f2/artifacts/reference_data.csv @@ -0,0 +1,121 @@ +sepal length (cm),sepal width (cm),petal length (cm),petal width (cm) +4.6,3.6,1.0,0.2 +5.7,4.4,1.5,0.4 +6.7,3.1,4.4,1.4 +4.8,3.4,1.6,0.2 +4.4,3.2,1.3,0.2 +6.3,2.5,5.0,1.9 +6.4,3.2,4.5,1.5 +5.2,3.5,1.5,0.2 +5.0,3.6,1.4,0.2 +5.2,4.1,1.5,0.1 +5.8,2.7,5.1,1.9 +6.0,3.4,4.5,1.6 +6.7,3.1,4.7,1.5 +5.4,3.9,1.3,0.4 +5.4,3.7,1.5,0.2 +5.5,2.4,3.7,1.0 +6.3,2.8,5.1,1.5 +6.4,3.1,5.5,1.8 +6.6,3.0,4.4,1.4 +7.2,3.6,6.1,2.5 +5.7,2.9,4.2,1.3 +7.6,3.0,6.6,2.1 +5.6,3.0,4.5,1.5 +5.1,3.5,1.4,0.2 +7.7,2.8,6.7,2.0 +5.8,2.7,4.1,1.0 +5.2,3.4,1.4,0.2 +5.0,3.5,1.3,0.3 +5.1,3.8,1.9,0.4 +5.0,2.0,3.5,1.0 +6.3,2.7,4.9,1.8 +4.8,3.4,1.9,0.2 +5.0,3.0,1.6,0.2 +5.1,3.3,1.7,0.5 +5.6,2.7,4.2,1.3 +5.1,3.4,1.5,0.2 +5.7,3.0,4.2,1.2 +7.7,3.8,6.7,2.2 +4.6,3.2,1.4,0.2 +6.2,2.9,4.3,1.3 +5.7,2.5,5.0,2.0 +5.5,4.2,1.4,0.2 +6.0,3.0,4.8,1.8 +5.8,2.7,5.1,1.9 +6.0,2.2,4.0,1.0 +5.4,3.0,4.5,1.5 +6.2,3.4,5.4,2.3 +5.5,2.3,4.0,1.3 +5.4,3.9,1.7,0.4 +5.0,2.3,3.3,1.0 +6.4,2.7,5.3,1.9 +5.0,3.3,1.4,0.2 +5.0,3.2,1.2,0.2 +5.5,2.4,3.8,1.1 +6.7,3.0,5.0,1.7 +4.9,3.1,1.5,0.2 +5.8,2.8,5.1,2.4 +5.0,3.4,1.5,0.2 +5.0,3.5,1.6,0.6 +5.9,3.2,4.8,1.8 +5.1,2.5,3.0,1.1 +6.9,3.2,5.7,2.3 +6.0,2.7,5.1,1.6 +6.1,2.6,5.6,1.4 +7.7,3.0,6.1,2.3 +5.5,2.5,4.0,1.3 +4.4,2.9,1.4,0.2 +4.3,3.0,1.1,0.1 +6.0,2.2,5.0,1.5 +7.2,3.2,6.0,1.8 +4.6,3.1,1.5,0.2 +5.1,3.5,1.4,0.3 +4.4,3.0,1.3,0.2 +6.3,2.5,4.9,1.5 +6.3,3.4,5.6,2.4 +4.6,3.4,1.4,0.3 +6.8,3.0,5.5,2.1 +6.3,3.3,6.0,2.5 +4.7,3.2,1.3,0.2 +6.1,2.9,4.7,1.4 +6.5,2.8,4.6,1.5 +6.2,2.8,4.8,1.8 +7.0,3.2,4.7,1.4 +6.4,3.2,5.3,2.3 +5.1,3.8,1.6,0.2 +6.9,3.1,5.4,2.1 +5.9,3.0,4.2,1.5 +6.5,3.0,5.2,2.0 +5.7,2.6,3.5,1.0 +5.2,2.7,3.9,1.4 +6.1,3.0,4.6,1.4 +4.5,2.3,1.3,0.3 
+6.6,2.9,4.6,1.3 +5.5,2.6,4.4,1.2 +5.3,3.7,1.5,0.2 +5.6,3.0,4.1,1.3 +7.3,2.9,6.3,1.8 +6.7,3.3,5.7,2.1 +5.1,3.7,1.5,0.4 +4.9,2.4,3.3,1.0 +6.7,3.3,5.7,2.5 +7.2,3.0,5.8,1.6 +4.9,3.6,1.4,0.1 +6.7,3.1,5.6,2.4 +4.9,3.0,1.4,0.2 +6.9,3.1,4.9,1.5 +7.4,2.8,6.1,1.9 +6.3,2.9,5.6,1.8 +5.7,2.8,4.1,1.3 +6.5,3.0,5.5,1.8 +6.3,2.3,4.4,1.3 +6.4,2.9,4.3,1.3 +5.6,2.8,4.9,2.0 +5.9,3.0,5.1,1.8 +5.4,3.4,1.7,0.2 +6.1,2.8,4.0,1.3 +4.9,2.5,4.5,1.7 +5.8,4.0,1.2,0.2 +5.8,2.6,4.0,1.2 +7.1,3.0,5.9,2.1 diff --git a/mlartifacts/1/9dc9885b8c604676a5c0ee7d39e932f2/artifacts/training_confusion_matrix.png b/mlartifacts/1/9dc9885b8c604676a5c0ee7d39e932f2/artifacts/training_confusion_matrix.png new file mode 100644 index 0000000..1261b38 Binary files /dev/null and b/mlartifacts/1/9dc9885b8c604676a5c0ee7d39e932f2/artifacts/training_confusion_matrix.png differ diff --git a/mlartifacts/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts/MLmodel b/mlartifacts/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts/MLmodel new file mode 100644 index 0000000..1a3de60 --- /dev/null +++ b/mlartifacts/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts/MLmodel @@ -0,0 +1,31 @@ +artifact_path: mlflow-artifacts:/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts +flavors: + python_function: + env: + conda: conda.yaml + virtualenv: python_env.yaml + loader_module: mlflow.sklearn + model_path: model.pkl + predict_fn: predict + python_version: 3.10.19 + sklearn: + code: null + pickled_model: model.pkl + serialization_format: cloudpickle + sklearn_version: 1.7.2 +is_signature_from_type_hint: false +mlflow_version: 3.5.1 +model_id: m-6276af9744a9405cb6d9cb8602894a0e +model_size_bytes: 177380 +model_uuid: m-6276af9744a9405cb6d9cb8602894a0e +prompts: null +run_id: 9dc9885b8c604676a5c0ee7d39e932f2 +signature: + inputs: '[{"type": "double", "name": "sepal length (cm)", "required": true}, {"type": + "double", "name": "sepal width (cm)", "required": true}, {"type": "double", "name": + "petal length (cm)", "required": 
true}, {"type": "double", "name": "petal width + (cm)", "required": true}]' + outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "int32", "shape": [-1]}}]' + params: null +type_hint_from_example: false +utc_time_created: '2025-10-29 15:39:25.313443' diff --git a/mlartifacts/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts/conda.yaml b/mlartifacts/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts/conda.yaml new file mode 100644 index 0000000..96cfe03 --- /dev/null +++ b/mlartifacts/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts/conda.yaml @@ -0,0 +1,15 @@ +channels: +- conda-forge +dependencies: +- python=3.10.19 +- pip<=25.2 +- pip: + - mlflow==3.5.1 + - cloudpickle==3.1.1 + - numpy==1.26.4 + - pandas==2.3.3 + - psutil==5.9.8 + - pyarrow==21.0.0 + - scikit-learn==1.7.2 + - scipy==1.15.3 +name: mlflow-env diff --git a/mlartifacts/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts/model.pkl b/mlartifacts/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts/model.pkl new file mode 100644 index 0000000..e0eb14d Binary files /dev/null and b/mlartifacts/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts/model.pkl differ diff --git a/mlartifacts/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts/python_env.yaml b/mlartifacts/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts/python_env.yaml new file mode 100644 index 0000000..45c04a8 --- /dev/null +++ b/mlartifacts/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts/python_env.yaml @@ -0,0 +1,7 @@ +python: 3.10.19 +build_dependencies: +- pip==25.2 +- setuptools==80.9.0 +- wheel==0.45.1 +dependencies: +- -r requirements.txt diff --git a/mlartifacts/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts/requirements.txt b/mlartifacts/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts/requirements.txt new file mode 100644 index 0000000..0a0d58c --- /dev/null +++ b/mlartifacts/1/models/m-6276af9744a9405cb6d9cb8602894a0e/artifacts/requirements.txt @@ -0,0 +1,8 @@ +mlflow==3.5.1 
+cloudpickle==3.1.1 +numpy==1.26.4 +pandas==2.3.3 +psutil==5.9.8 +pyarrow==21.0.0 +scikit-learn==1.7.2 +scipy==1.15.3 \ No newline at end of file diff --git a/mlartifacts/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts/MLmodel b/mlartifacts/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts/MLmodel new file mode 100644 index 0000000..c02011d --- /dev/null +++ b/mlartifacts/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts/MLmodel @@ -0,0 +1,22 @@ +artifact_path: mlflow-artifacts:/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts +flavors: + python_function: + env: + conda: conda.yaml + virtualenv: python_env.yaml + loader_module: mlflow.sklearn + model_path: model.pkl + predict_fn: predict + python_version: 3.10.19 + sklearn: + code: null + pickled_model: model.pkl + serialization_format: cloudpickle + sklearn_version: 1.7.2 +mlflow_version: 3.5.1 +model_id: m-6954bd530e684ff1a1e5eb234663d1d0 +model_size_bytes: 177432 +model_uuid: m-6954bd530e684ff1a1e5eb234663d1d0 +prompts: null +run_id: 9dc9885b8c604676a5c0ee7d39e932f2 +utc_time_created: '2025-10-29 15:39:30.891458' diff --git a/mlartifacts/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts/conda.yaml b/mlartifacts/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts/conda.yaml new file mode 100644 index 0000000..96cfe03 --- /dev/null +++ b/mlartifacts/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts/conda.yaml @@ -0,0 +1,15 @@ +channels: +- conda-forge +dependencies: +- python=3.10.19 +- pip<=25.2 +- pip: + - mlflow==3.5.1 + - cloudpickle==3.1.1 + - numpy==1.26.4 + - pandas==2.3.3 + - psutil==5.9.8 + - pyarrow==21.0.0 + - scikit-learn==1.7.2 + - scipy==1.15.3 +name: mlflow-env diff --git a/mlartifacts/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts/model.pkl b/mlartifacts/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts/model.pkl new file mode 100644 index 0000000..ae12bbe Binary files /dev/null and b/mlartifacts/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts/model.pkl 
differ diff --git a/mlartifacts/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts/python_env.yaml b/mlartifacts/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts/python_env.yaml new file mode 100644 index 0000000..45c04a8 --- /dev/null +++ b/mlartifacts/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts/python_env.yaml @@ -0,0 +1,7 @@ +python: 3.10.19 +build_dependencies: +- pip==25.2 +- setuptools==80.9.0 +- wheel==0.45.1 +dependencies: +- -r requirements.txt diff --git a/mlartifacts/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts/requirements.txt b/mlartifacts/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts/requirements.txt new file mode 100644 index 0000000..0a0d58c --- /dev/null +++ b/mlartifacts/1/models/m-6954bd530e684ff1a1e5eb234663d1d0/artifacts/requirements.txt @@ -0,0 +1,8 @@ +mlflow==3.5.1 +cloudpickle==3.1.1 +numpy==1.26.4 +pandas==2.3.3 +psutil==5.9.8 +pyarrow==21.0.0 +scikit-learn==1.7.2 +scipy==1.15.3 \ No newline at end of file diff --git a/screenshots/artifact_section.png b/screenshots/artifact_section.png new file mode 100644 index 0000000..93e060b Binary files /dev/null and b/screenshots/artifact_section.png differ diff --git a/screenshots/artifact_section_showing_drift_repor.png b/screenshots/artifact_section_showing_drift_repor.png new file mode 100644 index 0000000..2f6e78b Binary files /dev/null and b/screenshots/artifact_section_showing_drift_repor.png differ diff --git a/screenshots/drift_repor.png b/screenshots/drift_repor.png new file mode 100644 index 0000000..6d19841 Binary files /dev/null and b/screenshots/drift_repor.png differ diff --git a/screenshots/mlflow_experiment.png b/screenshots/mlflow_experiment.png new file mode 100644 index 0000000..bea54c1 Binary files /dev/null and b/screenshots/mlflow_experiment.png differ diff --git a/screenshots/run.png b/screenshots/run.png new file mode 100644 index 0000000..54cd557 Binary files /dev/null and b/screenshots/run.png differ diff --git 
a/screenshots/run_details_with_metrics.png b/screenshots/run_details_with_metrics.png new file mode 100644 index 0000000..884527e Binary files /dev/null and b/screenshots/run_details_with_metrics.png differ