diff --git a/.gitignore b/.gitignore index 223e02c8..23da1e7f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,9 @@ venv/ # Model files *.pkl +# Database files +*.db + # Python cache __pycache__/ *.pyc diff --git a/README.md b/README.md index a77cda49..6b349110 100644 --- a/README.md +++ b/README.md @@ -260,6 +260,16 @@ python app.py ``` **Backend will be available at:** `http://localhost:5000` +#### Database Setup +The application uses SQLite for local data storage. The database file is automatically created when you first run the backend server. No manual setup is required. + +**For new contributors:** +1. Ensure you have write permissions in the project directory. +2. Run the backend server (`python app.py`) - this will create `database/wardha.db` automatically. +3. The database contains tables for stations, routes, and passenger logs. + +**Note:** Database files (*.db) are ignored by Git to prevent committing sensitive or large binary data. Each contributor should create their own local database. + #### 3. **Frontend Setup** ```bash # Navigate to frontend directory diff --git a/backend/app.py b/backend/app.py index f6273f98..b26e54c6 100644 --- a/backend/app.py +++ b/backend/app.py @@ -10,7 +10,14 @@ from services import metro_service from flask import request from flask_cors import CORS, cross_origin -from sklearn.ensemble import RandomForestRegressor +try: + from sklearn.ensemble import RandomForestRegressor + SKLEARN_AVAILABLE = True +except Exception: + # scikit-learn may not be installable in some environments (Windows without build tools). + # We'll fall back to a tiny numpy-based DummyRegressor when necessary. + RandomForestRegressor = None + SKLEARN_AVAILABLE = False import numpy as np # Configure logging @@ -49,27 +56,43 @@ logger.error("Error type: %s", type(e).__name__) logger.error("Error message: %s", str(e)) logger.error("Full traceback:\n%s", traceback.format_exc()) - + # Only create dummy model if explicitly enabled via environment variable if USE_DUMMY_MODEL: logger.warning("⚠️ USE_DUMMY_MODEL=true detected. Creating dummy model for development/testing...") logger.warning("⚠️ THIS IS NOT SUITABLE FOR PRODUCTION USE!") - + try: - # Create and train a clearly documented dummy model - # This model generates random predictions and should ONLY be used for testing - np.random.seed(42) - X_dummy = np.random.rand(100, 3) # Features: hour, day_of_week, station_id - y_dummy = np.random.randint(50, 500, 100) # Target: passenger flow (50-500 range) - - model = RandomForestRegressor(n_estimators=10, random_state=42, max_depth=3) - model.fit(X_dummy, y_dummy) - - model_loaded = True - model_type = 'dummy' - logger.warning("⚠️ Dummy model created and trained with synthetic data") - logger.warning("⚠️ Model type: RandomForestRegressor (10 estimators, max_depth=3)") - logger.warning("⚠️ Training data: 100 random samples, passenger flow range: 50-500") + # If scikit-learn is available, create a small RandomForestRegressor for testing + if SKLEARN_AVAILABLE and RandomForestRegressor is not None: + np.random.seed(42) + X_dummy = np.random.rand(100, 3) # Features: hour, day_of_week, station_id + y_dummy = np.random.randint(50, 500, 100) # Target: passenger flow (50-500 range) + + model = RandomForestRegressor(n_estimators=10, random_state=42, max_depth=3) + model.fit(X_dummy, y_dummy) + + model_loaded = True + model_type = 'dummy' + logger.warning("⚠️ Dummy sklearn model created and trained with synthetic data") + else: + # scikit-learn is not available — provide a very small predictable DummyRegressor + class DummyRegressor: + def predict(self, X): + # X may be a pandas DataFrame or array-like — return a simple deterministic value + try: + n = len(X) + except Exception: + n = 1 + # Return a vector of 150 passengers for each input row + return np.full((n,), 150) + + model = DummyRegressor() + model_loaded = True + model_type = 'dummy' + logger.warning("⚠️ Dummy numpy-based model provided (scikit-learn not available)") + + logger.warning("⚠️ Model type: %s", model_type) except Exception as dummy_error: logger.error("❌ Failed to create dummy model: %s", str(dummy_error)) logger.error("Full traceback:\n%s", traceback.format_exc()) diff --git a/backend/models/__pycache__/__init__.cpython-313.pyc b/backend/models/__pycache__/__init__.cpython-313.pyc index 206d2583..2d720b2a 100644 Binary files a/backend/models/__pycache__/__init__.cpython-313.pyc and b/backend/models/__pycache__/__init__.cpython-313.pyc differ diff --git a/backend/models/__pycache__/models.cpython-313.pyc b/backend/models/__pycache__/models.cpython-313.pyc index 93b357c3..8190a336 100644 Binary files a/backend/models/__pycache__/models.cpython-313.pyc and b/backend/models/__pycache__/models.cpython-313.pyc differ diff --git a/database/wardha.db b/database/wardha.db deleted file mode 100644 index b45a2f8e..00000000 Binary files a/database/wardha.db and /dev/null differ