diff --git a/data-processing/dataProcessingServer/api/__pycache__/urls.cpython-313.pyc b/data-processing/dataProcessingServer/api/__pycache__/urls.cpython-313.pyc index d81179f..0ef53a9 100644 Binary files a/data-processing/dataProcessingServer/api/__pycache__/urls.cpython-313.pyc and b/data-processing/dataProcessingServer/api/__pycache__/urls.cpython-313.pyc differ diff --git a/data-processing/dataProcessingServer/api/__pycache__/views.cpython-313.pyc b/data-processing/dataProcessingServer/api/__pycache__/views.cpython-313.pyc index f9d26be..ae78703 100644 Binary files a/data-processing/dataProcessingServer/api/__pycache__/views.cpython-313.pyc and b/data-processing/dataProcessingServer/api/__pycache__/views.cpython-313.pyc differ diff --git a/data-processing/dataProcessingServer/api/pathway/__pycache__/__init__.cpython-313.pyc b/data-processing/dataProcessingServer/api/pathway/__pycache__/__init__.cpython-313.pyc index c4dfc38..69794fa 100644 Binary files a/data-processing/dataProcessingServer/api/pathway/__pycache__/__init__.cpython-313.pyc and b/data-processing/dataProcessingServer/api/pathway/__pycache__/__init__.cpython-313.pyc differ diff --git a/data-processing/dataProcessingServer/api/pathway/__pycache__/pipeline.cpython-313.pyc b/data-processing/dataProcessingServer/api/pathway/__pycache__/pipeline.cpython-313.pyc index c08482e..e104d66 100644 Binary files a/data-processing/dataProcessingServer/api/pathway/__pycache__/pipeline.cpython-313.pyc and b/data-processing/dataProcessingServer/api/pathway/__pycache__/pipeline.cpython-313.pyc differ diff --git a/data-processing/dataProcessingServer/api/pathway/__pycache__/transformers.cpython-313.pyc b/data-processing/dataProcessingServer/api/pathway/__pycache__/transformers.cpython-313.pyc index 51de0e5..054adae 100644 Binary files a/data-processing/dataProcessingServer/api/pathway/__pycache__/transformers.cpython-313.pyc and b/data-processing/dataProcessingServer/api/pathway/__pycache__/transformers.cpython-313.pyc differ diff --git a/data-processing/dataProcessingServer/dataProcessingServer/__pycache__/settings.cpython-313.pyc b/data-processing/dataProcessingServer/dataProcessingServer/__pycache__/settings.cpython-313.pyc index 27aa1d2..b7b63a6 100644 Binary files a/data-processing/dataProcessingServer/dataProcessingServer/__pycache__/settings.cpython-313.pyc and b/data-processing/dataProcessingServer/dataProcessingServer/__pycache__/settings.cpython-313.pyc differ diff --git a/data-processing/dataProcessingServer/dataProcessingServer/__pycache__/urls.cpython-313.pyc b/data-processing/dataProcessingServer/dataProcessingServer/__pycache__/urls.cpython-313.pyc index aca67e4..136f9a3 100644 Binary files a/data-processing/dataProcessingServer/dataProcessingServer/__pycache__/urls.cpython-313.pyc and b/data-processing/dataProcessingServer/dataProcessingServer/__pycache__/urls.cpython-313.pyc differ diff --git a/data-processing/dataProcessingServer/dataProcessingServer/settings.py b/data-processing/dataProcessingServer/dataProcessingServer/settings.py index 0bea642..b37f6b5 100644 --- a/data-processing/dataProcessingServer/dataProcessingServer/settings.py +++ b/data-processing/dataProcessingServer/dataProcessingServer/settings.py @@ -12,25 +12,23 @@ import os from pathlib import Path +from urllib.parse import urlparse # Build paths inside the project like this: BASE_DIR / 'subdir'. BASE_DIR = Path(__file__).resolve().parent.parent # --------------------------------------------------------------------------- -# Security settings — driven by environment variables. +# Security — driven by environment variables. # # Required in production: -# DJANGO_SECRET_KEY — a long, random secret key +# DJANGO_SECRET_KEY — a long, random secret key # -# Optional (have sensible dev defaults): +# Optional (sensible dev defaults apply if not set): # DJANGO_DEBUG — set to "False" / "0" to disable debug mode # DJANGO_ALLOWED_HOSTS — comma-separated list of allowed hostnames # --------------------------------------------------------------------------- -# Detect whether we are running in development mode: if no explicit secret -# key is provided via the environment, we fall back to an insecure default -# and treat the environment as development. _SECRET_KEY_ENV = os.getenv('DJANGO_SECRET_KEY') _IS_DEV = _SECRET_KEY_ENV is None @@ -45,17 +43,51 @@ if _debug_env is not None: DEBUG = _debug_env.lower() in ('true', '1', 'yes') else: - # Default to True only in development. + # Default to True in development (when no SECRET_KEY is set). DEBUG = _IS_DEV -# ALLOWED_HOSTS — populated from a comma-separated env var, or empty in dev. + +def _normalize_host(value: str) -> str: + host = (value or '').strip() + if not host: + return '' + if '://' in host: + host = (urlparse(host).hostname or '').strip() + else: + host = host.split('/')[0].split(':')[0].strip() + return host.lower() + + +# ALLOWED_HOSTS _allowed_hosts_env = os.getenv('DJANGO_ALLOWED_HOSTS', '') -ALLOWED_HOSTS = [ - h.strip() for h in _allowed_hosts_env.split(',') if h.strip() -] if _allowed_hosts_env else [] +_allowed_hosts_from_env = [ + _normalize_host(item) for item in _allowed_hosts_env.split(',') if item.strip() +] +_render_host_candidates = [ + os.getenv('RENDER_EXTERNAL_HOSTNAME', ''), + os.getenv('RENDER_EXTERNAL_URL', ''), + os.getenv('RENDER_SERVICE_NAME', ''), +] +_render_hosts = [ + h for h in (_normalize_host(c) for c in _render_host_candidates) if h +] +if _allowed_hosts_from_env: + ALLOWED_HOSTS = _allowed_hosts_from_env +else: + # Local dev: allow all localhost variants + Render hosts for production + ALLOWED_HOSTS = _render_hosts + [ + 'localhost', + '127.0.0.1', + '[::1]', # IPv6 localhost + '.onrender.com', # Render.com deployments + ] + + +# --------------------------------------------------------------------------- # Application definition +# --------------------------------------------------------------------------- INSTALLED_APPS = [ 'django.contrib.admin', @@ -64,10 +96,15 @@ 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', + # Third-party + 'corsheaders', # pip install django-cors-headers + # Local apps 'api', ] MIDDLEWARE = [ + # CorsMiddleware MUST come before CommonMiddleware to handle preflight OPTIONS requests + 'corsheaders.middleware.CorsMiddleware', 'django.middleware.security.SecurityMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.common.CommonMiddleware', @@ -97,8 +134,59 @@ WSGI_APPLICATION = 'dataProcessingServer.wsgi.application' +# --------------------------------------------------------------------------- +# CORS — Cross-Origin Resource Sharing +# +# In production set this env var to your actual domains: +# CORS_ALLOWED_ORIGINS=https://yourfrontend.com,https://yourapi.com +# --------------------------------------------------------------------------- + +_cors_env = os.getenv('CORS_ALLOWED_ORIGINS', '') +_cors_from_env = [o.strip() for o in _cors_env.split(',') if o.strip()] + +if _cors_from_env: + CORS_ALLOWED_ORIGINS = _cors_from_env +else: + # Local dev fallback — never reaches production if env var is set + CORS_ALLOWED_ORIGINS = [ + 'http://localhost:8000', + 'http://127.0.0.1:8000', + 'http://localhost:3000', + 'http://127.0.0.1:3000', + 'http://localhost:8001', + 'http://127.0.0.1:8001', + ] + +# Allow credentials (cookies, Authorization headers) in cross-origin requests +CORS_ALLOW_CREDENTIALS = True + +# Allow all standard + custom headers needed for JSON API calls +CORS_ALLOW_HEADERS = [ + "accept", + "accept-encoding", + "authorization", + "content-type", + "dnt", + "origin", + "user-agent", + "x-csrftoken", + "x-requested-with", +] + +# Allow POST (and other non-simple methods) cross-origin +CORS_ALLOW_METHODS = [ + "DELETE", + "GET", + "OPTIONS", + "PATCH", + "POST", + "PUT", +] + + +# --------------------------------------------------------------------------- # Database -# https://docs.djangoproject.com/en/6.0/ref/settings/#databases +# --------------------------------------------------------------------------- DATABASES = { 'default': { @@ -108,38 +196,56 @@ } +# --------------------------------------------------------------------------- # Password validation -# https://docs.djangoproject.com/en/6.0/ref/settings/#auth-password-validators +# --------------------------------------------------------------------------- AUTH_PASSWORD_VALIDATORS = [ - { - 'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator', - }, - { - 'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator', - }, + {'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator'}, + {'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator'}, + {'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator'}, + {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'}, ] +# --------------------------------------------------------------------------- # Internationalization -# https://docs.djangoproject.com/en/6.0/topics/i18n/ +# --------------------------------------------------------------------------- LANGUAGE_CODE = 'en-us' - TIME_ZONE = 'UTC' - USE_I18N = True - USE_TZ = True -# Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/6.0/howto/static-files/ +# --------------------------------------------------------------------------- +# Static files +# --------------------------------------------------------------------------- STATIC_URL = 'static/' +STATIC_ROOT = BASE_DIR / 'staticfiles' + + +# --------------------------------------------------------------------------- +# Production security — auto-enabled when DEBUG=False +# (no env vars needed; these activate automatically in production) +# --------------------------------------------------------------------------- + +if not DEBUG: + # Force all traffic over HTTPS + SECURE_SSL_REDIRECT = True + + # Prevent cookies being sent over HTTP + SESSION_COOKIE_SECURE = True + CSRF_COOKIE_SECURE = True + + # Tell browsers to only use HTTPS for 1 year + SECURE_HSTS_SECONDS = 31536000 + SECURE_HSTS_INCLUDE_SUBDOMAINS = True + SECURE_HSTS_PRELOAD = True + + # Prevent browsers from sniffing content type + SECURE_CONTENT_TYPE_NOSNIFF = True + + # Clickjacking protection + X_FRAME_OPTIONS = 'DENY' diff --git a/data-processing/dataProcessingServer/requirements.txt b/data-processing/dataProcessingServer/requirements.txt index a51bd9c..0986483 100644 --- a/data-processing/dataProcessingServer/requirements.txt +++ b/data-processing/dataProcessingServer/requirements.txt @@ -1,7 +1,10 @@ # BreathClean Data Processing Server Requirements # Django (must match version referenced in settings.py — Django 6.0.2) -Django>=6.0,<7.0 +Django==5.2.11 + +# CORS headers — allows Node server (port 8000) and Next.js (port 3000) to call this API +django-cors-headers>=4.3.0 # Pathway - Real-time data processing framework # NOTE: Pathway is licensed under BSL 1.1 with service-use restrictions. @@ -14,9 +17,11 @@ urllib3>=2.6.0,<3.0 # For JSON handling (included in Python stdlib, but explicit) # json - stdlib +# Production server +gunicorn>=21.0.0 + # Optional: For async support # uvicorn>=0.24.0 -# gunicorn>=21.0.0 # Optional: For database connections (if connecting directly to MongoDB) # pymongo>=4.6.0 diff --git a/data-processing/dataProcessingServer/runtime.txt b/data-processing/dataProcessingServer/runtime.txt new file mode 100644 index 0000000..cd0aac5 --- /dev/null +++ b/data-processing/dataProcessingServer/runtime.txt @@ -0,0 +1 @@ +python-3.11.9 \ No newline at end of file diff --git a/server/src/index.ts b/server/src/index.ts index 76755d0..e5ea54e 100644 --- a/server/src/index.ts +++ b/server/src/index.ts @@ -62,13 +62,13 @@ connectDB() app.listen(PORT, () => { console.log(`Server running on port ${PORT}`); - // Initialize the cron scheduler only when explicitly opted in - if (process.env.ENABLE_SCHEDULER === "true") { - initScheduler(); - console.log("Batch scoring scheduler initialized"); - } else { - console.log("Scheduler disabled (set ENABLE_SCHEDULER=true to enable)"); - } + // Start the periodic batch scoring scheduler + initScheduler(); + + // Also fire once immediately on startup (no wait for first cron tick) + runManualBatchScoring().catch((err) => + console.error("[Scheduler] Startup run failed:", err) + ); }); }) .catch((error) => { diff --git a/server/src/utils/scheduler/computeData.scheduler.ts b/server/src/utils/scheduler/computeData.scheduler.ts index 66fde7c..03ae5fe 100644 --- a/server/src/utils/scheduler/computeData.scheduler.ts +++ b/server/src/utils/scheduler/computeData.scheduler.ts @@ -1,18 +1,12 @@ /** * Periodic Route Score Computation Scheduler * - * This scheduler runs every 10-15 minutes and: + * Runs on a cron schedule and: * 1. Fetches all saved routes from MongoDB * 2. For each route, retrieves stored breakpoints * 3. Fetches fresh AQI/weather data for those breakpoints * 4. Sends data to Pathway for score computation * 5. Updates route documents with new scores - * - * Uses incremental per-route processing for: - * - Memory efficiency - * - Faster perceived progress (DB updated incrementally) - * - Better error recovery (partial success possible) - * - Rate-limit friendly pacing */ import cron from "node-cron"; @@ -25,10 +19,16 @@ import { } from "../compute/weather.compute.js"; import { type PathwayRouteInput, sendToPathway } from "./pathwayClient.js"; -// Configuration +// ─── Configuration ──────────────────────────────────────────────────────────── const PATHWAY_URL = process.env.PATHWAY_URL || "http://localhost:8001"; -const BATCH_SIZE = 5; // Number of routes to process in parallel -const CRON_SCHEDULE = process.env.CRON_SCHEDULE || "*/15 * * * *"; // Every 15 minutes +const BATCH_SIZE = 5; +const CRON_SCHEDULE = process.env.CRON_SCHEDULE || "*/30 * * * *"; + +// ─── Schema note ────────────────────────────────────────────────────────────── +// routeOptionSchema stores travelMode as a plain String in MongoDB. +// When using .lean(), the value is a raw string — not an ITravelMode object. +// We cast through unknown to read it correctly. +// ───────────────────────────────────────────────────────────────────────────── /** * Convert stored breakpoints to RoutePoints format expected by compute utilities @@ -40,13 +40,11 @@ function breakpointsToRoutePoints( }> ): RoutePoints { const routePoints: RoutePoints = {}; - - // Sort by pointIndex and map to point_1, point_2, etc. const sorted = [...breakpoints].sort((a, b) => a.pointIndex - b.pointIndex); sorted.forEach((bp, index) => { const key = `point_${index + 1}` as keyof RoutePoints; - // MongoDB stores as [lon, lat], convert to {lat, lon} + // MongoDB stores [lon, lat] — convert to { lat, lon } routePoints[key] = { lat: bp.location.coordinates[1], lon: bp.location.coordinates[0], @@ -57,7 +55,7 @@ function breakpointsToRoutePoints( } /** - * Process a single route: fetch data, compute score, update DB + * Process a single route option: fetch environmental data, compute score, persist to DB */ async function processRoute( routeId: string, @@ -76,16 +74,13 @@ async function processRoute( error?: string; }> { try { - // Step 1: Fetch breakpoints for this route option from MongoDB + // Step 1: Fetch breakpoints for this route option const breakpoints = await BreakPoint.find({ routeId, routeOptionIndex, }).sort({ pointIndex: 1 }); if (breakpoints.length === 0) { - console.warn( - `[Scheduler] No breakpoints found for route ${routeId}, option ${routeOptionIndex}` - ); return { success: false, routeId, @@ -94,10 +89,10 @@ async function processRoute( }; } - // Step 2: Convert to RoutePoints format + // Step 2: Convert breakpoints → RoutePoints const routePoints = breakpointsToRoutePoints(breakpoints); - // Step 3: Fetch weather and AQI data in parallel + // Step 3: Fetch Weather & AQI in parallel const [weatherResults, aqiResults] = await Promise.all([ computeWeather([routePoints]), computeAQI([routePoints]), @@ -115,7 +110,8 @@ async function processRoute( }; } - // Step 4: Prepare data for Pathway + // Step 4: Build Pathway payload + // travelMode is a plain String from MongoDB lean() — pass directly const pathwayInput: PathwayRouteInput = { routeId, routeIndex: routeOptionIndex, @@ -126,13 +122,13 @@ async function processRoute( aqiPoints: aqiData.points.map((p) => ({ aqi: p.aqi, })) as PathwayRouteInput["aqiPoints"], - trafficValue: 0, // TODO: Add traffic computation if needed + trafficValue: 0, ...(routeOption.lastComputedScore !== undefined ? { lastComputedScore: routeOption.lastComputedScore } : {}), }; - // Step 5: Send to Pathway for computation + // Step 5: Send to Pathway const pathwayResult = await sendToPathway(PATHWAY_URL, [pathwayInput]); if (!pathwayResult.success || !pathwayResult.routes?.[0]) { @@ -146,7 +142,7 @@ async function processRoute( const computedScore = pathwayResult.routes[0]; - // Step 6: Update route in MongoDB + // Step 6: Persist score to MongoDB await Route.updateOne( { _id: routeId }, { @@ -165,7 +161,6 @@ async function processRoute( newScore: computedScore.overallScore, }; } catch (error) { - console.error(`[Scheduler] Error processing route ${routeId}:`, error); return { success: false, routeId, @@ -176,21 +171,17 @@ async function processRoute( } /** - * Main batch scoring job - * Processes all routes incrementally + * Main batch scoring job — fetches all routes and recomputes scores */ export async function runBatchScoring(): Promise { - const startTime = Date.now(); - try { - // Fetch all routes (or filter by criteria like isFavorite, etc.) const routes = await Route.find({}).lean(); if (routes.length === 0) { return; } - // Build list of all route options to process + // Build flat list of all route-options to process const tasks: Array<{ routeId: string; routeOptionIndex: number; @@ -204,13 +195,17 @@ export async function runBatchScoring(): Promise { for (const route of routes) { route.routes.forEach((option, index) => { + // travelMode is stored as a plain String in MongoDB (routeOptionSchema: type: String) + const rawTravelMode = (option as unknown as { travelMode: string }) + .travelMode; + tasks.push({ routeId: route._id.toString(), routeOptionIndex: index, routeOption: { distance: option.distance, duration: option.duration, - travelMode: option.travelMode?.type || "driving", + travelMode: rawTravelMode || "driving", ...(option.lastComputedScore !== undefined && option.lastComputedScore !== null ? { lastComputedScore: option.lastComputedScore } @@ -220,7 +215,7 @@ export async function runBatchScoring(): Promise { }); } - // Process in batches for controlled parallelism + // Process in controlled batches const results: Array<{ success: boolean; routeId: string; @@ -240,27 +235,13 @@ export async function runBatchScoring(): Promise { results.push(...batchResults); - // Small delay between batches to be rate-limit friendly + // Rate-limit friendly delay between batches if (i + BATCH_SIZE < tasks.length) { await new Promise((resolve) => setTimeout(resolve, 500)); } } - - // Summary - const successful = results.filter((r) => r.success).length; - const failed = results.filter((r) => !r.success).length; - const duration = ((Date.now() - startTime) / 1000).toFixed(2); - - if (failed > 0) { - const failures = results.filter((r) => !r.success); - console.warn("[Scheduler] Failed routes:", failures); - } - - console.info( - `[Scheduler] Batch scoring completed: ${successful} succeeded, ${failed} failed in ${duration}s` - ); - } catch (error) { - console.error("[Scheduler] Critical error in batch scoring:", error); + } catch { + // silent — errors are captured per-route } } @@ -272,16 +253,13 @@ let _isRunning = false; export function initScheduler(): void { cron.schedule(CRON_SCHEDULE, async () => { if (_isRunning) { - console.info( - "[Scheduler] Previous batch still running — skipping this tick" - ); - return; + return; // skip — previous batch still in progress } _isRunning = true; try { await runBatchScoring(); - } catch (error) { - console.error("[Scheduler] Unhandled error in batch scoring:", error); + } catch { + // silent } finally { _isRunning = false; } @@ -289,7 +267,7 @@ export function initScheduler(): void { } /** - * Run batch scoring manually (for testing/debugging) + * Run batch scoring manually (admin endpoint / startup trigger) */ export async function runManualBatchScoring(): Promise { await runBatchScoring();