MrPhantom2325 · MrPhantom2325 · May 13, 2026 · May 10, 2026 · May 10, 2026 · May 11, 2026
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,34 @@
+# NOTE: unlike .gitignore, .dockerignore patterns are matched against the
+# build-context root, so a leading `**/` is needed to match at any depth.
+
+# Python ([cod] already covers .pyc, .pyo and .pyd)
+**/__pycache__/
+**/*.py[cod]
+.venv/
+venv/
+*.egg-info/
+
+# MLflow artifacts (mounted at runtime, not baked in)
+mlruns/
+
+# Experiment outputs (large, gitignored already)
+experiments/policies/
+experiments/results/
+experiments/figures/
+experiments/videos/
+
+# Git
+.git/
+.gitignore
+
+# Docs / editor noise
+*.md
+**/.DS_Store
+.idea/
+.vscode/
+
+# Tests (not needed in prod images)
+tests/
+
+# Handoff / scratch files
+PROJECT_HANDOFF.md
diff --git a/.dvc/config b/.dvc/config
diff --git a/.dvc/tmp/btime b/.dvc/tmp/btime
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -0,0 +1,33 @@
+<!--
+Quick PR template. Replace each placeholder before requesting review.
+-->
+
+## What
+
+Short summary of the change.
+
+## Why
+
+Why is this needed? (rubric requirement, bug fix, etc.)
+
+## How to test
+
+Concrete steps to verify locally:
+
+```bash
+# e.g.
+pytest tests/test_xxx.py -v
+python train.py --config configs/...
+```
+
+## Linked issue
+
+Closes #<issue-number>
+
+## Checklist
+
+- [ ] Tests added / updated
+- [ ] `ruff check .` passes
+- [ ] `pytest tests/` passes
+- [ ] Docs updated (README / CONTRIBUTING if relevant)
+- [ ] Self-review: walked through the diff and left at least one comment
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
@@ -0,0 +1,102 @@
+name: Retrain
+
+# Trigger:
+# - manual via the Actions tab (workflow_dispatch) — pick a config from the dropdown
+# - push to main with "[retrain]" in the commit message
+#
+# Note: this is a *demonstration* CD workflow. In a real deployment you'd
+# push the resulting policy to S3/GCS or a model registry served from MLflow,
+# but for Phase 1 we just upload the policy artifact to the workflow run so
+# you can download it from the GitHub UI.
+on:
+  workflow_dispatch:
+    inputs:
+      config:
+        description: "Config file to train with"
+        required: true
+        default: "configs/dqn_tuned.yaml"
+        type: choice
+        options:
+          - configs/qlearning_v1.yaml
+          - configs/qlearning_v2_explored.yaml
+          - configs/q_learning_tuned.yaml
+          - configs/sarsa_v1.yaml
+          - configs/sarsa_tuned.yaml
+          - configs/dqn_v1.yaml
+          - configs/dqn_v2_holiday.yaml
+          - configs/dqn_tuned.yaml
+  push:
+    branches: [main]
+
+# Least privilege: the job only checks out the repository; its outputs are
+# uploaded as workflow-run artifacts rather than pushed back to the repo.
+permissions:
+  contents: read
+
+jobs:
+  retrain:
+    name: Train policy
+    runs-on: ubuntu-latest
+    # Skip on regular pushes; only run if the commit message contains [retrain]
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      contains(github.event.head_commit.message, '[retrain]')
+    timeout-minutes: 60
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+
+      - name: Install CPU-only PyTorch
+        run: |
+          pip install --upgrade pip
+          pip install torch==2.2.0 --index-url https://download.pytorch.org/whl/cpu
+
+      - name: Install dependencies
+        run: pip install -r requirements.txt
+
+      - name: Prepare data
+        run: python data_prep.py --scenario all
+
+      - name: Resolve config to train
+        id: cfg
+        env:
+          # Expression values are handed over as environment variables instead
+          # of being expanded into the script, so they are never parsed as shell.
+          EVENT_NAME: ${{ github.event_name }}
+          INPUT_CONFIG: ${{ github.event.inputs.config }}
+        run: |
+          if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
+            echo "CONFIG=$INPUT_CONFIG" >> "$GITHUB_OUTPUT"
+          else
+            # Default for [retrain] pushes
+            echo "CONFIG=configs/dqn_tuned.yaml" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Train
+        env:
+          MLFLOW_TRACKING_URI: file:./mlruns
+          TRAIN_CONFIG: ${{ steps.cfg.outputs.CONFIG }}
+        run: |
+          echo "Training with $TRAIN_CONFIG"
+          python train.py --config "$TRAIN_CONFIG"
+
+      - name: Upload trained policy
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: trained-policy-${{ github.sha }}
+          path: |
+            experiments/policies/
+            experiments/results/
+
+      - name: Upload MLflow runs
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: mlruns-${{ github.sha }}
+          path: mlruns/
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,170 @@
+name: CI
+
+# Triggers:
+# - every push to any branch (so feature branches get fast feedback)
+# - every PR targeting main or dev (gate for merge)
+on:
+  push:
+    branches: ["**"]
+  pull_request:
+    branches: [main, dev]
+
+# Avoid wasting CI minutes: cancel in-progress runs when a new commit lands on the same branch.
+concurrency:
+  group: ci-${{ github.ref }}
+  cancel-in-progress: true
+
+# Least privilege: every job here only needs to read the repository.
+permissions:
+  contents: read
+
+jobs:
+  # ────────────────────────────────────────────────────────────
+  # 1. Lint — fast, fails the rest if style is broken
+  # ────────────────────────────────────────────────────────────
+  lint:
+    name: Lint (ruff)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+
+      - name: Install ruff
+        run: pip install ruff==0.2.0
+
+      - name: Run ruff
+        run: ruff check .
+
+  # ────────────────────────────────────────────────────────────
+  # 2. Tests — full pytest suite with coverage
+  # ────────────────────────────────────────────────────────────
+  test:
+    name: Tests (pytest)
+    runs-on: ubuntu-latest
+    needs: lint
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+          cache: pip
+
+      - name: Install CPU-only PyTorch
+        # Installing torch from the CPU-only index first avoids pulling CUDA wheels.
+        run: |
+          pip install --upgrade pip
+          pip install torch==2.2.0 --index-url https://download.pytorch.org/whl/cpu
+
+      - name: Install project dependencies
+        run: pip install -r requirements.txt
+
+      - name: Prepare data
+        # data_prep.py validates scenario CSVs and writes derived features.
+        # Many tests depend on its outputs being present. `all` processes
+        # weekday, weekend, and holiday_rush in one go.
+        run: python data_prep.py --scenario all
+
+      - name: Run pytest with coverage
+        run: |
+          pytest tests/ \
+            --cov=. \
+            --cov-report=term-missing \
+            --cov-report=xml \
+            --junitxml=pytest-results.xml \
+            -v
+
+      - name: Upload coverage report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-report
+          path: coverage.xml
+
+      - name: Upload test results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: pytest-results
+          path: pytest-results.xml
+
+  # ────────────────────────────────────────────────────────────
+  # 3. Build Docker images — verifies the Dockerfiles actually build
+  # ────────────────────────────────────────────────────────────
+  build-images:
+    name: Build Docker images
+    runs-on: ubuntu-latest
+    needs: test
+    # Only on push to main/dev — saves CI time on feature branches.
+    if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/dev')
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build training image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: Dockerfile.train
+          push: false
+          tags: food-rescue-train:ci-${{ github.sha }}
+          cache-from: type=gha,scope=train
+          cache-to: type=gha,scope=train,mode=max
+
+      - name: Build serving image
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: Dockerfile.serve
+          push: false
+          tags: food-rescue-serve:ci-${{ github.sha }}
+          cache-from: type=gha,scope=serve
+          cache-to: type=gha,scope=serve,mode=max
+
+  # ────────────────────────────────────────────────────────────
+  # 4. Smoke test the serving image — runs /health
+  # ────────────────────────────────────────────────────────────
+  smoke-test-serve:
+    name: Smoke test serving image
+    runs-on: ubuntu-latest
+    needs: build-images
+    if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/dev')
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build serving image (load locally)
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: Dockerfile.serve
+          load: true
+          tags: food-rescue-serve:smoke
+          cache-from: type=gha,scope=serve
+
+      - name: Run container and check /health
+        run: |
+          # Start the API in the background; it will boot in degraded mode
+          # since no policy is mounted, but /health should still respond.
+          docker run -d --name api-smoke -p 8000:8000 food-rescue-serve:smoke
+          # Dump logs and stop the container on exit, even when the check fails.
+          trap 'docker logs api-smoke; docker stop api-smoke' EXIT
+          # Poll once per second, for up to 30s, while uvicorn comes up.
+          for i in $(seq 1 30); do
+            if curl -fsS http://localhost:8000/health > /dev/null; then
+              echo "API is up after ${i}s"
+              break
+            fi
+            sleep 1
+          done
+          curl -fsS http://localhost:8000/health
diff --git a/Dockerfile.serve b/Dockerfile.serve
@@ -0,0 +1,30 @@
+# Serving image: runs the API (api.main:app) under uvicorn on CPU-only PyTorch.
+FROM python:3.11-slim-bookworm AS serve
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
+    libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy only the requirements first so the dependency layer stays cached
+# until requirements.txt changes.
+COPY requirements.txt .
+
+# torch is installed from the CPU-only wheel index before the remaining
+# requirements are resolved.
+RUN pip install --no-cache-dir --upgrade pip \
+ && pip install --no-cache-dir \
+    torch==2.2.0 --index-url https://download.pytorch.org/whl/cpu \
+ && pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+ENV MLFLOW_TRACKING_URI=http://mlflow:5000
+ENV PORT=8000
+
+EXPOSE 8000
+
+# The shell is needed to expand ${PORT}; `exec` then replaces it with uvicorn
+# so the server runs as PID 1 and receives SIGTERM from `docker stop`.
+CMD ["sh", "-c", "exec uvicorn api.main:app --host 0.0.0.0 --port \"${PORT}\""]
diff --git a/Dockerfile.train b/Dockerfile.train
@@ -0,0 +1,41 @@
+# ── builder: installs Python dependencies with a compiler toolchain ──
+FROM python:3.11-slim-bookworm AS builder
+
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
+    gcc g++ libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt .
+
+RUN pip install --no-cache-dir --upgrade pip \
+ && pip install --no-cache-dir \
+    torch==2.2.0 --index-url https://download.pytorch.org/whl/cpu \
+ && pip install --no-cache-dir -r requirements.txt
+
+# ── runtime ──
+FROM python:3.11-slim-bookworm AS train
+
+WORKDIR /app
+
+# apt packages from the builder are not carried over by the COPY --from lines
+# below, so the OpenMP runtime is installed again for the runtime stage.
+RUN apt-get update && apt-get install -y --no-install-recommends --fix-missing \
+    libgomp1 \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=builder /usr/local/lib/python3.11 /usr/local/lib/python3.11
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+COPY . .
+
+# Data preparation runs at build time, so its outputs are part of the image.
+RUN python data_prep.py --scenario all
+
+ENV CONFIG=configs/qlearning_v1.yaml
+ENV MLFLOW_TRACKING_URI=http://mlflow:5000
+
+# The shell expands ${CONFIG} (override with `docker run -e CONFIG=...`);
+# `exec` makes the training process PID 1 so it receives stop signals.
+CMD ["sh", "-c", "exec python train.py --config \"${CONFIG}\""]