Azure-Samples · Cataldir · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026
diff --git a/.github/workflows/eval-continuous.yml b/.github/workflows/eval-continuous.yml
@@ -0,0 +1,120 @@
+# Scheduled and on-demand evaluation monitoring for every agent that ships a
+# `.foundry/eval-config.yaml` under `apps/`.
+name: agent-eval-continuous
+
+on:
+  # Daily at 06:00; GitHub evaluates cron schedules in UTC.
+  schedule:
+    - cron: "0 6 * * *"
+  # Manual runs may narrow the scope to one agent and/or suppress issue filing.
+  workflow_dispatch:
+    inputs:
+      agent:
+        description: "Optional agent name; leave empty for all discovered agents"
+        required: false
+        default: ""
+      dry_run:
+        description: "Run evaluations without creating drift issues"
+        type: boolean
+        required: false
+        default: false
+
+# Token scopes for the run: read the repository, write issues.
+permissions:
+  contents: read
+  issues: write
+
+# Runs are grouped by workflow name plus the requested agent ('all' when no
+# agent input is present); an in-progress run is never cancelled by a newer one.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.inputs.agent || 'all' }}
+  cancel-in-progress: false
+
+jobs:
+  # Builds the job matrix: one entry per `apps/<agent>/.foundry/eval-config.yaml`,
+  # optionally narrowed to the agent named in the `agent` dispatch input.
+  discover-agents:
+    name: discover eval scope
+    runs-on: ubuntu-latest
+    # Discovery only reads the checkout, so it does not need the issues scope.
+    permissions:
+      contents: read
+    outputs:
+      # Compact JSON of the form {"include":[{"name":...,"root":...}]}.
+      matrix: ${{ steps.discover.outputs.matrix }}
+      # String 'true' or 'false'; the monitor job is gated on it.
+      has-agents: ${{ steps.discover.outputs.has-agents }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Build agent matrix
+        id: discover
+        env:
+          # Empty on scheduled runs, where no dispatch inputs exist.
+          SELECTED_AGENT: ${{ github.event.inputs.agent || '' }}
+        run: |
+          python - <<'PY'
+          import glob
+          import json
+          import os
+
+          selected = os.environ.get("SELECTED_AGENT", "").strip()
+          discovered = []
+          for config_path in sorted(glob.glob("apps/*/.foundry/eval-config.yaml")):
+              # apps/<agent>/.foundry/eval-config.yaml -> apps/<agent>
+              agent_root = os.path.dirname(os.path.dirname(config_path))
+              agent_name = os.path.basename(agent_root)
+              if selected and agent_name != selected:
+                  continue
+              discovered.append({"name": agent_name, "root": agent_root})
+
+          if not discovered:
+              # The monitor job is skipped when the matrix is empty; annotate the
+              # run so a mistyped agent name does not pass as a silent success.
+              scope = f"agent {selected!r}" if selected else "any agent"
+              print(
+                  "::warning title=No evaluation scope::"
+                  f"No .foundry/eval-config.yaml found for {scope} under apps/"
+              )
+
+          # Step outputs are single-line key=value pairs, hence the compact JSON.
+          matrix = json.dumps({"include": discovered}, separators=(",", ":"))
+          with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as output:
+              print(f"matrix={matrix}", file=output)
+              print(f"has-agents={str(bool(discovered)).lower()}", file=output)
+          PY
+
+  # Runs the evaluation monitor once per agent in the discovered matrix and
+  # uploads each agent's result, log and drift-state files as an artifact.
+  monitor:
+    name: continuous eval (${{ matrix.name }})
+    needs: discover-agents
+    # Skipped entirely when discovery produced an empty matrix.
+    if: needs.discover-agents.outputs.has-agents == 'true'
+    runs-on: ubuntu-latest
+    # Scopes this job needs: checkout plus issue filing with GITHUB_TOKEN.
+    permissions:
+      contents: read
+      issues: write
+    strategy:
+      # Let every agent finish even if another agent's evaluation fails.
+      fail-fast: false
+      matrix: ${{ fromJson(needs.discover-agents.outputs.matrix) }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+      - name: Set up uv
+        uses: astral-sh/setup-uv@v5
+      - name: Install evaluation runtime
+        run: |
+          uv pip install --system -e ./lib/src
+      - name: Run continuous evaluation monitor
+        env:
+          # Matrix values reach the script through the environment instead of
+          # `${{ }}` interpolation, so they are never parsed as shell source.
+          AGENT_NAME: ${{ matrix.name }}
+          AGENT_ROOT: ${{ matrix.root }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Dispatch inputs are absent on scheduled runs; fall back to 'false'.
+          DRY_RUN: ${{ github.event.inputs.dry_run || 'false' }}
+          # The same two secrets are exported under both naming schemes.
+          # NOTE(review): presumably for different consumers - confirm.
+          PROJECT_ENDPOINT: ${{ secrets.FOUNDRY_PROJECT_ENDPOINT || '' }}
+          PROJECT_NAME: ${{ secrets.FOUNDRY_PROJECT_NAME || '' }}
+          FOUNDRY_PROJECT_ENDPOINT: ${{ secrets.FOUNDRY_PROJECT_ENDPOINT || '' }}
+          FOUNDRY_PROJECT_NAME: ${{ secrets.FOUNDRY_PROJECT_NAME || '' }}
+        run: |
+          set -euo pipefail
+          # One UTC timestamp ties together the result file, log file and run name.
+          timestamp="$(date -u +%Y%m%dT%H%M%SZ)"
+          results_dir="$AGENT_ROOT/.foundry/results"
+          result_path="$results_dir/run-$timestamp.json"
+          log_path="$results_dir/run-$timestamp.log"
+          state_path="$results_dir/.drift_state.json"
+          mkdir -p "$results_dir"
+
+          # Exactly one of --create-issue / --dry-run is passed to the monitor.
+          issue_flag="--create-issue"
+          if [ "$DRY_RUN" = "true" ]; then
+            issue_flag="--dry-run"
+          fi
+
+          # NOTE(review): the token is handed over as a CLI argument, which is
+          # visible in the process list; prefer reading GITHUB_TOKEN from the
+          # environment if the script supports it - confirm against the script.
+          # GITHUB_REPOSITORY is the runner-provided owner/repo value.
+          python scripts/ci/continuous_eval_monitor.py \
+            --agent-root "$AGENT_ROOT" \
+            --run-name "continuous-$timestamp-$AGENT_NAME" \
+            --write-result "$result_path" \
+            --write-log "$log_path" \
+            --state-path "$state_path" \
+            --repo "$GITHUB_REPOSITORY" \
+            --github-token "$GITHUB_TOKEN" \
+            "$issue_flag"
+      - name: Upload evaluation artifacts
+        # Keep the evidence even when the monitor step failed.
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: eval-${{ matrix.name }}-${{ github.run_id }}
+          path: |
+            ${{ matrix.root }}/.foundry/results/run-*.json
+            ${{ matrix.root }}/.foundry/results/run-*.log
+            ${{ matrix.root }}/.foundry/results/.drift_state.json
+          # upload-artifact v4 skips hidden files unless opted in; the drift
+          # state file is a dotfile, so without this it is never uploaded.
+          include-hidden-files: true
+          if-no-files-found: warn
diff --git a/docs/architecture/adrs/adr-017-deployment-strategy.md b/docs/architecture/adrs/adr-017-deployment-strategy.md
@@ -176,13 +176,21 @@ Required repository secrets:
 - `AZURE_TENANT_ID` — Azure AD tenant
 - `AZURE_SUBSCRIPTION_ID` — Target subscription
 
+# ADR-017: Deployment Strategy - azd Provisioning + Flux CD GitOps
+
+**Status**: Accepted (Revised)  
 ### Evaluation Workflow Integration (Amended: 2026-04)
 
-ADR-028 adds evaluation evidence to PR and deployment governance without changing the deployment source of truth. The current evaluation workflow is `.github/workflows/eval-advisory.yml`, whose workflow name is `agent-eval-advisory`. It discovers the pilot evaluation scope, runs `scripts/ci/run_agent_evaluation.py` for changed pilot agents, writes normalized `.foundry-results/*.json`, publishes job summaries, and uploads evaluation artifacts.
+ADR-028 integrates evaluation evidence into PR and deployment governance while preserving the azd + Flux deployment source of truth. The repository includes an advisory matrix workflow, `.github/workflows/eval-advisory.yml` (`agent-eval-advisory`), that runs evaluation for changed pilot agents, publishes summaries, and uploads artifacts as reviewer evidence. Separately, `.github/workflows/eval-continuous.yml` (`agent-eval-continuous`) runs daily at 06:00 UTC (cron `0 6 * * *`), or on demand via `workflow_dispatch`, to detect quality drift across every agent under `apps/` that includes a `.foundry/eval-config.yaml`.
+
+Key controls for evaluation workflows:
 
-`agent-eval-advisory` is intentionally advisory and non-required. It must remain outside required branch-protection checks until `docs/governance/README.md` is explicitly revised to promote it. There is no `eval-gate.yml` or `eval-continuous.yml` workflow in the current repository snapshot, so deployment governance must reference the existing advisory workflow rather than stale gate names.
+- Both `agent-eval-advisory` and `agent-eval-continuous` are advisory and non-required by default. They must remain outside required branch-protection checks unless `docs/governance/README.md` is explicitly updated to promote them.
+- `agent-eval-continuous` discovers agents by scanning `apps/*/.foundry/eval-config.yaml` and runs the evaluation monitor in a matrix with `fail-fast: false`.
+- The continuous workflow writes run artifacts to per-agent `.foundry/results/` directories in the workflow workspace and uploads them as workflow artifacts; it does not commit result or baseline files back to the repository.
+- When drift is detected, the continuous workflow files an issue labelled `evaluation` and `drift:<severity>`, unless the run was dispatched with the `dry_run` input set to `true`. The workflow guards against duplicate open issues by searching existing open issues for a stable drift fingerprint.
 
-PR reviewers use evaluation artifacts as architecture and quality evidence when prompts, datasets, routing, or evaluation framework code changes. Deployment workflows remain governed by the azd + Flux path in this ADR; evaluation evidence can block a PR by human review policy, but it does not independently deploy, roll back, rename workflows, or bypass `lint` / `test` branch-protection baselines.
+These workflows are monitoring and advisory only: they do not perform automatic remediation, rollbacks, or code changes. Deployment governance continues to be enforced by azd + Flux and the `lint`/`test` baseline described in `docs/governance/README.md`.
 
 ## Consequences