diff --git a/cmd/kilroy/main.go b/cmd/kilroy/main.go
index 29a08b8d..fb7c1ea8 100644
--- a/cmd/kilroy/main.go
+++ b/cmd/kilroy/main.go
@@ -110,7 +110,7 @@ func loadEnvFile(args []string) []string {
 func usage() {
 	fmt.Fprintln(os.Stderr, "usage:")
 	fmt.Fprintln(os.Stderr, "  kilroy --version")
-	fmt.Fprintln(os.Stderr, "  kilroy [--env-file <path>] attractor run [--detach] [--preflight|--test-run] [--allow-test-shim] [--confirm-stale-build] [--no-cxdb] [--force-model <provider=model>] --graph <file.dot> --config <run.yaml> [--run-id <id>] [--logs-root <dir>]")
+	fmt.Fprintln(os.Stderr, "  kilroy [--env-file <path>] attractor run [--detach] [--validate|--preflight|--test-run] [--allow-test-shim] [--confirm-stale-build] [--no-cxdb] [--force-model <provider=model>] --graph <file.dot> --config <run.yaml> [--run-id <id>] [--logs-root <dir>]")
 	fmt.Fprintln(os.Stderr, "  kilroy attractor resume --logs-root <dir>")
 	fmt.Fprintln(os.Stderr, "  kilroy attractor resume --cxdb <http_base_url> --context-id <id>")
 	fmt.Fprintln(os.Stderr, "  kilroy attractor resume --run-branch <attractor/run/...> [--repo <path>]")
@@ -175,7 +175,7 @@ func attractorRun(args []string) {
 		switch args[i] {
 		case "--detach":
 			detach = true
-		case "--preflight":
+		case "--preflight", "--validate":
 			preflightOnly = true
 		case "--test-run":
 			preflightOnly = true
@@ -233,7 +233,7 @@ func attractorRun(args []string) {
 		os.Exit(1)
 	}
 	if preflightOnly && detach {
-		fmt.Fprintln(os.Stderr, "--preflight/--test-run cannot be combined with --detach")
+		fmt.Fprintln(os.Stderr, "--validate/--preflight/--test-run cannot be combined with --detach")
 		os.Exit(1)
 	}
 	if err := ensureFreshKilroyBuild(confirmStaleBuild); err != nil {
diff --git a/cmd/kilroy/main_exit_codes_test.go b/cmd/kilroy/main_exit_codes_test.go
index 7de27d1e..626f24ba 100644
--- a/cmd/kilroy/main_exit_codes_test.go
+++ b/cmd/kilroy/main_exit_codes_test.go
@@ -630,6 +630,31 @@ digraph G {
 	}
 }
 
+func TestAttractorRun_ValidateAlias_Accepted(t *testing.T) {
+	bin := buildKilroyBinary(t)
+	repo := initTestRepo(t)
+	catalog := writePinnedCatalog(t)
+	cfg := writeRunConfig(t, repo, "http://127.0.0.1:9", "127.0.0.1:9", catalog)
+
+	graph := filepath.Join(t.TempDir(), "validate.dot")
+	_ = os.WriteFile(graph, []byte(`
+digraph G {
+  start [shape=Mdiamond]
+  exit [shape=Msquare]
+  start -> exit
+}
+`), 0o644)
+
+	logsRoot := filepath.Join(t.TempDir(), "logs")
+	code, out := runKilroy(t, bin, "attractor", "run", "--graph", graph, "--config", cfg, "--run-id", "validate-flag", "--logs-root", logsRoot, "--no-cxdb", "--validate")
+	if code != 0 {
+		t.Fatalf("exit code: got %d want 0\n%s", code, out)
+	}
+	if !strings.Contains(out, "preflight=true") {
+		t.Fatalf("expected preflight marker, got:\n%s", out)
+	}
+}
+
 func TestAttractorRun_PreflightRejectsDetach(t *testing.T) {
 	bin := buildKilroyBinary(t)
 	repo := initTestRepo(t)
@@ -650,7 +675,7 @@ digraph G {
 	if code != 1 {
 		t.Fatalf("exit code: got %d want 1\n%s", code, out)
 	}
-	if !strings.Contains(out, "--preflight/--test-run cannot be combined with --detach") {
+	if !strings.Contains(out, "--validate/--preflight/--test-run cannot be combined with --detach") {
 		t.Fatalf("expected incompatible flag error, got:\n%s", out)
 	}
 }
@@ -782,6 +807,9 @@ func TestUsage_IncludesPreflightFlags(t *testing.T) {
 	if code != 1 {
 		t.Fatalf("exit code: got %d want 1\n%s", code, out)
 	}
+	if !strings.Contains(out, "--validate") {
+		t.Fatalf("usage should include --validate; output:\n%s", out)
+	}
 	if !strings.Contains(out, "--preflight") {
 		t.Fatalf("usage should include --preflight; output:\n%s", out)
 	}
diff --git a/demo/substack-spec-v01.dot b/demo/substack-spec-v01.dot
index 486eb721..1bcc4e10 100644
--- a/demo/substack-spec-v01.dot
+++ b/demo/substack-spec-v01.dot
@@ -81,7 +81,7 @@ digraph substack_spec_v01 {
             max_agent_turns=300,
             max_tokens=32768,
             label="Implement",
-            prompt="Goal: $goal\n\nRead .ai/runs/$KILROY_RUN_ID/plan_final.md, .ai/runs/$KILROY_RUN_ID/spec.md, and .ai/runs/$KILROY_RUN_ID/definition_of_done.md. If the spec or DoD files do not exist at those paths, fall back to reading substack-spec-v01.md and substack-dod-v01.md directly.\n\nBEFORE ANYTHING ELSE: check if .ai/verify_errors.log exists. If it does, read it — it contains the exact commands that failed and their error output from the verify chain. Fix every error listed in that file, then delete .ai/verify_errors.log when all fixes are applied. Do NOT regenerate working code — only fix the specific errors.\n\nAlso check if .ai/runs/$KILROY_RUN_ID/verify_fidelity.md exists. If it does, read it — it contains per-AC pass/fail verdicts from the fidelity check. Fix every failing AC listed in that file.\n\nIf .ai/runs/$KILROY_RUN_ID/postmortem_latest.md exists, read it and fix ONLY identified gaps — do NOT regenerate working code. On repair passes, read and fix existing files rather than skipping them.\n\nImplement the complete Substack Creator Newsletter Engine as a single pass. On a fresh pass (no postmortem), check if target files already exist on disk and are non-empty — if so, skip those files. Implement each module with complete, functional code — no stubs, no placeholders, no TODO comments. Follow the plan and spec precisely.\n\nImplementation order (core infrastructure first, then features, then tests/deploy):\n\n1. Project scaffold — package.json, vite.config.ts, tsconfig.json, index.html, src/main.tsx, src/App.tsx. Install dependencies: react, react-dom, react-router-dom, idb, @google/generative-ai. Write ALL validation scripts:\n   - scripts/validate-build.sh: runs npm run build, checks dist/ exists\n   - scripts/validate-fmt.sh: runs npx prettier --check src/\n   - scripts/validate-test.sh: runs integration scenarios first, then smoke, writes evidence + .ai/test-evidence/latest/manifest.json even on failure\n   - scripts/validate-browser.sh: runs browser verification and captures artifacts\n   - scripts/fix-fmt.sh: runs npx prettier --write src/\n   - scripts/validate-artifacts.sh: verifies manifest scenario IDs match DoD integration scenarios\n   All scripts: #!/bin/sh, set -e, POSIX sh failure trap.\n\n2. LLM client — src/lib/llm.ts, src/lib/llm-schemas.ts: structured JSON output with schema enforcement, retry with error feedback and intelligent backoff, model switching (gemini-3-flash-preview/gemini-3.1-pro-preview/gemini-2.5-flash-lite), client-side API key.\n\n3. Persistence — src/lib/db.ts, src/lib/types.ts: IndexedDB via idb with stores for configuration (API key, company, voice, guardrails), drafts, sessions (all inputs/LLM responses/intermediate state), post history (Markdown + attribution). All data persists unless user resets.\n\n4. Shared UI — src/components/RichInput.tsx, Card.tsx, ProgressBar.tsx, StepIndicator.tsx, src/styles/global.css.\n\n5. Setup flow — src/pages/Settings.tsx and step components: API key, company (rich input + gemini-3.1-pro-preview confirm + back), voice, guardrails. Parallel completion, status icons. Reset everything with confirmation.\n\n6. Dashboard — src/pages/Dashboard.tsx: New Post button, Trending Topics button, Settings link, post history, draft resume.\n\n7. Trending Topics — src/pages/TrendingTopics.tsx: parallel gemini-3-flash-preview search grounding queries, trend visualization, 3 gemini-3.1-pro-preview writing prompts, navigate to New Post prefilled.\n\n8. New Post — src/pages/NewPost.tsx with step components: Topic (rich input), Research (gemini-3-flash-preview search grounding, source cards with URL/title/author/date, highlight/delete), Outline (gemini-3.1-pro-preview one-shot, accept/back), Write (3 automatic gemini-3.1-pro-preview cycles: Write with citations, Edit for style, Guardrails-only), Complete (serif post with numbered footnotes, linked sources, attribution lineage).\n\n9. Demo mode — src/lib/demo.ts, src/pages/DemoMode.tsx, src/demo/bundled-session.json: session recording, replay through production path (fade-in prefills, highlight next button), bundled P&G session, cache-miss error with no API fallback.\n\n10. Test infrastructure — src/__tests__/: integration with canned data, smoke with gemini-2.5-flash-lite, manual mode option.\n\n11. Deploy config — railway.json or equivalent, deployment docs.\n\nEnsure App.tsx routing includes all pages. Verify imports/exports are consistent. Run npm install. Fix TypeScript errors.\n\nLog progress to .ai/runs/$KILROY_RUN_ID/implementation_log.md.\n\nPRE-EXIT VERIFICATION: if .ai/runs/$KILROY_RUN_ID/postmortem_latest.md exists, run sh scripts/validate-build.sh and re-read targeted files to confirm fixes.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH) ONLY on failure: status=fail with failure_reason, failure_class, and failure_signature=implement_fail."
+            prompt="Goal: $goal\n\nRead .ai/runs/$KILROY_RUN_ID/plan_final.md, .ai/runs/$KILROY_RUN_ID/spec.md, and .ai/runs/$KILROY_RUN_ID/definition_of_done.md. If the spec or DoD files do not exist at those paths, fall back to reading substack-spec-v01.md and substack-dod-v01.md directly.\n\nBEFORE ANYTHING ELSE: check if .ai/runs/$KILROY_RUN_ID/verify_errors.log exists. If it does, read it — it contains the exact commands that failed and their error output from the verify chain. Fix every error listed in that file, then delete .ai/runs/$KILROY_RUN_ID/verify_errors.log when all fixes are applied. Do NOT regenerate working code — only fix the specific errors.\n\nAlso check if .ai/runs/$KILROY_RUN_ID/verify_fidelity.md exists. If it does, read it — it contains per-AC pass/fail verdicts from the fidelity check. Fix every failing AC listed in that file.\n\nIf .ai/runs/$KILROY_RUN_ID/postmortem_latest.md exists, read it and fix ONLY identified gaps — do NOT regenerate working code. On repair passes, read and fix existing files rather than skipping them.\n\nImplement the complete Substack Creator Newsletter Engine as a single pass. On a fresh pass (no postmortem), check if target files already exist on disk and are non-empty — if so, skip those files. Implement each module with complete, functional code — no stubs, no placeholders, no TODO comments. Follow the plan and spec precisely.\n\nImplementation order (core infrastructure first, then features, then tests/deploy):\n\n1. Project scaffold — package.json, vite.config.ts, tsconfig.json, index.html, src/main.tsx, src/App.tsx. Install dependencies: react, react-dom, react-router-dom, idb, @google/generative-ai. Write ALL validation scripts:\n   - scripts/validate-build.sh: runs npm run build, checks dist/ exists\n   - scripts/validate-fmt.sh: runs npx prettier --check src/\n   - scripts/validate-test.sh: runs integration scenarios first, then smoke, writes evidence + .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json even on failure\n   - scripts/validate-browser.sh: runs browser verification and captures artifacts\n   - scripts/fix-fmt.sh: runs npx prettier --write src/\n   - scripts/validate-artifacts.sh: verifies manifest scenario IDs match DoD integration scenarios\n   All scripts: #!/bin/sh, set -e, POSIX sh failure trap.\n\n2. LLM client — src/lib/llm.ts, src/lib/llm-schemas.ts: structured JSON output with schema enforcement, retry with error feedback and intelligent backoff, model switching (gemini-3-flash-preview/gemini-3.1-pro-preview/gemini-2.5-flash-lite), client-side API key.\n\n3. Persistence — src/lib/db.ts, src/lib/types.ts: IndexedDB via idb with stores for configuration (API key, company, voice, guardrails), drafts, sessions (all inputs/LLM responses/intermediate state), post history (Markdown + attribution). All data persists unless user resets.\n\n4. Shared UI — src/components/RichInput.tsx, Card.tsx, ProgressBar.tsx, StepIndicator.tsx, src/styles/global.css.\n\n5. Setup flow — src/pages/Settings.tsx and step components: API key, company (rich input + gemini-3.1-pro-preview confirm + back), voice, guardrails. Parallel completion, status icons. Reset everything with confirmation.\n\n6. Dashboard — src/pages/Dashboard.tsx: New Post button, Trending Topics button, Settings link, post history, draft resume.\n\n7. Trending Topics — src/pages/TrendingTopics.tsx: parallel gemini-3-flash-preview search grounding queries, trend visualization, 3 gemini-3.1-pro-preview writing prompts, navigate to New Post prefilled.\n\n8. New Post — src/pages/NewPost.tsx with step components: Topic (rich input), Research (gemini-3-flash-preview search grounding, source cards with URL/title/author/date, highlight/delete), Outline (gemini-3.1-pro-preview one-shot, accept/back), Write (3 automatic gemini-3.1-pro-preview cycles: Write with citations, Edit for style, Guardrails-only), Complete (serif post with numbered footnotes, linked sources, attribution lineage).\n\n9. Demo mode — src/lib/demo.ts, src/pages/DemoMode.tsx, src/demo/bundled-session.json: session recording, replay through production path (fade-in prefills, highlight next button), bundled P&G session, cache-miss error with no API fallback.\n\n10. Test infrastructure — src/__tests__/: integration with canned data, smoke with gemini-2.5-flash-lite, manual mode option.\n\n11. Deploy config — railway.json or equivalent, deployment docs.\n\nEnsure App.tsx routing includes all pages. Verify imports/exports are consistent. Run npm install. Fix TypeScript errors.\n\nLog progress to .ai/runs/$KILROY_RUN_ID/implementation_log.md.\n\nPRE-EXIT VERIFICATION: if .ai/runs/$KILROY_RUN_ID/postmortem_latest.md exists, run sh scripts/validate-build.sh and re-read targeted files to confirm fixes.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH) ONLY on failure: status=fail with failure_reason, failure_class, and failure_signature=implement_fail."
         ]
     }
 
@@ -95,46 +95,46 @@ digraph substack_spec_v01 {
         fix_fmt [
             shape=parallelogram,
             max_retries=0,
-            tool_command="sh scripts/fix-fmt.sh 2>&1 || { printf '\\n=== VERIFY FAILURE: sh scripts/fix-fmt.sh ===\\n%s\\n' \"$(cat /tmp/fix-fmt.log 2>/dev/null || echo 'script missing or produced no output')\" >> .ai/verify_errors.log; exit 1; }"
+            tool_command="sh scripts/fix-fmt.sh 2>&1 || { printf '\\n=== VERIFY FAILURE: sh scripts/fix-fmt.sh ===\\n%s\\n' \"$(cat /tmp/fix-fmt.log 2>/dev/null || echo 'script missing or produced no output')\" >> .ai/runs/$KILROY_RUN_ID/verify_errors.log; exit 1; }"
         ]
 
         verify_fmt [
             shape=parallelogram,
             max_retries=0,
-            tool_command="sh scripts/validate-fmt.sh 2>&1 | tee /tmp/validate-fmt.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-fmt.sh ===\\n%s\\n' \"$(tail -30 /tmp/validate-fmt.log)\" >> .ai/verify_errors.log; exit 1; }"
+            tool_command="sh scripts/validate-fmt.sh 2>&1 | tee /tmp/validate-fmt.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-fmt.sh ===\\n%s\\n' \"$(tail -30 /tmp/validate-fmt.log)\" >> .ai/runs/$KILROY_RUN_ID/verify_errors.log; exit 1; }"
         ]
         check_fmt [shape=diamond, label="Fmt OK?"]
 
         verify_build [
             shape=parallelogram,
-            tool_command="sh scripts/validate-build.sh 2>&1 | tee /tmp/validate-build.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-build.sh ===\\n%s\\n' \"$(tail -50 /tmp/validate-build.log)\" >> .ai/verify_errors.log; exit 1; }"
+            tool_command="sh scripts/validate-build.sh 2>&1 | tee /tmp/validate-build.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-build.sh ===\\n%s\\n' \"$(tail -50 /tmp/validate-build.log)\" >> .ai/runs/$KILROY_RUN_ID/verify_errors.log; exit 1; }"
         ]
         check_build [shape=diamond, label="Build OK?"]
 
         verify_test [
             shape=parallelogram,
-            tool_command="sh scripts/validate-test.sh 2>&1 | tee /tmp/validate-test.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-test.sh ===\\n%s\\n' \"$(tail -50 /tmp/validate-test.log)\" >> .ai/verify_errors.log; exit 1; }"
+            tool_command="sh scripts/validate-test.sh 2>&1 | tee /tmp/validate-test.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-test.sh ===\\n%s\\n' \"$(tail -50 /tmp/validate-test.log)\" >> .ai/runs/$KILROY_RUN_ID/verify_errors.log; exit 1; }"
         ]
         check_test [shape=diamond, label="Tests OK?"]
 
         verify_browser [
             shape=parallelogram,
             collect_browser_artifacts=true,
-            tool_command="sh scripts/validate-browser.sh 2>&1 | tee /tmp/validate-browser.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-browser.sh ===\\n%s\\n' \"$(tail -50 /tmp/validate-browser.log)\" >> .ai/verify_errors.log; exit 1; }"
+            tool_command="sh scripts/validate-browser.sh 2>&1 | tee /tmp/validate-browser.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-browser.sh ===\\n%s\\n' \"$(tail -50 /tmp/validate-browser.log)\" >> .ai/runs/$KILROY_RUN_ID/verify_errors.log; exit 1; }"
         ]
         check_browser [shape=diamond, label="Browser OK?"]
 
         verify_artifacts [
             shape=parallelogram,
             max_retries=0,
-            tool_command="sh scripts/validate-artifacts.sh 2>&1 | tee /tmp/validate-artifacts.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-artifacts.sh ===\\n%s\\n' \"$(tail -30 /tmp/validate-artifacts.log)\" >> .ai/verify_errors.log; exit 1; }"
+            tool_command="sh scripts/validate-artifacts.sh 2>&1 | tee /tmp/validate-artifacts.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-artifacts.sh ===\\n%s\\n' \"$(tail -30 /tmp/validate-artifacts.log)\" >> .ai/runs/$KILROY_RUN_ID/verify_errors.log; exit 1; }"
         ]
         check_artifacts [shape=diamond, label="Artifacts OK?"]
 
         verify_fidelity [
             shape=box,
             class="verify",
-            prompt="Read .ai/runs/$KILROY_RUN_ID/spec.md, .ai/runs/$KILROY_RUN_ID/definition_of_done.md, .ai/runs/$KILROY_RUN_ID/verify_fidelity.md (if present), .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json, and relevant implementation files.\n\nEvaluate these grouped acceptance checks and map each to concrete file paths:\nAC1: src/**/settings* and src/**/router* and src/**/indexeddb* - first-run routing, setup flow, persistence.\nAC2: src/**/dashboard* and src/**/history* and src/**/draft* - dashboard actions and resume/view flows.\nAC3: src/**/trending* and src/**/research* - grounded research, deterministic trends, prompt handoff.\nAC4: src/**/new-post* and src/**/outline* and src/**/write* and src/**/complete* - full Topic->Complete pipeline with automatic write cycles.\nAC5: src/**/citation* and src/**/markdown* - citation lineage, footnote rendering, attribution persistence.\nAC6: src/**/demo* and src/demo/** - session picker/replay, bundled P&G demo, cache-miss no-fallback behavior.\nAC7: src/**/llm* and src/**/schema* - structured outputs, retry/backoff, production/test model intent.\nAC8: scripts/validate-build.sh and railway.json/Procfile/README* - build/deploy-readiness by static review only, no live deployment execution.\nAC9: scripts/validate-test.sh and scripts/validate-browser.sh and test sources - integration before smoke, manual mode option, browser evidence capture.\nAC10: .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json and .ai/runs/$KILROY_RUN_ID/test-evidence/latest/** - IT-1..IT-12 manifest coverage and required artifact types.\nAC11: src/**/style* and src/**/card* and src/**/progress* - Substack-like visual contract including bars, cards, and serif post preview.\n\nWrite .ai/runs/$KILROY_RUN_ID/verify_fidelity.md with pass/fail verdict and evidence per AC1..AC11.\n\nOn ANY failure: also append to .ai/verify_errors.log with the header '=== VERIFY FAILURE: verify_fidelity ===' followed by the list of failing ACs and their specific issues, so the implement node can read the consolidated error log.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH):\nAll pass: status=success.\nAny fail: status=fail with failure_reason listing failing ACs, failure_class=deterministic_agent_bug, failure_signature as sorted comma-separated failing AC IDs (e.g. AC1,AC10)."
+            prompt="Read .ai/runs/$KILROY_RUN_ID/spec.md, .ai/runs/$KILROY_RUN_ID/definition_of_done.md, .ai/runs/$KILROY_RUN_ID/verify_fidelity.md (if present), .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json, and relevant implementation files.\n\nEvaluate these grouped acceptance checks and map each to concrete file paths:\nAC1: src/**/settings* and src/**/router* and src/**/indexeddb* - first-run routing, setup flow, persistence.\nAC2: src/**/dashboard* and src/**/history* and src/**/draft* - dashboard actions and resume/view flows.\nAC3: src/**/trending* and src/**/research* - grounded research, deterministic trends, prompt handoff.\nAC4: src/**/new-post* and src/**/outline* and src/**/write* and src/**/complete* - full Topic->Complete pipeline with automatic write cycles.\nAC5: src/**/citation* and src/**/markdown* - citation lineage, footnote rendering, attribution persistence.\nAC6: src/**/demo* and src/demo/** - session picker/replay, bundled P&G demo, cache-miss no-fallback behavior.\nAC7: src/**/llm* and src/**/schema* - structured outputs, retry/backoff, production/test model intent.\nAC8: scripts/validate-build.sh and railway.json/Procfile/README* - build/deploy-readiness by static review only, no live deployment execution.\nAC9: scripts/validate-test.sh and scripts/validate-browser.sh and test sources - integration before smoke, manual mode option, browser evidence capture.\nAC10: .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json and .ai/runs/$KILROY_RUN_ID/test-evidence/latest/** - IT-1..IT-12 manifest coverage and required artifact types.\nAC11: src/**/style* and src/**/card* and src/**/progress* - Substack-like visual contract including bars, cards, and serif post preview.\n\nWrite .ai/runs/$KILROY_RUN_ID/verify_fidelity.md with pass/fail verdict and evidence per AC1..AC11.\n\nOn ANY failure: also append to .ai/runs/$KILROY_RUN_ID/verify_errors.log with the header '=== VERIFY FAILURE: verify_fidelity ===' followed by the list of failing ACs and their specific issues, so the implement node can read the consolidated error log.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH):\nAll pass: status=success.\nAny fail: status=fail with failure_reason listing failing ACs, failure_class=deterministic_agent_bug, failure_signature as sorted comma-separated failing AC IDs (e.g. AC1,AC10)."
         ]
         check_impl [shape=diamond, label="Impl OK?"]
     }
@@ -151,12 +151,12 @@ digraph substack_spec_v01 {
 
         review_a [
             class="branch-a",
-            prompt="Review the Substack Creator Newsletter Engine implementation against .ai/runs/$KILROY_RUN_ID/definition_of_done.md.\n\nRead the DoD for acceptance criteria. Read all implementation source files and .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json.\n\n## MANDATORY: Browser verification\nYou MUST verify the app works in a real browser. Do not trust code reading alone.\n1. Run: npm run build (must exit 0)\n2. Start the preview server: npx vite preview --port 4567 &\n3. Wait 2 seconds, then use curl to fetch http://localhost:4567/ and verify it returns HTML with a root div\n4. Check that the HTML references JS and CSS bundles\n5. Kill the preview server when done\n6. Check browser artifacts in .ai/test-evidence/latest/ — screenshots must be real rendered pages (not 1x1 placeholders). If screenshot files are under 5KB, they are fake. REJECT.\n7. Check that playwright-report or equivalent browser test output exists and shows real test execution\n\nIf browser verification fails or artifacts are fake, REJECT immediately.\n\n## Code and AC verification\nCheck every AC group (AC1 through AC11):\n\nAC1: Build exits 0, static assets produced, deployment config present and coherent (review only, no live deploy)\nAC2: IndexedDB persistence for API key, config, posts, drafts, sessions, attribution mappings across reloads\nAC3: Structured JSON output with retry/backoff, correct model routing (gemini-3-flash-preview/gemini-3.1-pro-preview/gemini-2.5-flash-lite), client-side key\nAC4: Parallel setup (any order), status icons, rich input (text/upload/link), gemini-3.1-pro-preview confirmation, back button\nAC5: Dashboard with New Post and Trending Topics buttons, Settings link, post history, draft resume\nAC6: Trending Topics: parallel gemini-3-flash-preview search research, trend visualization, 3 gemini-3.1-pro-preview writing prompts, navigate to New Post\nAC7: Full post pipeline with source metadata, highlight/delete, one-shot outline, 3 automatic write cycles, citations with attribution lineage\nAC8: Demo replay through production path, fade-in/highlight, bundled P&G session, cache-miss error\nAC9: Validation scripts and runtime evidence contract for build/test/browser checks\nAC10: IT-1..IT-12 evidence manifest coverage and artifact completeness\nAC11: Substack visual design: serif fonts, step dots, card primitive, accent progress bars, no spinners, footnoted post\n\nVerdict: APPROVED (all criteria met with evidence) or REJECTED (specific gaps by AC ID).\nWrite to .ai/runs/$KILROY_RUN_ID/review_a.md.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH): On success: status=success. On failure: status=fail with failure_reason and failure_class."
+            prompt="Review the Substack Creator Newsletter Engine implementation against .ai/runs/$KILROY_RUN_ID/definition_of_done.md.\n\nRead the DoD for acceptance criteria. Read all implementation source files and .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json.\n\n## MANDATORY: Browser verification\nYou MUST verify the app works in a real browser. Do not trust code reading alone.\n1. Run: npm run build (must exit 0)\n2. Start the preview server: npx vite preview --port 4567 &\n3. Wait 2 seconds, then use curl to fetch http://localhost:4567/ and verify it returns HTML with a root div\n4. Check that the HTML references JS and CSS bundles\n5. Kill the preview server when done\n6. Check browser artifacts in .ai/runs/$KILROY_RUN_ID/test-evidence/latest/ — screenshots must be real rendered pages (not 1x1 placeholders). If screenshot files are under 5KB, they are fake. REJECT.\n7. Check that playwright-report or equivalent browser test output exists and shows real test execution\n\nIf browser verification fails or artifacts are fake, REJECT immediately.\n\n## Code and AC verification\nCheck every AC group (AC1 through AC11):\n\nAC1: Build exits 0, static assets produced, deployment config present and coherent (review only, no live deploy)\nAC2: IndexedDB persistence for API key, config, posts, drafts, sessions, attribution mappings across reloads\nAC3: Structured JSON output with retry/backoff, correct model routing (gemini-3-flash-preview/gemini-3.1-pro-preview/gemini-2.5-flash-lite), client-side key\nAC4: Parallel setup (any order), status icons, rich input (text/upload/link), gemini-3.1-pro-preview confirmation, back button\nAC5: Dashboard with New Post and Trending Topics buttons, Settings link, post history, draft resume\nAC6: Trending Topics: parallel gemini-3-flash-preview search research, trend visualization, 3 gemini-3.1-pro-preview writing prompts, navigate to New Post\nAC7: Full post pipeline with source metadata, highlight/delete, one-shot outline, 3 automatic write cycles, citations with attribution lineage\nAC8: Demo replay through production path, fade-in/highlight, bundled P&G session, cache-miss error\nAC9: Validation scripts and runtime evidence contract for build/test/browser checks\nAC10: IT-1..IT-12 evidence manifest coverage and artifact completeness\nAC11: Substack visual design: serif fonts, step dots, card primitive, accent progress bars, no spinners, footnoted post\n\nVerdict: APPROVED (all criteria met with evidence) or REJECTED (specific gaps by AC ID).\nWrite to .ai/runs/$KILROY_RUN_ID/review_a.md.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH): On success: status=success. On failure: status=fail with failure_reason and failure_class."
         ]
 
         review_b [
             class="branch-b",
-            prompt="Review the Substack Creator Newsletter Engine implementation against .ai/runs/$KILROY_RUN_ID/definition_of_done.md.\n\nRead the DoD for acceptance criteria. Read all implementation source files and .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json.\n\n## MANDATORY: Browser verification\nYou MUST verify the app works in a real browser. Do not trust code reading alone.\n1. Run: npm run build (must exit 0)\n2. Start the preview server: npx vite preview --port 4568 &\n3. Wait 2 seconds, then use curl to fetch http://localhost:4568/ and verify it returns HTML with a root div\n4. Check that the HTML references JS and CSS bundles\n5. Kill the preview server when done\n6. Check browser artifacts in .ai/test-evidence/latest/ — screenshots must be real rendered pages (not 1x1 placeholders). If screenshot files are under 5KB, they are fake. REJECT.\n7. Check that playwright-report or equivalent browser test output exists and shows real test execution\n\nIf browser verification fails or artifacts are fake, REJECT immediately.\n\n## Code and AC verification\nCheck every AC group (AC1 through AC11):\n\nAC1: Build exits 0, static assets produced, deployment config present and coherent (review only, no live deploy)\nAC2: IndexedDB persistence for API key, config, posts, drafts, sessions, attribution mappings across reloads\nAC3: Structured JSON output with retry/backoff, correct model routing (gemini-3-flash-preview/gemini-3.1-pro-preview/gemini-2.5-flash-lite), client-side key\nAC4: Parallel setup (any order), status icons, rich input (text/upload/link), gemini-3.1-pro-preview confirmation, back button\nAC5: Dashboard with New Post and Trending Topics buttons, Settings link, post history, draft resume\nAC6: Trending Topics: parallel gemini-3-flash-preview search research, trend visualization, 3 gemini-3.1-pro-preview writing prompts, navigate to New Post\nAC7: Full post pipeline with source metadata, highlight/delete, one-shot outline, 3 automatic write cycles, citations with attribution lineage\nAC8: Demo replay through production path, fade-in/highlight, bundled P&G session, cache-miss error\nAC9: Validation scripts and runtime evidence contract for build/test/browser checks\nAC10: IT-1..IT-12 evidence manifest coverage and artifact completeness\nAC11: Substack visual design: serif fonts, step dots, card primitive, accent progress bars, no spinners, footnoted post\n\nVerdict: APPROVED (all criteria met with evidence) or REJECTED (specific gaps by AC ID).\nWrite to .ai/runs/$KILROY_RUN_ID/review_b.md.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH): On success: status=success. On failure: status=fail with failure_reason and failure_class."
+            prompt="Review the Substack Creator Newsletter Engine implementation against .ai/runs/$KILROY_RUN_ID/definition_of_done.md.\n\nRead the DoD for acceptance criteria. Read all implementation source files and .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json.\n\n## MANDATORY: Browser verification\nYou MUST verify the app works in a real browser. Do not trust code reading alone.\n1. Run: npm run build (must exit 0)\n2. Start the preview server: npx vite preview --port 4568 &\n3. Wait 2 seconds, then use curl to fetch http://localhost:4568/ and verify it returns HTML with a root div\n4. Check that the HTML references JS and CSS bundles\n5. Kill the preview server when done\n6. Check browser artifacts in .ai/runs/$KILROY_RUN_ID/test-evidence/latest/ — screenshots must be real rendered pages (not 1x1 placeholders). If screenshot files are under 5KB, they are fake. REJECT.\n7. Check that playwright-report or equivalent browser test output exists and shows real test execution\n\nIf browser verification fails or artifacts are fake, REJECT immediately.\n\n## Code and AC verification\nCheck every AC group (AC1 through AC11):\n\nAC1: Build exits 0, static assets produced, deployment config present and coherent (review only, no live deploy)\nAC2: IndexedDB persistence for API key, config, posts, drafts, sessions, attribution mappings across reloads\nAC3: Structured JSON output with retry/backoff, correct model routing (gemini-3-flash-preview/gemini-3.1-pro-preview/gemini-2.5-flash-lite), client-side key\nAC4: Parallel setup (any order), status icons, rich input (text/upload/link), gemini-3.1-pro-preview confirmation, back button\nAC5: Dashboard with New Post and Trending Topics buttons, Settings link, post history, draft resume\nAC6: Trending Topics: parallel gemini-3-flash-preview search research, trend visualization, 3 gemini-3.1-pro-preview writing prompts, navigate to New Post\nAC7: Full post pipeline with source metadata, highlight/delete, one-shot outline, 3 automatic write cycles, citations with attribution lineage\nAC8: Demo replay through production path, fade-in/highlight, bundled P&G session, cache-miss error\nAC9: Validation scripts and runtime evidence contract for build/test/browser checks\nAC10: IT-1..IT-12 evidence manifest coverage and artifact completeness\nAC11: Substack visual design: serif fonts, step dots, card primitive, accent progress bars, no spinners, footnoted post\n\nVerdict: APPROVED (all criteria met with evidence) or REJECTED (specific gaps by AC ID).\nWrite to .ai/runs/$KILROY_RUN_ID/review_b.md.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH): On success: status=success. On failure: status=fail with failure_reason and failure_class."
         ]
 
         review_consensus [
@@ -204,7 +204,7 @@ digraph substack_spec_v01 {
     // Implement -> Verify chain
     implement -> fix_fmt
 
-    // Verify chain — failures go directly back to implement (errors logged to .ai/verify_errors.log)
+    // Verify chain — failures go directly back to implement (errors logged to .ai/runs/$KILROY_RUN_ID/verify_errors.log)
     fix_fmt -> verify_fmt
     verify_fmt -> check_fmt
     check_fmt -> verify_build [condition="outcome=success"]
diff --git a/internal/attractor/engine/cli_only_models.go b/internal/attractor/engine/cli_only_models.go
index 2ff9de6a..2feb2bff 100644
--- a/internal/attractor/engine/cli_only_models.go
+++ b/internal/attractor/engine/cli_only_models.go
@@ -5,7 +5,8 @@ import "strings"
 // cliOnlyModelIDs lists models that MUST route through CLI backend regardless
 // of provider backend configuration. These models have no API endpoint.
 var cliOnlyModelIDs = map[string]bool{
-	"gpt-5.4-spark": true,
+	"gpt-5.3-codex-spark": true,
+	"gpt-5.4-spark":       true,
 }
 
 // isCLIOnlyModel returns true if the given model ID (with or without provider
diff --git a/internal/attractor/engine/cli_only_models_test.go b/internal/attractor/engine/cli_only_models_test.go
index 4e03c289..503144f8 100644
--- a/internal/attractor/engine/cli_only_models_test.go
+++ b/internal/attractor/engine/cli_only_models_test.go
@@ -8,7 +8,7 @@ func TestIsCLIOnlyModel(t *testing.T) {
 		want  bool
 	}{
 		{"gpt-5.4-spark", true},
-		{"GPT-5.4-SPARK", true},        // case-insensitive
+		{"GPT-5.4-SPARK", true},  // case-insensitive
 		{"openai/gpt-5.4-spark", true}, // with provider prefix
 		{"gpt-5.4", false},             // regular codex
 		{"gpt-5.4", false},
diff --git a/internal/attractor/engine/codergen_heartbeat_test.go b/internal/attractor/engine/codergen_heartbeat_test.go
index 408e8a7e..531b1a62 100644
--- a/internal/attractor/engine/codergen_heartbeat_test.go
+++ b/internal/attractor/engine/codergen_heartbeat_test.go
@@ -350,7 +350,9 @@ digraph G {
 // TestRunWithConfig_APIBackend_StallWatchdogFiresDespiteHeartbeatGoroutine verifies
 // that the stall watchdog still fires when the API agent_loop session is truly
 // stalled (no new session events) even though the heartbeat goroutine is running.
-// The conditional heartbeat should NOT emit progress when event_count is static.
+// Heartbeat events are emitted unconditionally for observability but use
+// appendProgressLivenessOnly when no new events are produced, which does not
+// reset the stall watchdog timer.
 func TestRunWithConfig_APIBackend_StallWatchdogFiresDespiteHeartbeatGoroutine(t *testing.T) {
 	repo := initTestRepo(t)
 	logsRoot := t.TempDir()
@@ -417,7 +419,9 @@ digraph G {
 // TestRunWithConfig_CLIBackend_StallWatchdogFiresDespiteHeartbeatGoroutine verifies
 // that the stall watchdog still fires when the CLI codergen process is truly
 // stalled (no stdout/stderr output) even though the heartbeat goroutine is running.
-// The conditional heartbeat should NOT emit progress when file sizes are static.
+// Heartbeat events are emitted unconditionally for observability but use
+// appendProgressLivenessOnly when no output growth is detected, which does not
+// reset the stall watchdog timer.
 func TestRunWithConfig_CLIBackend_StallWatchdogFiresDespiteHeartbeatGoroutine(t *testing.T) {
 	repo := initTestRepo(t)
 	logsRoot := t.TempDir()
@@ -472,6 +476,95 @@ digraph G {
 	t.Logf("stall watchdog fired as expected: %v", err)
 }
 
+func TestRunWithConfig_HeartbeatEmitsDuringQuietPeriods(t *testing.T) {
+	repo := initTestRepo(t)
+	logsRoot := t.TempDir()
+
+	pinned := writePinnedCatalog(t)
+	cxdbSrv := newCXDBTestServer(t)
+
+	// Create a mock codex CLI that produces initial output, then goes quiet,
+	// then produces more output. The quiet period should still produce heartbeats.
+	cli := filepath.Join(t.TempDir(), "codex")
+	if err := os.WriteFile(cli, []byte(`#!/usr/bin/env bash
+set -euo pipefail
+echo '{"item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"starting"}]}}' >&1
+# Quiet period: no output for 3 seconds.
+sleep 3
+echo '{"item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"done"}]}}' >&1
+`), 0o755); err != nil {
+		t.Fatal(err)
+	}
+
+	t.Setenv("KILROY_CODERGEN_HEARTBEAT_INTERVAL", "1s")
+	t.Setenv("KILROY_CODEX_IDLE_TIMEOUT", "10s")
+
+	cfg := &RunConfigFile{Version: 1}
+	cfg.Repo.Path = repo
+	cfg.CXDB.BinaryAddr = cxdbSrv.BinaryAddr()
+	cfg.CXDB.HTTPBaseURL = cxdbSrv.URL()
+	cfg.LLM.CLIProfile = "test_shim"
+	cfg.LLM.Providers = map[string]ProviderConfig{
+		"openai": {Backend: BackendCLI, Executable: cli},
+	}
+	cfg.ModelDB.OpenRouterModelInfoPath = pinned
+	cfg.ModelDB.OpenRouterModelInfoUpdatePolicy = "pinned"
+	cfg.Git.RunBranchPrefix = "attractor/run"
+
+	dot := []byte(`
+digraph G {
+  graph [goal="test quiet period heartbeats"]
+  start [shape=Mdiamond]
+  exit  [shape=Msquare]
+  a [shape=box, llm_provider=openai, llm_model=gpt-5.4, prompt="do something quiet"]
+  start -> a -> exit
+}
+`)
+
+	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
+	defer cancel()
+	res, err := RunWithConfig(ctx, dot, cfg, RunOptions{RunID: "quiet-heartbeat-test", LogsRoot: logsRoot, AllowTestShim: true})
+	if err != nil {
+		t.Fatalf("RunWithConfig: %v", err)
+	}
+
+	progressPath := filepath.Join(res.LogsRoot, "progress.ndjson")
+	data, err := os.ReadFile(progressPath)
+	if err != nil {
+		t.Fatalf("read progress.ndjson: %v", err)
+	}
+
+	heartbeats := 0
+	var hasQuietHeartbeat bool
+	for _, line := range strings.Split(string(data), "\n") {
+		line = strings.TrimSpace(line)
+		if line == "" {
+			continue
+		}
+		var ev map[string]any
+		if err := json.Unmarshal([]byte(line), &ev); err != nil {
+			continue
+		}
+		if ev["event"] == "stage_heartbeat" && ev["node_id"] == "a" {
+			heartbeats++
+			if _, ok := ev["since_last_output_s"]; !ok {
+				t.Error("heartbeat missing since_last_output_s field")
+			}
+			sinceOutput, _ := ev["since_last_output_s"].(float64)
+			if sinceOutput >= 1 {
+				hasQuietHeartbeat = true
+			}
+		}
+	}
+	if heartbeats < 2 {
+		t.Fatalf("expected at least 2 heartbeat events (some during quiet period), got %d", heartbeats)
+	}
+	if !hasQuietHeartbeat {
+		t.Fatal("expected at least one heartbeat with since_last_output_s >= 1 (quiet period heartbeat)")
+	}
+	t.Logf("found %d heartbeat events, quiet period heartbeats present", heartbeats)
+}
+
 func TestRunWithConfig_HeartbeatStopsAfterProcessExit(t *testing.T) {
 	events := runHeartbeatFixture(t)
 	endIdx := findEventIndex(events, "stage_attempt_end", "a")
diff --git a/internal/attractor/engine/codergen_router.go b/internal/attractor/engine/codergen_router.go
index 769cc809..4b817e46 100644
--- a/internal/attractor/engine/codergen_router.go
+++ b/internal/attractor/engine/codergen_router.go
@@ -328,21 +328,30 @@ func (r *CodergenRouter) runAPI(ctx context.Context, execCtx *Execution, node *m
 				ticker := time.NewTicker(interval)
 				defer ticker.Stop()
 				var lastCount int
+				lastEventTime := time.Now()
 				for {
 					select {
 					case <-ticker.C:
 						eventsMu.Lock()
 						count := len(events)
 						eventsMu.Unlock()
-						if count > lastCount {
+						eventsGrew := count > lastCount
+						if eventsGrew {
 							lastCount = count
-							if execCtx != nil && execCtx.Engine != nil {
-								execCtx.Engine.appendProgress(map[string]any{
-									"event":       "stage_heartbeat",
-									"node_id":     node.ID,
-									"elapsed_s":   int(time.Since(apiStart).Seconds()),
-									"event_count": count,
-								})
+							lastEventTime = time.Now()
+						}
+						if execCtx != nil && execCtx.Engine != nil {
+							ev := map[string]any{
+								"event":               "stage_heartbeat",
+								"node_id":             node.ID,
+								"elapsed_s":           int(time.Since(apiStart).Seconds()),
+								"event_count":         count,
+								"since_last_output_s": int(time.Since(lastEventTime).Seconds()),
+							}
+							if eventsGrew {
+								execCtx.Engine.appendProgress(ev)
+							} else {
+								execCtx.Engine.appendProgressLivenessOnly(ev)
 							}
 						}
 					case <-heartbeatStop:
@@ -1159,22 +1168,31 @@ func (r *CodergenRouter) runCLI(ctx context.Context, execCtx *Execution, node *m
 			ticker := time.NewTicker(interval)
 			defer ticker.Stop()
 			var lastStdoutSz, lastStderrSz int64
+			lastOutputTime := time.Now()
 			for {
 				select {
 				case <-ticker.C:
 					stdoutSz, _ := fileSize(stdoutPath)
 					stderrSz, _ := fileSize(stderrPath)
-					if stdoutSz > lastStdoutSz || stderrSz > lastStderrSz {
+					outputGrew := stdoutSz > lastStdoutSz || stderrSz > lastStderrSz
+					if outputGrew {
 						lastStdoutSz = stdoutSz
 						lastStderrSz = stderrSz
-						if execCtx != nil && execCtx.Engine != nil {
-							execCtx.Engine.appendProgress(map[string]any{
-								"event":        "stage_heartbeat",
-								"node_id":      node.ID,
-								"elapsed_s":    int(time.Since(start).Seconds()),
-								"stdout_bytes": stdoutSz,
-								"stderr_bytes": stderrSz,
-							})
+						lastOutputTime = time.Now()
+					}
+					if execCtx != nil && execCtx.Engine != nil {
+						ev := map[string]any{
+							"event":               "stage_heartbeat",
+							"node_id":             node.ID,
+							"elapsed_s":           int(time.Since(start).Seconds()),
+							"stdout_bytes":        stdoutSz,
+							"stderr_bytes":        stderrSz,
+							"since_last_output_s": int(time.Since(lastOutputTime).Seconds()),
+						}
+						if outputGrew {
+							execCtx.Engine.appendProgress(ev)
+						} else {
+							execCtx.Engine.appendProgressLivenessOnly(ev)
 						}
 					}
 				case <-heartbeatStop:
@@ -1845,6 +1863,24 @@ func emitCXDBToolTurns(ctx context.Context, eng *Engine, nodeID string, ev agent
 	}
 	runID := eng.Options.RunID
 	switch ev.Kind {
+	case agent.EventAssistantTextEnd:
+		text := strings.TrimSpace(fmt.Sprint(ev.Data["text"]))
+		// Keep a queryable assistant turn even when the model turn is tool-only.
+		if text == "" {
+			text = "[tool_use]"
+		}
+		if _, _, err := eng.CXDB.Append(ctx, "com.kilroy.attractor.AssistantMessage", 1, map[string]any{
+			"run_id":         runID,
+			"node_id":        nodeID,
+			"text":           truncate(text, 8_000),
+			"model":          "",
+			"input_tokens":   uint64(0),
+			"output_tokens":  uint64(0),
+			"tool_use_count": uint32(0),
+			"timestamp_ms":   nowMS(),
+		}); err != nil {
+			eng.Warn(fmt.Sprintf("cxdb append AssistantMessage failed (node=%s): %v", nodeID, err))
+		}
 	case agent.EventToolCallStart:
 		toolName := strings.TrimSpace(fmt.Sprint(ev.Data["tool_name"]))
 		callID := strings.TrimSpace(fmt.Sprint(ev.Data["call_id"]))
diff --git a/internal/attractor/engine/codergen_router_cxdb_test.go b/internal/attractor/engine/codergen_router_cxdb_test.go
new file mode 100644
index 00000000..0a7b6369
--- /dev/null
+++ b/internal/attractor/engine/codergen_router_cxdb_test.go
@@ -0,0 +1,118 @@
+package engine
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/danshapiro/kilroy/internal/agent"
+)
+
+func TestEmitCXDBToolTurns_EmitsAssistantToolCallAndToolResult(t *testing.T) {
+	srv := newCXDBTestServer(t)
+	eng := newTestEngineWithCXDB(t, srv)
+	ctx := context.Background()
+
+	emitCXDBToolTurns(ctx, eng, "node_api", agent.SessionEvent{
+		Kind:      agent.EventAssistantTextEnd,
+		Timestamp: time.Now(),
+		Data: map[string]any{
+			"text": "I will read the file.",
+		},
+	})
+	emitCXDBToolTurns(ctx, eng, "node_api", agent.SessionEvent{
+		Kind:      agent.EventToolCallStart,
+		Timestamp: time.Now(),
+		Data: map[string]any{
+			"tool_name":      "Read",
+			"call_id":        "toolu_123",
+			"arguments_json": `{"file_path":"/tmp/a.txt"}`,
+		},
+	})
+	emitCXDBToolTurns(ctx, eng, "node_api", agent.SessionEvent{
+		Kind:      agent.EventToolCallEnd,
+		Timestamp: time.Now(),
+		Data: map[string]any{
+			"tool_name":  "Read",
+			"call_id":    "toolu_123",
+			"full_output": "hello world",
+			"is_error":   false,
+		},
+	})
+
+	turns := srv.Turns(eng.CXDB.ContextID)
+	if len(turns) != 3 {
+		t.Fatalf("expected 3 turns, got %d", len(turns))
+	}
+
+	if turns[0]["type_id"] != "com.kilroy.attractor.AssistantMessage" {
+		t.Fatalf("turn[0] type_id: got %q", turns[0]["type_id"])
+	}
+	assistant := turns[0]["payload"].(map[string]any)
+	if assistant["text"] != "I will read the file." {
+		t.Fatalf("AssistantMessage text: got %q", assistant["text"])
+	}
+	if assistant["node_id"] != "node_api" {
+		t.Fatalf("AssistantMessage node_id: got %q", assistant["node_id"])
+	}
+	if assistant["run_id"] != "test-run" {
+		t.Fatalf("AssistantMessage run_id: got %q", assistant["run_id"])
+	}
+	if _, ok := assistant["timestamp_ms"]; !ok {
+		t.Fatal("AssistantMessage missing timestamp_ms")
+	}
+
+	if turns[1]["type_id"] != "com.kilroy.attractor.ToolCall" {
+		t.Fatalf("turn[1] type_id: got %q", turns[1]["type_id"])
+	}
+	toolCall := turns[1]["payload"].(map[string]any)
+	if toolCall["tool_name"] != "Read" {
+		t.Fatalf("ToolCall tool_name: got %q", toolCall["tool_name"])
+	}
+	if toolCall["call_id"] != "toolu_123" {
+		t.Fatalf("ToolCall call_id: got %q", toolCall["call_id"])
+	}
+
+	if turns[2]["type_id"] != "com.kilroy.attractor.ToolResult" {
+		t.Fatalf("turn[2] type_id: got %q", turns[2]["type_id"])
+	}
+	toolResult := turns[2]["payload"].(map[string]any)
+	if toolResult["tool_name"] != "Read" {
+		t.Fatalf("ToolResult tool_name: got %q", toolResult["tool_name"])
+	}
+	if toolResult["call_id"] != "toolu_123" {
+		t.Fatalf("ToolResult call_id: got %q", toolResult["call_id"])
+	}
+	if toolResult["output"] != "hello world" {
+		t.Fatalf("ToolResult output: got %q", toolResult["output"])
+	}
+	if toolResult["is_error"] != false {
+		t.Fatalf("ToolResult is_error: got %v", toolResult["is_error"])
+	}
+}
+
+func TestEmitCXDBToolTurns_AssistantTextFallbackForToolOnlyTurn(t *testing.T) {
+	srv := newCXDBTestServer(t)
+	eng := newTestEngineWithCXDB(t, srv)
+	ctx := context.Background()
+
+	emitCXDBToolTurns(ctx, eng, "node_api", agent.SessionEvent{
+		Kind:      agent.EventAssistantTextEnd,
+		Timestamp: time.Now(),
+		Data: map[string]any{
+			"text": "   ",
+		},
+	})
+
+	turns := srv.Turns(eng.CXDB.ContextID)
+	if len(turns) != 1 {
+		t.Fatalf("expected 1 turn, got %d", len(turns))
+	}
+	if turns[0]["type_id"] != "com.kilroy.attractor.AssistantMessage" {
+		t.Fatalf("type_id: got %q", turns[0]["type_id"])
+	}
+	payload := turns[0]["payload"].(map[string]any)
+	if payload["text"] != "[tool_use]" {
+		t.Fatalf("fallback text: got %q", payload["text"])
+	}
+}
diff --git a/internal/attractor/engine/engine.go b/internal/attractor/engine/engine.go
index 43aa0a2d..66e3ebfd 100644
--- a/internal/attractor/engine/engine.go
+++ b/internal/attractor/engine/engine.go
@@ -373,10 +373,14 @@ func (e *Engine) run(ctx context.Context) (res *Result, err error) {
 	runCtx, cancelRun := context.WithCancelCause(ctx)
 	defer cancelRun(nil)
 
+	var releaseOwnership func()
 	defer func() {
-		if err != nil {
+		if err != nil && !isRunOwnershipConflict(err) {
 			e.persistFatalOutcome(ctx, err)
 		}
+		if releaseOwnership != nil {
+			releaseOwnership()
+		}
 	}()
 
 	if e.Options.RepoPath == "" {
@@ -403,6 +407,10 @@ func (e *Engine) run(ctx context.Context) (res *Result, err error) {
 	if err := os.MkdirAll(e.LogsRoot, 0o755); err != nil {
 		return nil, err
 	}
+	releaseOwnership, err = acquireRunOwnership(e.LogsRoot, e.Options.RunID)
+	if err != nil {
+		return nil, err
+	}
 	// Record PID so attractor status can detect a running process.
 	_ = os.WriteFile(filepath.Join(e.LogsRoot, "run.pid"), []byte(strconv.Itoa(os.Getpid())), 0o644)
 	// Snapshot the run config for repeatability and resume.
@@ -1223,7 +1231,7 @@ func (e *Engine) executeWithRetry(ctx context.Context, node *model.Node, retries
 			_ = writeJSON(filepath.Join(stageDir, "status.json"), fo)
 			return fo, nil
 		}
-		if out.Status == runtime.StatusSuccess || out.Status == runtime.StatusPartialSuccess || out.Status == runtime.StatusSkipped {
+		if out.Status == runtime.StatusSuccess || out.Status == runtime.StatusDegradedSuccess || out.Status == runtime.StatusPartialSuccess || out.Status == runtime.StatusSkipped {
 			retries[node.ID] = 0
 			return out, nil
 		}
@@ -1957,7 +1965,7 @@ func checkGoalGates(g *model.Graph, outcomes map[string]runtime.Outcome) (bool,
 		if !strings.EqualFold(n.Attr("goal_gate", "false"), "true") {
 			continue
 		}
-		if out.Status != runtime.StatusSuccess && out.Status != runtime.StatusPartialSuccess {
+		if out.Status != runtime.StatusSuccess && out.Status != runtime.StatusDegradedSuccess && out.Status != runtime.StatusPartialSuccess {
 			return false, id
 		}
 	}
diff --git a/internal/attractor/engine/handlers.go b/internal/attractor/engine/handlers.go
index 8bcd9813..9567c2d1 100644
--- a/internal/attractor/engine/handlers.go
+++ b/internal/attractor/engine/handlers.go
@@ -4,7 +4,9 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
+	"io"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -235,25 +237,128 @@ type fallbackStatusPath struct {
 	source statusSource
 }
 
-func copyFirstValidFallbackStatus(stageStatusPath string, fallbackPaths []fallbackStatusPath) (statusSource, error) {
+type fallbackStatusFailureMode string
+
+const (
+	fallbackFailureModeNone           fallbackStatusFailureMode = ""
+	fallbackFailureModeMissing        fallbackStatusFailureMode = "missing"
+	fallbackFailureModeUnreadable     fallbackStatusFailureMode = "unreadable"
+	fallbackFailureModeCorrupt        fallbackStatusFailureMode = "corrupt"
+	fallbackFailureModeInvalidPayload fallbackStatusFailureMode = "invalid_payload"
+)
+
+const (
+	fallbackStatusDecodeMaxAttempts = 3
+	fallbackStatusDecodeBaseDelay   = 25 * time.Millisecond
+)
+
+func copyFirstValidFallbackStatus(stageStatusPath string, fallbackPaths []fallbackStatusPath) (statusSource, string, error) {
 	if _, err := os.Stat(stageStatusPath); err == nil {
-		return statusSourceCanonical, nil
+		return statusSourceCanonical, "", nil
 	}
+	issues := make([]string, 0, len(fallbackPaths))
 	for _, fallback := range fallbackPaths {
-		b, err := os.ReadFile(fallback.path)
+		b, mode, err := readAndDecodeFallbackStatusWithRetry(fallback.path)
 		if err != nil {
-			continue
-		}
-		if _, err := runtime.DecodeOutcomeJSON(b); err != nil {
+			issues = append(issues, formatFallbackStatusIssue(fallback, mode, err))
 			continue
 		}
 		if err := runtime.WriteFileAtomic(stageStatusPath, b); err != nil {
-			return statusSourceNone, err
+			return statusSourceNone, strings.Join(issues, "; "), err
 		}
 		_ = os.Remove(fallback.path)
-		return fallback.source, nil
+		return fallback.source, "", nil
+	}
+	return statusSourceNone, strings.Join(issues, "; "), nil
+}
+
+func readAndDecodeFallbackStatusWithRetry(path string) ([]byte, fallbackStatusFailureMode, error) {
+	var lastErr error
+	mode := fallbackFailureModeMissing
+	for attempt := 1; attempt <= fallbackStatusDecodeMaxAttempts; attempt++ {
+		b, err := os.ReadFile(path)
+		if err != nil {
+			lastErr = err
+			if os.IsNotExist(err) {
+				mode = fallbackFailureModeMissing
+			} else {
+				mode = fallbackFailureModeUnreadable
+			}
+			if attempt < fallbackStatusDecodeMaxAttempts && shouldRetryFallbackRead(mode, err) {
+				time.Sleep(backoffDelay(attempt))
+				continue
+			}
+			return nil, mode, lastErr
+		}
+		if _, err := runtime.DecodeOutcomeJSON(b); err != nil {
+			lastErr = err
+			mode = classifyFallbackDecodeError(b, err)
+			if attempt < fallbackStatusDecodeMaxAttempts && shouldRetryFallbackDecode(mode, b, err) {
+				time.Sleep(backoffDelay(attempt))
+				continue
+			}
+			return nil, mode, lastErr
+		}
+		return b, fallbackFailureModeNone, nil
+	}
+	return nil, mode, lastErr
+}
+
+func classifyFallbackDecodeError(raw []byte, err error) fallbackStatusFailureMode {
+	if isCorruptFallbackPayload(raw, err) {
+		return fallbackFailureModeCorrupt
+	}
+	return fallbackFailureModeInvalidPayload
+}
+
+func shouldRetryFallbackRead(mode fallbackStatusFailureMode, err error) bool {
+	if mode == fallbackFailureModeMissing {
+		// Missing fallback files are expected in normal flows; retrying them
+		// just adds latency before trying the next candidate path.
+		return false
+	}
+	return errors.Is(err, io.ErrUnexpectedEOF)
+}
+
+func shouldRetryFallbackDecode(mode fallbackStatusFailureMode, raw []byte, err error) bool {
+	return mode == fallbackFailureModeCorrupt && isCorruptFallbackPayload(raw, err)
+}
+
+func isCorruptFallbackPayload(raw []byte, err error) bool {
+	if len(strings.TrimSpace(string(raw))) == 0 {
+		return true
+	}
+	var syntaxErr *json.SyntaxError
+	if errors.As(err, &syntaxErr) {
+		return true
+	}
+	if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) {
+		return true
+	}
+	msg := strings.ToLower(strings.TrimSpace(err.Error()))
+	return strings.Contains(msg, "unexpected end of json input") || strings.Contains(msg, "invalid character")
+}
+
+func backoffDelay(attempt int) time.Duration {
+	if attempt <= 0 {
+		attempt = 1
+	}
+	return time.Duration(attempt) * fallbackStatusDecodeBaseDelay
+}
+
+func formatFallbackStatusIssue(fallback fallbackStatusPath, mode fallbackStatusFailureMode, err error) string {
+	switch mode {
+	case fallbackFailureModeMissing:
+		return fmt.Sprintf("fallback[%s] missing status artifact: %s", fallback.source, fallback.path)
+	case fallbackFailureModeUnreadable:
+		return fmt.Sprintf("fallback[%s] unreadable status artifact: %s (%v)", fallback.source, fallback.path, err)
+	case fallbackFailureModeCorrupt:
+		return fmt.Sprintf("fallback[%s] corrupt status artifact: %s (%v)", fallback.source, fallback.path, err)
+	case fallbackFailureModeInvalidPayload:
+		return fmt.Sprintf("fallback[%s] invalid status payload: %s (%v)", fallback.source, fallback.path, err)
+	default:
+		return fmt.Sprintf("fallback[%s] status ingestion error: %s (%v)", fallback.source, fallback.path, err)
 	}
-	return statusSourceNone, nil
 }
 
 func buildManualBoxFanInPromptPreamble(exec *Execution, node *model.Node) string {
@@ -468,20 +573,29 @@ func (h *CodergenHandler) Execute(ctx context.Context, exec *Execution, node *mo
 	// If the backend/agent wrote a worktree status.json, surface it to the engine by
 	// copying it into the authoritative stage directory location.
 	source := statusSourceNone
+	ingestionDiagnostic := ""
 	if len(worktreeStatusPaths) > 0 {
 		var err error
-		source, err = copyFirstValidFallbackStatus(stageStatusPath, worktreeStatusPaths)
+		source, ingestionDiagnostic, err = copyFirstValidFallbackStatus(stageStatusPath, worktreeStatusPaths)
 		if err != nil {
-			return runtime.Outcome{Status: runtime.StatusFail, FailureReason: err.Error()}, nil
+			reason := err.Error()
+			if strings.TrimSpace(ingestionDiagnostic) != "" {
+				reason = reason + "; " + strings.TrimSpace(ingestionDiagnostic)
+			}
+			return runtime.Outcome{Status: runtime.StatusFail, FailureReason: reason}, nil
 		}
 	}
 	if exec != nil && exec.Engine != nil {
-		exec.Engine.appendProgress(map[string]any{
+		progress := map[string]any{
 			"event":   "status_ingestion_decision",
 			"node_id": node.ID,
 			"source":  string(source),
 			"copied":  source == statusSourceWorktree || source == statusSourceDotAI,
-		})
+		}
+		if strings.TrimSpace(ingestionDiagnostic) != "" {
+			progress["diagnostic"] = strings.TrimSpace(ingestionDiagnostic)
+		}
+		exec.Engine.appendProgress(progress)
 	}
 
 	if out != nil {
@@ -523,9 +637,13 @@ func (h *CodergenHandler) Execute(ctx context.Context, exec *Execution, node *mo
 			},
 		}, nil
 	}
+	reason := "missing status.json (auto_status=false)"
+	if strings.TrimSpace(ingestionDiagnostic) != "" {
+		reason = reason + "; " + strings.TrimSpace(ingestionDiagnostic)
+	}
 	return runtime.Outcome{
 		Status:        runtime.StatusFail,
-		FailureReason: "missing status.json (auto_status=false)",
+		FailureReason: reason,
 		Notes:         "codergen completed without an outcome or status.json",
 		ContextUpdates: map[string]any{
 			"last_stage":    node.ID,
diff --git a/internal/attractor/engine/input_manifest_contract_test.go b/internal/attractor/engine/input_manifest_contract_test.go
index 210b4657..2cf0948e 100644
--- a/internal/attractor/engine/input_manifest_contract_test.go
+++ b/internal/attractor/engine/input_manifest_contract_test.go
@@ -73,6 +73,7 @@ digraph G {
 func TestInputManifestContract_DisabledSuppressesEnvPreambleAndEvents(t *testing.T) {
 	repo := initTestRepo(t)
 	logsRoot := t.TempDir()
+	t.Setenv(inputsManifestEnvKey, "/tmp/ambient-inputs-manifest.json")
 	mustWriteInputFile(t, filepath.Join(repo, ".ai", "definition_of_done.md"), "line by line")
 
 	cli := writeInputManifestProbeCLI(t, false)
diff --git a/internal/attractor/engine/input_materialization.go b/internal/attractor/engine/input_materialization.go
index 932c24e5..83bc2aa7 100644
--- a/internal/attractor/engine/input_materialization.go
+++ b/internal/attractor/engine/input_materialization.go
@@ -752,7 +752,7 @@ func isLikelyArtifactInputPath(path string) bool {
 	segments := strings.Split(normalized, "/")
 	for _, seg := range segments {
 		switch seg {
-		case ".git", ".jj", "logs", "benchmarks", "worktree", "node_modules", ".pnpm-store", ".venv", "venv", "__pycache__", ".pytest_cache", "dist-info", "managed":
+		case ".git", ".jj", "logs", "benchmarks", "worktree", ".worktrees", "node_modules", ".pnpm-store", ".venv", "venv", "__pycache__", ".pytest_cache", "dist-info", "managed", ".cargo-target":
 			return true
 		}
 	}
diff --git a/internal/attractor/engine/input_reference_scan.go b/internal/attractor/engine/input_reference_scan.go
index 761aabaa..16a61b3b 100644
--- a/internal/attractor/engine/input_reference_scan.go
+++ b/internal/attractor/engine/input_reference_scan.go
@@ -163,13 +163,17 @@ func looksLikeReferenceToken(token string) bool {
 	if strings.ContainsAny(token, "<>|") {
 		return false
 	}
+	// Reject tokens with spaces - they are natural language, not paths/globs.
+	if strings.ContainsAny(token, " \t") {
+		return false
+	}
 	if strings.Contains(token, "/") || strings.Contains(token, "\\") {
 		return true
 	}
 	if windowsAbsPathRE.MatchString(token) {
 		return true
 	}
-	if strings.ContainsAny(token, "*?[") {
+	if looksLikeGlobPattern(token) {
 		return true
 	}
 	return false
@@ -182,7 +186,7 @@ func looksLikeStructuredReferenceToken(token string) bool {
 	if strings.Contains(token, "](") || strings.ContainsAny(token, "<>|") {
 		return false
 	}
-	if strings.ContainsAny(token, "*?[") {
+	if looksLikeGlobPattern(token) {
 		return true
 	}
 	// Structured captures (markdown links/quoted tokens) may contain local file
@@ -194,8 +198,44 @@ func looksLikeStructuredReferenceToken(token string) bool {
 }
 
 func classifyReferenceKind(pattern string) InputReferenceKind {
-	if strings.ContainsAny(pattern, "*?[") {
+	if looksLikeGlobPattern(pattern) {
 		return InputReferenceKindGlob
 	}
 	return InputReferenceKindPath
 }
+
+// looksLikeGlobPattern returns true if token contains valid glob metacharacters.
+// Tokens containing * or ? are always treated as globs. Tokens containing [
+// are only treated as globs if the bracket forms a valid character class (i.e.
+// has a matching ]) AND the token also has path-like context (a path separator,
+// * or ?) - this prevents natural language fragments like
+// "DEFAULT_TOOL_LIMITS[tool_name" and programming constructs like
+// "map[string]any" from being classified as globs.
+func looksLikeGlobPattern(token string) bool {
+	if strings.ContainsAny(token, "*?") {
+		return true
+	}
+	if !strings.Contains(token, "[") {
+		return false
+	}
+	// Validate that every [ has a matching ] (valid character class).
+	hasValidBracket := false
+	for i := 0; i < len(token); i++ {
+		if token[i] == '[' {
+			j := strings.IndexByte(token[i+1:], ']')
+			if j < 0 {
+				// Unmatched [ - not a valid glob.
+				return false
+			}
+			hasValidBracket = true
+			// Skip past the matched ]
+			i += j + 1
+		}
+	}
+	if !hasValidBracket {
+		return false
+	}
+	// Brackets alone (e.g. "map[string]any") are not enough to be a glob.
+	// Require a path separator to disambiguate from programming constructs.
+	return strings.ContainsAny(token, "/\\")
+}
diff --git a/internal/attractor/engine/input_reference_scan_test.go b/internal/attractor/engine/input_reference_scan_test.go
index 99425ff4..7850b19a 100644
--- a/internal/attractor/engine/input_reference_scan_test.go
+++ b/internal/attractor/engine/input_reference_scan_test.go
@@ -44,6 +44,59 @@ func TestInputReferenceScan_IgnoresURLsAndParsesWindowsGlobToken(t *testing.T) {
 	requireRefKind(t, got, "docs/spec.md", InputReferenceKindPath)
 }
 
+func TestInputReferenceScan_RejectsNaturalLanguageWithBrackets(t *testing.T) {
+	scanner := deterministicInputReferenceScanner{}
+	// These tokens from issue #48 should NOT be classified as globs.
+	badTokens := []string{
+		`you are wielding ([ch]) [weapon`,
+		`DEFAULT_TOOL_LIMITS[tool_name`,
+		`array[index`,
+		`map[string]any`,
+	}
+	for _, token := range badTokens {
+		refs := scanner.Scan("test.md", []byte(token))
+		for _, ref := range refs {
+			if ref.Kind == InputReferenceKindGlob {
+				t.Errorf("token %q should not be classified as glob, but got pattern=%q kind=%q", token, ref.Pattern, ref.Kind)
+			}
+		}
+	}
+}
+
+func TestInputReferenceScan_AcceptsValidGlobBrackets(t *testing.T) {
+	scanner := deterministicInputReferenceScanner{}
+	// These ARE valid globs with matched brackets.
+	content := strings.Join([]string{
+		`"src/[abc]/*.go"`,
+		`"docs/[a-z]*.md"`,
+	}, "\n")
+	refs := scanner.Scan("test.md", []byte(content))
+	got := map[string]InputReferenceKind{}
+	for _, ref := range refs {
+		got[ref.Pattern] = ref.Kind
+	}
+	requireRefKind(t, got, "src/[abc]/*.go", InputReferenceKindGlob)
+	requireRefKind(t, got, "docs/[a-z]*.md", InputReferenceKindGlob)
+}
+
+func TestIsLikelyArtifactInputPath_ExcludesWorktrees(t *testing.T) {
+	cases := []struct {
+		path string
+		want bool
+	}{
+		{".worktrees/rogue-logs/worktree/.cargo-target/foo.rs", true},
+		{".worktrees/some-branch/src/main.go", true},
+		{"src/.cargo-target/debug/build/foo.o", true},
+		{"src/main.go", false},
+		{"docs/spec.md", false},
+	}
+	for _, tc := range cases {
+		if got := isLikelyArtifactInputPath(tc.path); got != tc.want {
+			t.Errorf("isLikelyArtifactInputPath(%q) = %v, want %v", tc.path, got, tc.want)
+		}
+	}
+}
+
 func requireRefKind(t *testing.T, got map[string]InputReferenceKind, pattern string, want InputReferenceKind) {
 	t.Helper()
 	kind, ok := got[pattern]
diff --git a/internal/attractor/engine/main_test.go b/internal/attractor/engine/main_test.go
new file mode 100644
index 00000000..18227096
--- /dev/null
+++ b/internal/attractor/engine/main_test.go
@@ -0,0 +1,27 @@
+package engine
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestMain(m *testing.M) {
+	stateRoot, err := os.MkdirTemp("", "kilroy-engine-test-state-*")
+	if err != nil {
+		_, _ = os.Stderr.WriteString("failed to create temp state root: " + err.Error() + "\n")
+		os.Exit(2)
+	}
+
+	if strings.TrimSpace(os.Getenv("XDG_STATE_HOME")) == "" {
+		_ = os.Setenv("XDG_STATE_HOME", stateRoot)
+	}
+	if strings.TrimSpace(os.Getenv("KILROY_CODEX_STATE_BASE")) == "" {
+		_ = os.Setenv("KILROY_CODEX_STATE_BASE", filepath.Join(stateRoot, "kilroy", "attractor", "codex-state"))
+	}
+
+	code := m.Run()
+	_ = os.RemoveAll(stateRoot)
+	os.Exit(code)
+}
diff --git a/internal/attractor/engine/node_env.go b/internal/attractor/engine/node_env.go
index 317bf6a2..b21479b2 100644
--- a/internal/attractor/engine/node_env.go
+++ b/internal/attractor/engine/node_env.go
@@ -15,12 +15,27 @@ const (
 	inputsManifestEnvKey = "KILROY_INPUTS_MANIFEST_PATH"
 )
 
+var baseNodeEnvStripKeys = []string{
+	"CLAUDECODE",
+	runIDEnvKey,
+	nodeIDEnvKey,
+	logsRootEnvKey,
+	stageLogsDirEnvKey,
+	worktreeDirEnvKey,
+	inputsManifestEnvKey,
+	stageStatusPathEnvKey,
+	stageStatusFallbackPathEnvKey,
+}
+
 // buildBaseNodeEnv constructs the base environment for any node execution.
 // It starts from os.Environ(), strips CLAUDECODE, then applies resolved
 // artifact policy environment variables.
 func buildBaseNodeEnv(rp ResolvedArtifactPolicy) []string {
 	base := os.Environ()
-	base = stripEnvKey(base, "CLAUDECODE")
+	// Scrub inherited process state that can corrupt stage-local contracts.
+	for _, key := range baseNodeEnvStripKeys {
+		base = stripEnvKey(base, key)
+	}
 
 	overrides := make(map[string]string, len(rp.Env.Vars))
 	for k, v := range rp.Env.Vars {
diff --git a/internal/attractor/engine/node_env_test.go b/internal/attractor/engine/node_env_test.go
index 94f770bf..01fe8304 100644
--- a/internal/attractor/engine/node_env_test.go
+++ b/internal/attractor/engine/node_env_test.go
@@ -59,6 +59,29 @@ func TestBuildBaseNodeEnv_StripsClaudeCode(t *testing.T) {
 	}
 }
 
+func TestBuildBaseNodeEnv_StripsKilroyContractEnvKeys(t *testing.T) {
+	outerValues := map[string]string{
+		runIDEnvKey:                   "outer-run",
+		nodeIDEnvKey:                  "outer-node",
+		logsRootEnvKey:                "/tmp/outer-logs",
+		stageLogsDirEnvKey:            "/tmp/outer-logs/outer-node",
+		worktreeDirEnvKey:             "/tmp/outer-worktree",
+		inputsManifestEnvKey:          "/tmp/outer-inputs-manifest.json",
+		stageStatusPathEnvKey:         "/tmp/outer-status.json",
+		stageStatusFallbackPathEnvKey: "/tmp/outer-fallback-status.json",
+	}
+	for key, value := range outerValues {
+		t.Setenv(key, value)
+	}
+
+	env := buildBaseNodeEnv(ResolvedArtifactPolicy{})
+	for key := range outerValues {
+		if envHasKey(env, key) {
+			t.Fatalf("%s should be stripped from base env", key)
+		}
+	}
+}
+
 func TestBuildBaseNodeEnv_PreservesExplicitToolchainPaths(t *testing.T) {
 	home := t.TempDir()
 	cargoHome := filepath.Join(home, ".cargo")
diff --git a/internal/attractor/engine/progress.go b/internal/attractor/engine/progress.go
index ba25b9f1..07a5c636 100644
--- a/internal/attractor/engine/progress.go
+++ b/internal/attractor/engine/progress.go
@@ -17,6 +17,17 @@ import (
 //
 // This is best-effort: progress logging must never block or fail a run.
 func (e *Engine) appendProgress(ev map[string]any) {
+	e.appendProgressImpl(ev, true)
+}
+
+// appendProgressLivenessOnly writes a progress event to progress.ndjson and
+// live.json for observability but does NOT reset the stall watchdog timer.
+// Use this for unconditional heartbeat emissions that should not mask true stalls.
+func (e *Engine) appendProgressLivenessOnly(ev map[string]any) {
+	e.appendProgressImpl(ev, false)
+}
+
+func (e *Engine) appendProgressImpl(ev map[string]any, updateStallTimer bool) {
 	if e == nil {
 		return
 	}
@@ -47,7 +58,9 @@ func (e *Engine) appendProgress(ev map[string]any) {
 
 	e.progressMu.Lock()
 	defer e.progressMu.Unlock()
-	e.lastProgressAt = now
+	if updateStallTimer {
+		e.lastProgressAt = now
+	}
 
 	// Append to progress.ndjson.
 	// Intentionally open/close on each event so writes are immediately flushed
diff --git a/internal/attractor/engine/prompts/stage_status_contract_preamble.tmpl b/internal/attractor/engine/prompts/stage_status_contract_preamble.tmpl
index 5522ce6b..eb48bef4 100644
--- a/internal/attractor/engine/prompts/stage_status_contract_preamble.tmpl
+++ b/internal/attractor/engine/prompts/stage_status_contract_preamble.tmpl
@@ -4,3 +4,6 @@ ${{.StageStatusFallbackPathEnvKey}} = {{.FallbackPath}}
 - Write your status JSON to ${{.StageStatusPathEnvKey}}. If that write fails, use run-scoped fallback ${{.StageStatusFallbackPathEnvKey}}.
 - Do not write status.json to nested module directories.
 - To end this stage: write the status file, then return your response. Do NOT call close_agent or any session-management tool.
+- Verification reporting: if you ran verification commands (tests, linters, type checks) and any failed or were blocked by infra issues (missing tools, DNS errors, network failures), you MUST set status to "degraded_success" instead of "success" and include a "verification" object:
+  {"status": "degraded_success", "verification": {"status": "blocked", "blocked_reason": "npm registry DNS failure", "commands": [{"command": "npx tsc", "exit_code": 1, "blocked": true, "reason": "EAI_AGAIN"}]}, ...}
+- Use "success" only when all verification commands actually executed and passed, or when no verification was needed.
diff --git a/internal/attractor/engine/resume.go b/internal/attractor/engine/resume.go
index bb71bcdc..eed7708f 100644
--- a/internal/attractor/engine/resume.go
+++ b/internal/attractor/engine/resume.go
@@ -70,26 +70,26 @@ func resumeFromLogsRoot(ctx context.Context, logsRoot string, ov ResumeOverrides
 		runID         string
 		checkpointSHA string
 		eng           *Engine
+		releaseLock   func()
 	)
 	defer func() {
-		if err == nil {
-			return
-		}
-		if eng != nil {
-			eng.persistFatalOutcome(ctx, err)
-			return
-		}
-		if strings.TrimSpace(logsRoot) == "" || strings.TrimSpace(runID) == "" {
-			return
+		if err != nil && !isRunOwnershipConflict(err) {
+			if eng != nil {
+				eng.persistFatalOutcome(ctx, err)
+			} else if strings.TrimSpace(logsRoot) != "" && strings.TrimSpace(runID) != "" {
+				final := runtime.FinalOutcome{
+					Timestamp:         time.Now().UTC(),
+					Status:            runtime.FinalFail,
+					RunID:             runID,
+					FinalGitCommitSHA: strings.TrimSpace(checkpointSHA),
+					FailureReason:     strings.TrimSpace(err.Error()),
+				}
+				_ = final.Save(filepath.Join(logsRoot, "final.json"))
+			}
 		}
-		final := runtime.FinalOutcome{
-			Timestamp:         time.Now().UTC(),
-			Status:            runtime.FinalFail,
-			RunID:             runID,
-			FinalGitCommitSHA: strings.TrimSpace(checkpointSHA),
-			FailureReason:     strings.TrimSpace(err.Error()),
+		if releaseLock != nil {
+			releaseLock()
 		}
-		_ = final.Save(filepath.Join(logsRoot, "final.json"))
 	}()
 
 	m, err := loadManifest(filepath.Join(logsRoot, "manifest.json"))
@@ -97,6 +97,10 @@ func resumeFromLogsRoot(ctx context.Context, logsRoot string, ov ResumeOverrides
 		return nil, err
 	}
 	runID = strings.TrimSpace(m.RunID)
+	releaseLock, err = acquireRunOwnership(logsRoot, runID)
+	if err != nil {
+		return nil, err
+	}
 	cp, err := runtime.LoadCheckpoint(filepath.Join(logsRoot, "checkpoint.json"))
 	if err != nil {
 		return nil, err
diff --git a/internal/attractor/engine/run_ownership_lock.go b/internal/attractor/engine/run_ownership_lock.go
new file mode 100644
index 00000000..cdddfbd3
--- /dev/null
+++ b/internal/attractor/engine/run_ownership_lock.go
@@ -0,0 +1,185 @@
+package engine
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/danshapiro/kilroy/internal/attractor/procutil"
+)
+
+const (
+	runOwnershipLockFile        = "run.lock.json"
+	runOwnershipAcquireAttempts = 20
+	runOwnershipRetryDelay      = 25 * time.Millisecond
+)
+
+type runOwnershipRecord struct {
+	PID          int    `json:"pid"`
+	PIDStartTime uint64 `json:"pid_start_time,omitempty"`
+	RunID        string `json:"run_id,omitempty"`
+	AcquiredAt   string `json:"acquired_at,omitempty"`
+}
+
+type runOwnershipConflictError struct {
+	LogsRoot    string
+	LockPath    string
+	ExistingPID int
+	ExistingRun string
+	Reason      string
+}
+
+func (e *runOwnershipConflictError) Error() string {
+	if e == nil {
+		return "run ownership conflict"
+	}
+	msg := fmt.Sprintf("logs_root %q is already owned", e.LogsRoot)
+	if e.ExistingPID > 0 {
+		msg += fmt.Sprintf(" by a live process (pid=%d)", e.ExistingPID)
+	} else {
+		msg += " by another process"
+	}
+	if strings.TrimSpace(e.ExistingRun) != "" {
+		msg += fmt.Sprintf(" run_id=%q", e.ExistingRun)
+	}
+	if strings.TrimSpace(e.LockPath) != "" {
+		msg += fmt.Sprintf(" lock=%q", e.LockPath)
+	}
+	if strings.TrimSpace(e.Reason) != "" {
+		msg += fmt.Sprintf(" (%s)", strings.TrimSpace(e.Reason))
+	}
+	return msg
+}
+
+func isRunOwnershipConflict(err error) bool {
+	var ownershipErr *runOwnershipConflictError
+	return errors.As(err, &ownershipErr)
+}
+
+func acquireRunOwnership(logsRoot, runID string) (func(), error) {
+	root := strings.TrimSpace(logsRoot)
+	if root == "" {
+		return nil, fmt.Errorf("logs_root is required")
+	}
+	lockPath := filepath.Join(root, runOwnershipLockFile)
+	record := runOwnershipRecord{
+		PID:        os.Getpid(),
+		RunID:      strings.TrimSpace(runID),
+		AcquiredAt: time.Now().UTC().Format(time.RFC3339Nano),
+	}
+	if startTime, err := procutil.ReadPIDStartTime(record.PID); err == nil && startTime > 0 {
+		record.PIDStartTime = startTime
+	}
+	payload, err := json.MarshalIndent(record, "", "  ")
+	if err != nil {
+		return nil, fmt.Errorf("encode run ownership: %w", err)
+	}
+	// Keep payload newline-friendly for manual inspection.
+	payload = append(payload, '\n')
+
+	var lastReadErr error
+	for attempts := 0; attempts < runOwnershipAcquireAttempts; attempts++ {
+		f, openErr := os.OpenFile(lockPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o644)
+		if openErr == nil {
+			if _, writeErr := f.Write(payload); writeErr != nil {
+				_ = f.Close()
+				_ = os.Remove(lockPath)
+				return nil, fmt.Errorf("write run ownership lock %q: %w", lockPath, writeErr)
+			}
+			if closeErr := f.Close(); closeErr != nil {
+				_ = os.Remove(lockPath)
+				return nil, fmt.Errorf("close run ownership lock %q: %w", lockPath, closeErr)
+			}
+			pid := record.PID
+			start := record.PIDStartTime
+			return func() {
+				releaseRunOwnership(lockPath, pid, start)
+			}, nil
+		}
+		if !errors.Is(openErr, os.ErrExist) {
+			return nil, fmt.Errorf("create run ownership lock %q: %w", lockPath, openErr)
+		}
+
+		existing, readErr := readRunOwnership(lockPath)
+		if readErr != nil {
+			lastReadErr = readErr
+			time.Sleep(runOwnershipRetryDelay)
+			continue
+		}
+
+		if runOwnershipMatchesLiveProcess(existing) {
+			return nil, &runOwnershipConflictError{
+				LogsRoot:    root,
+				LockPath:    lockPath,
+				ExistingPID: existing.PID,
+				ExistingRun: strings.TrimSpace(existing.RunID),
+			}
+		}
+		// Stale ownership record, best-effort cleanup and retry.
+		if removeErr := os.Remove(lockPath); removeErr != nil && !errors.Is(removeErr, os.ErrNotExist) {
+			return nil, fmt.Errorf("remove stale run ownership lock %q: %w", lockPath, removeErr)
+		}
+	}
+	if lastReadErr != nil {
+		return nil, &runOwnershipConflictError{
+			LogsRoot: root,
+			LockPath: lockPath,
+			Reason:   fmt.Sprintf("ownership lock unreadable after retries: %v", lastReadErr),
+		}
+	}
+	return nil, fmt.Errorf("acquire run ownership lock %q: exhausted retries", lockPath)
+}
+
+func readRunOwnership(lockPath string) (runOwnershipRecord, error) {
+	b, err := os.ReadFile(lockPath)
+	if err != nil {
+		return runOwnershipRecord{}, err
+	}
+	if strings.TrimSpace(string(b)) == "" {
+		return runOwnershipRecord{}, fmt.Errorf("empty lock payload")
+	}
+	var rec runOwnershipRecord
+	if err := json.Unmarshal(b, &rec); err != nil {
+		return runOwnershipRecord{}, err
+	}
+	if rec.PID <= 0 {
+		return runOwnershipRecord{}, fmt.Errorf("invalid pid %d", rec.PID)
+	}
+	return rec, nil
+}
+
+func runOwnershipMatchesLiveProcess(rec runOwnershipRecord) bool {
+	if rec.PID <= 0 || !procutil.PIDAlive(rec.PID) {
+		return false
+	}
+	if rec.PIDStartTime == 0 {
+		return true
+	}
+	start, err := procutil.ReadPIDStartTime(rec.PID)
+	if err != nil || start == 0 {
+		// Cannot disambiguate; keep conservative conflict behavior.
+		return true
+	}
+	return start == rec.PIDStartTime
+}
+
+func releaseRunOwnership(lockPath string, ownerPID int, ownerStartTime uint64) {
+	if strings.TrimSpace(lockPath) == "" || ownerPID <= 0 {
+		return
+	}
+	rec, err := readRunOwnership(lockPath)
+	if err != nil {
+		return
+	}
+	if rec.PID != ownerPID {
+		return
+	}
+	if ownerStartTime > 0 && rec.PIDStartTime > 0 && rec.PIDStartTime != ownerStartTime {
+		return
+	}
+	_ = os.Remove(lockPath)
+}
diff --git a/internal/attractor/engine/run_ownership_lock_test.go b/internal/attractor/engine/run_ownership_lock_test.go
new file mode 100644
index 00000000..e6c286a6
--- /dev/null
+++ b/internal/attractor/engine/run_ownership_lock_test.go
@@ -0,0 +1,297 @@
+package engine
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"github.com/danshapiro/kilroy/internal/attractor/procutil"
+)
+
+func TestAcquireRunOwnership_RejectsLiveOwner(t *testing.T) {
+	t.Parallel()
+
+	logsRoot := t.TempDir()
+	lockPath := filepath.Join(logsRoot, runOwnershipLockFile)
+	ownerPID, ownerStart := startSleepingProcess(t)
+	owner := runOwnershipRecord{
+		PID:          ownerPID,
+		PIDStartTime: ownerStart,
+		RunID:        "existing-run",
+	}
+	if err := writeOwnershipRecord(lockPath, owner); err != nil {
+		t.Fatalf("writeOwnershipRecord: %v", err)
+	}
+
+	_, err := acquireRunOwnership(logsRoot, "new-run")
+	if err == nil {
+		t.Fatalf("expected ownership conflict error, got nil")
+	}
+	var ownershipErr *runOwnershipConflictError
+	if !errors.As(err, &ownershipErr) {
+		t.Fatalf("expected runOwnershipConflictError, got %T (%v)", err, err)
+	}
+	if ownershipErr.ExistingPID != ownerPID {
+		t.Fatalf("existing pid: got %d want %d", ownershipErr.ExistingPID, ownerPID)
+	}
+}
+
+func TestAcquireRunOwnership_ReclaimsStaleOwner(t *testing.T) {
+	t.Parallel()
+
+	logsRoot := t.TempDir()
+	lockPath := filepath.Join(logsRoot, runOwnershipLockFile)
+	owner := runOwnershipRecord{
+		PID:          999999, // best-effort stale PID
+		PIDStartTime: 123456,
+		RunID:        "stale-run",
+	}
+	if err := writeOwnershipRecord(lockPath, owner); err != nil {
+		t.Fatalf("writeOwnershipRecord: %v", err)
+	}
+
+	release, err := acquireRunOwnership(logsRoot, "new-run")
+	if err != nil {
+		t.Fatalf("acquireRunOwnership: %v", err)
+	}
+	defer release()
+
+	got, err := readRunOwnership(lockPath)
+	if err != nil {
+		t.Fatalf("readRunOwnership: %v", err)
+	}
+	if got.PID != os.Getpid() {
+		t.Fatalf("owner pid: got %d want %d", got.PID, os.Getpid())
+	}
+	if got.RunID != "new-run" {
+		t.Fatalf("owner run_id: got %q want %q", got.RunID, "new-run")
+	}
+}
+
+func TestAcquireRunOwnership_UnreadableLockConflicts(t *testing.T) {
+	t.Parallel()
+
+	logsRoot := t.TempDir()
+	lockPath := filepath.Join(logsRoot, runOwnershipLockFile)
+	if err := os.WriteFile(lockPath, []byte("{"), 0o644); err != nil {
+		t.Fatalf("write unreadable lock: %v", err)
+	}
+
+	_, err := acquireRunOwnership(logsRoot, "new-run")
+	if err != nil {
+		var ownershipErr *runOwnershipConflictError
+		if !errors.As(err, &ownershipErr) {
+			t.Fatalf("expected runOwnershipConflictError, got %T (%v)", err, err)
+		}
+		if ownershipErr.Reason == "" {
+			t.Fatalf("expected conflict reason for unreadable lock, got empty reason")
+		}
+	} else {
+		t.Fatalf("expected ownership conflict for unreadable lock, got nil")
+	}
+	if _, statErr := os.Stat(lockPath); statErr != nil {
+		t.Fatalf("expected unreadable lock to be preserved, stat err: %v", statErr)
+	}
+}
+
+func TestAcquireRunOwnership_RetriesUnreadableLockUntilConflict(t *testing.T) {
+	t.Parallel()
+
+	logsRoot := t.TempDir()
+	lockPath := filepath.Join(logsRoot, runOwnershipLockFile)
+	owner := ownerRecordForCurrentPID("existing-run")
+
+	created := make(chan struct{})
+	done := make(chan error, 1)
+	go func() {
+		f, err := os.OpenFile(lockPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o644)
+		if err != nil {
+			close(created)
+			done <- err
+			return
+		}
+		close(created)
+		defer func() {
+			_ = f.Close()
+		}()
+		time.Sleep(120 * time.Millisecond)
+		b, err := json.Marshal(owner)
+		if err != nil {
+			done <- err
+			return
+		}
+		_, err = f.Write(b)
+		done <- err
+	}()
+	<-created
+
+	_, err := acquireRunOwnership(logsRoot, "new-run")
+	if err == nil {
+		t.Fatalf("expected ownership conflict error, got nil")
+	}
+	if !isRunOwnershipConflict(err) {
+		t.Fatalf("expected run ownership conflict, got: %v", err)
+	}
+	if writeErr := <-done; writeErr != nil {
+		t.Fatalf("slow owner write failed: %v", writeErr)
+	}
+}
+
+func TestAcquireRunOwnership_ReclaimsPIDStartTimeMismatch(t *testing.T) {
+	t.Parallel()
+
+	currentStart, err := procutil.ReadPIDStartTime(os.Getpid())
+	if err != nil || currentStart == 0 {
+		t.Skip("pid start time unavailable on this platform")
+	}
+
+	logsRoot := t.TempDir()
+	lockPath := filepath.Join(logsRoot, runOwnershipLockFile)
+	owner := runOwnershipRecord{
+		PID:          os.Getpid(),
+		PIDStartTime: currentStart + 1, // force mismatch with current process identity
+		RunID:        "stale-owner",
+	}
+	if err := writeOwnershipRecord(lockPath, owner); err != nil {
+		t.Fatalf("writeOwnershipRecord: %v", err)
+	}
+
+	release, err := acquireRunOwnership(logsRoot, "new-run")
+	if err != nil {
+		t.Fatalf("acquireRunOwnership: %v", err)
+	}
+	defer release()
+
+	got, err := readRunOwnership(lockPath)
+	if err != nil {
+		t.Fatalf("readRunOwnership: %v", err)
+	}
+	if got.RunID != "new-run" {
+		t.Fatalf("owner run_id: got %q want %q", got.RunID, "new-run")
+	}
+}
+
+func TestReleaseRunOwnership_DoesNotRemoveForeignOwner(t *testing.T) {
+	t.Parallel()
+
+	logsRoot := t.TempDir()
+	lockPath := filepath.Join(logsRoot, runOwnershipLockFile)
+	owner := ownerRecordForCurrentPID("owner-run")
+	if err := writeOwnershipRecord(lockPath, owner); err != nil {
+		t.Fatalf("writeOwnershipRecord: %v", err)
+	}
+
+	releaseRunOwnership(lockPath, os.Getpid()+1, 0)
+	if _, err := os.Stat(lockPath); err != nil {
+		t.Fatalf("expected lock to remain for foreign owner, stat err: %v", err)
+	}
+}
+
+func TestReleaseRunOwnership_DoesNotRemoveMismatchedStartTime(t *testing.T) {
+	t.Parallel()
+
+	currentStart, err := procutil.ReadPIDStartTime(os.Getpid())
+	if err != nil || currentStart == 0 {
+		t.Skip("pid start time unavailable on this platform")
+	}
+
+	logsRoot := t.TempDir()
+	lockPath := filepath.Join(logsRoot, runOwnershipLockFile)
+	owner := runOwnershipRecord{
+		PID:          os.Getpid(),
+		PIDStartTime: currentStart + 1,
+		RunID:        "owner-run",
+	}
+	if err := writeOwnershipRecord(lockPath, owner); err != nil {
+		t.Fatalf("writeOwnershipRecord: %v", err)
+	}
+
+	releaseRunOwnership(lockPath, os.Getpid(), currentStart)
+	if _, err := os.Stat(lockPath); err != nil {
+		t.Fatalf("expected lock to remain when start time mismatches, stat err: %v", err)
+	}
+}
+
+func TestRun_OwnershipConflict_DoesNotWriteFinalJSON(t *testing.T) {
+	t.Parallel()
+
+	repo := initTestRepo(t)
+	logsRoot := t.TempDir()
+	lockPath := filepath.Join(logsRoot, runOwnershipLockFile)
+	ownerPID, ownerStart := startSleepingProcess(t)
+	owner := runOwnershipRecord{
+		PID:          ownerPID,
+		PIDStartTime: ownerStart,
+		RunID:        "existing-run",
+	}
+	if err := writeOwnershipRecord(lockPath, owner); err != nil {
+		t.Fatalf("writeOwnershipRecord: %v", err)
+	}
+
+	dot := []byte(`
+digraph G {
+  start [shape=Mdiamond]
+  exit  [shape=Msquare]
+  start -> exit
+}
+`)
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	_, err := Run(ctx, dot, RunOptions{
+		RepoPath: repo,
+		RunID:    "ownership-conflict",
+		LogsRoot: logsRoot,
+	})
+	if err == nil {
+		t.Fatalf("expected ownership conflict error, got nil")
+	}
+	if !isRunOwnershipConflict(err) {
+		t.Fatalf("expected ownership conflict, got: %v", err)
+	}
+	if _, statErr := os.Stat(filepath.Join(logsRoot, "final.json")); !errors.Is(statErr, os.ErrNotExist) {
+		t.Fatalf("expected no final.json for ownership conflict, stat err=%v", statErr)
+	}
+}
+
+func writeOwnershipRecord(path string, rec runOwnershipRecord) error {
+	b, err := json.Marshal(rec)
+	if err != nil {
+		return err
+	}
+	return os.WriteFile(path, b, 0o644)
+}
+
+func ownerRecordForCurrentPID(runID string) runOwnershipRecord {
+	rec := runOwnershipRecord{
+		PID:   os.Getpid(),
+		RunID: runID,
+	}
+	if start, err := procutil.ReadPIDStartTime(rec.PID); err == nil && start > 0 {
+		rec.PIDStartTime = start
+	}
+	return rec
+}
+
+func startSleepingProcess(t *testing.T) (int, uint64) {
+	t.Helper()
+	cmd := exec.Command("sleep", "60")
+	if err := cmd.Start(); err != nil {
+		t.Fatalf("start sleep process: %v", err)
+	}
+	pid := cmd.Process.Pid
+	var start uint64
+	if s, err := procutil.ReadPIDStartTime(pid); err == nil {
+		start = s
+	}
+	t.Cleanup(func() {
+		_ = cmd.Process.Kill()
+		_, _ = cmd.Process.Wait()
+	})
+	return pid, start
+}
diff --git a/internal/attractor/engine/status_ingestion_retry_test.go b/internal/attractor/engine/status_ingestion_retry_test.go
new file mode 100644
index 00000000..ce730c31
--- /dev/null
+++ b/internal/attractor/engine/status_ingestion_retry_test.go
@@ -0,0 +1,181 @@
+package engine
+
+import (
+	"errors"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/danshapiro/kilroy/internal/attractor/runtime"
+)
+
+func TestCopyFirstValidFallbackStatus_CanonicalStageStatusWins(t *testing.T) {
+	tmp := t.TempDir()
+	stageStatusPath := filepath.Join(tmp, "logs", "a", "status.json")
+	fallbackPath := filepath.Join(tmp, "status.json")
+
+	if err := runtime.WriteFileAtomic(stageStatusPath, []byte(`{"status":"success","notes":"canonical"}`)); err != nil {
+		t.Fatalf("write canonical status: %v", err)
+	}
+	if err := os.WriteFile(fallbackPath, []byte(`{"status":"fail","failure_reason":"fallback"}`), 0o644); err != nil {
+		t.Fatalf("write fallback status: %v", err)
+	}
+
+	source, diagnostic, err := copyFirstValidFallbackStatus(stageStatusPath, []fallbackStatusPath{
+		{path: fallbackPath, source: statusSourceWorktree},
+	})
+	if err != nil {
+		t.Fatalf("copyFirstValidFallbackStatus: %v", err)
+	}
+	if source != statusSourceCanonical {
+		t.Fatalf("source=%q want %q", source, statusSourceCanonical)
+	}
+	if strings.TrimSpace(diagnostic) != "" {
+		t.Fatalf("diagnostic=%q want empty", diagnostic)
+	}
+
+	b, err := os.ReadFile(stageStatusPath)
+	if err != nil {
+		t.Fatalf("read stage status: %v", err)
+	}
+	out, err := runtime.DecodeOutcomeJSON(b)
+	if err != nil {
+		t.Fatalf("decode stage status: %v", err)
+	}
+	if out.Status != runtime.StatusSuccess {
+		t.Fatalf("status=%q want %q", out.Status, runtime.StatusSuccess)
+	}
+}
+
+func TestCopyFirstValidFallbackStatus_MissingFallbackIsDiagnosed(t *testing.T) {
+	tmp := t.TempDir()
+	stageStatusPath := filepath.Join(tmp, "logs", "a", "status.json")
+	missingPath := filepath.Join(tmp, "missing-status.json")
+
+	source, diagnostic, err := copyFirstValidFallbackStatus(stageStatusPath, []fallbackStatusPath{
+		{path: missingPath, source: statusSourceWorktree},
+	})
+	if err != nil {
+		t.Fatalf("copyFirstValidFallbackStatus: %v", err)
+	}
+	if source != statusSourceNone {
+		t.Fatalf("source=%q want empty", source)
+	}
+	if !strings.Contains(diagnostic, "missing status artifact") {
+		t.Fatalf("diagnostic=%q want mention of missing status artifact", diagnostic)
+	}
+}
+
+func TestCopyFirstValidFallbackStatus_PermanentCorruptFallbackIsDiagnosed(t *testing.T) {
+	tmp := t.TempDir()
+	stageStatusPath := filepath.Join(tmp, "logs", "a", "status.json")
+	fallbackPath := filepath.Join(tmp, "status.json")
+
+	if err := os.WriteFile(fallbackPath, []byte(`{ this is invalid json }`), 0o644); err != nil {
+		t.Fatalf("write corrupt fallback: %v", err)
+	}
+
+	source, diagnostic, err := copyFirstValidFallbackStatus(stageStatusPath, []fallbackStatusPath{
+		{path: fallbackPath, source: statusSourceWorktree},
+	})
+	if err != nil {
+		t.Fatalf("copyFirstValidFallbackStatus: %v", err)
+	}
+	if source != statusSourceNone {
+		t.Fatalf("source=%q want empty", source)
+	}
+	if !strings.Contains(diagnostic, "corrupt status artifact") {
+		t.Fatalf("diagnostic=%q want mention of corrupt status artifact", diagnostic)
+	}
+}
+
+func TestCopyFirstValidFallbackStatus_RetryDecodeSucceedsAfterTransientCorruption(t *testing.T) {
+	tmp := t.TempDir()
+	stageStatusPath := filepath.Join(tmp, "logs", "a", "status.json")
+	fallbackPath := filepath.Join(tmp, "status.json")
+
+	if err := os.WriteFile(fallbackPath, []byte(`{ this is invalid json }`), 0o644); err != nil {
+		t.Fatalf("seed transient corrupt fallback: %v", err)
+	}
+
+	go func() {
+		time.Sleep(fallbackStatusDecodeBaseDelay + 10*time.Millisecond)
+		_ = os.WriteFile(fallbackPath, []byte(`{"status":"fail","failure_reason":"transient decode retry success"}`), 0o644)
+	}()
+
+	source, diagnostic, err := copyFirstValidFallbackStatus(stageStatusPath, []fallbackStatusPath{
+		{path: fallbackPath, source: statusSourceWorktree},
+	})
+	if err != nil {
+		t.Fatalf("copyFirstValidFallbackStatus: %v", err)
+	}
+	if source != statusSourceWorktree {
+		t.Fatalf("source=%q want %q", source, statusSourceWorktree)
+	}
+	if strings.TrimSpace(diagnostic) != "" {
+		t.Fatalf("diagnostic=%q want empty", diagnostic)
+	}
+
+	b, err := os.ReadFile(stageStatusPath)
+	if err != nil {
+		t.Fatalf("read copied stage status: %v", err)
+	}
+	out, err := runtime.DecodeOutcomeJSON(b)
+	if err != nil {
+		t.Fatalf("decode copied stage status: %v", err)
+	}
+	if out.Status != runtime.StatusFail {
+		t.Fatalf("status=%q want %q", out.Status, runtime.StatusFail)
+	}
+	if out.FailureReason != "transient decode retry success" {
+		t.Fatalf("failure_reason=%q want %q", out.FailureReason, "transient decode retry success")
+	}
+}
+
+func TestCopyFirstValidFallbackStatus_TypeMismatchIsInvalidPayload(t *testing.T) {
+	tmp := t.TempDir()
+	stageStatusPath := filepath.Join(tmp, "logs", "a", "status.json")
+	fallbackPath := filepath.Join(tmp, "status.json")
+
+	// Deterministic schema/type violation: status must be a string.
+	if err := os.WriteFile(fallbackPath, []byte(`{"status":123}`), 0o644); err != nil {
+		t.Fatalf("write invalid payload fallback: %v", err)
+	}
+
+	source, diagnostic, err := copyFirstValidFallbackStatus(stageStatusPath, []fallbackStatusPath{
+		{path: fallbackPath, source: statusSourceWorktree},
+	})
+	if err != nil {
+		t.Fatalf("copyFirstValidFallbackStatus: %v", err)
+	}
+	if source != statusSourceNone {
+		t.Fatalf("source=%q want empty", source)
+	}
+	if !strings.Contains(diagnostic, "invalid status payload") {
+		t.Fatalf("diagnostic=%q want mention of invalid status payload", diagnostic)
+	}
+	if strings.Contains(diagnostic, "corrupt status artifact") {
+		t.Fatalf("diagnostic=%q should not classify type mismatch as corrupt artifact", diagnostic)
+	}
+}
+
+func TestShouldRetryFallbackRead_DoesNotRetryMissing(t *testing.T) {
+	if shouldRetryFallbackRead(fallbackFailureModeMissing, os.ErrNotExist) {
+		t.Fatalf("missing fallback paths should not be retried")
+	}
+}
+
+func TestShouldRetryFallbackRead_RetriesUnexpectedEOF(t *testing.T) {
+	if !shouldRetryFallbackRead(fallbackFailureModeUnreadable, io.ErrUnexpectedEOF) {
+		t.Fatalf("unexpected EOF should remain retryable for unreadable fallback artifacts")
+	}
+}
+
+func TestShouldRetryFallbackRead_DoesNotRetryRegularUnreadable(t *testing.T) {
+	if shouldRetryFallbackRead(fallbackFailureModeUnreadable, errors.New("permission denied")) {
+		t.Fatalf("regular unreadable fallback artifacts should not be retried")
+	}
+}
diff --git a/internal/attractor/engine/status_json_test.go b/internal/attractor/engine/status_json_test.go
index df90a1f0..6bdc5579 100644
--- a/internal/attractor/engine/status_json_test.go
+++ b/internal/attractor/engine/status_json_test.go
@@ -74,8 +74,21 @@ func TestCodergenStatusIngestion_FallbackOnlyWhenCanonicalMissing(t *testing.T)
 }
 
 func TestCodergenStatusIngestion_InvalidFallbackIsRejected(t *testing.T) {
-	_, source := runStatusIngestionFixture(t, false, false, true)
+	out, source := runStatusIngestionFixture(t, false, false, true)
 	if source != "" {
 		t.Fatalf("source=%q want empty", source)
 	}
+	if out.Status != runtime.StatusFail {
+		t.Fatalf("status=%q want %q", out.Status, runtime.StatusFail)
+	}
+}
+
+func TestCodergenStatusIngestion_MissingFallbackIsDiagnosed(t *testing.T) {
+	out, source := runStatusIngestionFixture(t, false, false, false)
+	if source != "" {
+		t.Fatalf("source=%q want empty", source)
+	}
+	if out.Status != runtime.StatusFail {
+		t.Fatalf("status=%q want %q", out.Status, runtime.StatusFail)
+	}
 }
diff --git a/internal/attractor/runtime/status.go b/internal/attractor/runtime/status.go
index a33f1f92..ef479bce 100644
--- a/internal/attractor/runtime/status.go
+++ b/internal/attractor/runtime/status.go
@@ -9,17 +9,20 @@ import (
 type StageStatus string
 
 const (
-	StatusSuccess        StageStatus = "success"
-	StatusPartialSuccess StageStatus = "partial_success"
-	StatusRetry          StageStatus = "retry"
-	StatusFail           StageStatus = "fail"
-	StatusSkipped        StageStatus = "skipped"
+	StatusSuccess         StageStatus = "success"
+	StatusDegradedSuccess StageStatus = "degraded_success"
+	StatusPartialSuccess  StageStatus = "partial_success"
+	StatusRetry           StageStatus = "retry"
+	StatusFail            StageStatus = "fail"
+	StatusSkipped         StageStatus = "skipped"
 )
 
 func ParseStageStatus(s string) (StageStatus, error) {
 	switch strings.ToLower(strings.TrimSpace(s)) {
 	case "success", "ok":
 		return StatusSuccess, nil
+	case "degraded_success", "degradedsuccess", "degraded-success":
+		return StatusDegradedSuccess, nil
 	case "partial_success", "partialsuccess", "partial-success":
 		return StatusPartialSuccess, nil
 	case "retry":
@@ -49,20 +52,36 @@ func (s StageStatus) Valid() bool {
 // (success, partial_success, retry, fail, skipped) rather than a custom routing value.
 func (s StageStatus) IsCanonical() bool {
 	switch s {
-	case StatusSuccess, StatusPartialSuccess, StatusRetry, StatusFail, StatusSkipped:
+	case StatusSuccess, StatusDegradedSuccess, StatusPartialSuccess, StatusRetry, StatusFail, StatusSkipped:
 		return true
 	default:
 		return false
 	}
 }
 
+// VerificationResult captures the outcome of verification commands run during a stage.
+type VerificationResult struct {
+	Status        string              `json:"status"`                   // "passed", "failed", or "blocked"
+	BlockedReason string              `json:"blocked_reason,omitempty"` // why verification could not run
+	Commands      []VerificationEntry `json:"commands,omitempty"`       // individual command results
+}
+
+// VerificationEntry records the result of a single verification command.
+type VerificationEntry struct {
+	Command  string `json:"command"`
+	ExitCode int    `json:"exit_code"`
+	Blocked  bool   `json:"blocked,omitempty"`
+	Reason   string `json:"reason,omitempty"`
+}
+
 type Outcome struct {
-	Status           StageStatus    `json:"status"`
-	PreferredLabel   string         `json:"preferred_label,omitempty"`
-	SuggestedNextIDs []string       `json:"suggested_next_ids,omitempty"`
-	ContextUpdates   map[string]any `json:"context_updates,omitempty"`
-	Notes            string         `json:"notes,omitempty"`
-	FailureReason    string         `json:"failure_reason,omitempty"`
+	Status           StageStatus         `json:"status"`
+	PreferredLabel   string              `json:"preferred_label,omitempty"`
+	SuggestedNextIDs []string            `json:"suggested_next_ids,omitempty"`
+	ContextUpdates   map[string]any      `json:"context_updates,omitempty"`
+	Notes            string              `json:"notes,omitempty"`
+	FailureReason    string              `json:"failure_reason,omitempty"`
+	Verification     *VerificationResult `json:"verification,omitempty"`
 	// Details is optional structured information for failures (or for debugging).
 	// The engine does not use it for routing, but it must be preserved when present.
 	Details any `json:"details,omitempty"`
diff --git a/internal/attractor/runtime/status_test.go b/internal/attractor/runtime/status_test.go
index b62e4d1c..7c6e7e30 100644
--- a/internal/attractor/runtime/status_test.go
+++ b/internal/attractor/runtime/status_test.go
@@ -11,12 +11,15 @@ func TestParseStageStatus_CanonicalAndLegacy(t *testing.T) {
 		want StageStatus
 	}{
 		{"success", StatusSuccess},
+		{"degraded_success", StatusDegradedSuccess},
 		{"partial_success", StatusPartialSuccess},
 		{"retry", StatusRetry},
 		{"fail", StatusFail},
 		{"skipped", StatusSkipped},
 		// Compatibility aliases.
 		{"ok", StatusSuccess},
+		{"degraded-success", StatusDegradedSuccess},
+		{"degradedsuccess", StatusDegradedSuccess},
 		{"error", StatusFail},
 		{"SUCCESS", StatusSuccess},
 		{"FAIL", StatusFail},
@@ -68,6 +71,9 @@ func TestStageStatus_IsCanonical(t *testing.T) {
 	if !StatusSuccess.IsCanonical() {
 		t.Fatalf("StatusSuccess should be canonical")
 	}
+	if !StatusDegradedSuccess.IsCanonical() {
+		t.Fatalf("StatusDegradedSuccess should be canonical")
+	}
 	if !StatusFail.IsCanonical() {
 		t.Fatalf("StatusFail should be canonical")
 	}
@@ -164,6 +170,72 @@ func TestDecodeOutcomeJSON_LegacyFailDetails_PopulatesFailureReason(t *testing.T
 	}
 }
 
+func TestDecodeOutcomeJSON_DegradedSuccessWithVerification(t *testing.T) {
+	input := `{
+		"status": "degraded_success",
+		"notes": "implementation complete but tsc blocked by DNS failure",
+		"verification": {
+			"status": "blocked",
+			"blocked_reason": "npm registry DNS failure",
+			"commands": [
+				{"command": "npx tsc", "exit_code": 1, "blocked": true, "reason": "EAI_AGAIN"}
+			]
+		}
+	}`
+	o, err := DecodeOutcomeJSON([]byte(input))
+	if err != nil {
+		t.Fatalf("DecodeOutcomeJSON: %v", err)
+	}
+	if o.Status != StatusDegradedSuccess {
+		t.Fatalf("status: got %q want %q", o.Status, StatusDegradedSuccess)
+	}
+	if o.Verification == nil {
+		t.Fatal("expected non-nil verification")
+	}
+	if o.Verification.Status != "blocked" {
+		t.Fatalf("verification status: got %q want %q", o.Verification.Status, "blocked")
+	}
+	if o.Verification.BlockedReason != "npm registry DNS failure" {
+		t.Fatalf("verification blocked_reason: got %q", o.Verification.BlockedReason)
+	}
+	if len(o.Verification.Commands) != 1 {
+		t.Fatalf("expected 1 verification command, got %d", len(o.Verification.Commands))
+	}
+	cmd := o.Verification.Commands[0]
+	if cmd.Command != "npx tsc" || cmd.ExitCode != 1 || !cmd.Blocked {
+		t.Fatalf("unexpected verification command: %+v", cmd)
+	}
+}
+
+func TestDecodeOutcomeJSON_SuccessWithPassedVerification(t *testing.T) {
+	input := `{
+		"status": "success",
+		"verification": {
+			"status": "passed",
+			"commands": [
+				{"command": "go test ./...", "exit_code": 0},
+				{"command": "go vet ./...", "exit_code": 0}
+			]
+		}
+	}`
+	o, err := DecodeOutcomeJSON([]byte(input))
+	if err != nil {
+		t.Fatalf("DecodeOutcomeJSON: %v", err)
+	}
+	if o.Status != StatusSuccess {
+		t.Fatalf("status: got %q want %q", o.Status, StatusSuccess)
+	}
+	if o.Verification == nil {
+		t.Fatal("expected non-nil verification")
+	}
+	if o.Verification.Status != "passed" {
+		t.Fatalf("verification status: got %q want %q", o.Verification.Status, "passed")
+	}
+	if len(o.Verification.Commands) != 2 {
+		t.Fatalf("expected 2 verification commands, got %d", len(o.Verification.Commands))
+	}
+}
+
 func TestDecodeOutcomeJSON_LegacyRetryDetails_PopulatesFailureReason(t *testing.T) {
 	o, err := DecodeOutcomeJSON([]byte(`{"outcome":"retry","details":"transient timeout"}`))
 	if err != nil {