diff --git a/cmd/kilroy/main.go b/cmd/kilroy/main.go index 29a08b8d..fb7c1ea8 100644 --- a/cmd/kilroy/main.go +++ b/cmd/kilroy/main.go @@ -110,7 +110,7 @@ func loadEnvFile(args []string) []string { func usage() { fmt.Fprintln(os.Stderr, "usage:") fmt.Fprintln(os.Stderr, " kilroy --version") - fmt.Fprintln(os.Stderr, " kilroy [--env-file ] attractor run [--detach] [--preflight|--test-run] [--allow-test-shim] [--confirm-stale-build] [--no-cxdb] [--force-model ] --graph --config [--run-id ] [--logs-root ]") + fmt.Fprintln(os.Stderr, " kilroy [--env-file ] attractor run [--detach] [--validate|--preflight|--test-run] [--allow-test-shim] [--confirm-stale-build] [--no-cxdb] [--force-model ] --graph --config [--run-id ] [--logs-root ]") fmt.Fprintln(os.Stderr, " kilroy attractor resume --logs-root ") fmt.Fprintln(os.Stderr, " kilroy attractor resume --cxdb --context-id ") fmt.Fprintln(os.Stderr, " kilroy attractor resume --run-branch [--repo ]") @@ -175,7 +175,7 @@ func attractorRun(args []string) { switch args[i] { case "--detach": detach = true - case "--preflight": + case "--preflight", "--validate": preflightOnly = true case "--test-run": preflightOnly = true @@ -233,7 +233,7 @@ func attractorRun(args []string) { os.Exit(1) } if preflightOnly && detach { - fmt.Fprintln(os.Stderr, "--preflight/--test-run cannot be combined with --detach") + fmt.Fprintln(os.Stderr, "--validate/--preflight/--test-run cannot be combined with --detach") os.Exit(1) } if err := ensureFreshKilroyBuild(confirmStaleBuild); err != nil { diff --git a/cmd/kilroy/main_exit_codes_test.go b/cmd/kilroy/main_exit_codes_test.go index 7de27d1e..626f24ba 100644 --- a/cmd/kilroy/main_exit_codes_test.go +++ b/cmd/kilroy/main_exit_codes_test.go @@ -630,6 +630,31 @@ digraph G { } } +func TestAttractorRun_ValidateAlias_Accepted(t *testing.T) { + bin := buildKilroyBinary(t) + repo := initTestRepo(t) + catalog := writePinnedCatalog(t) + cfg := writeRunConfig(t, repo, "http://127.0.0.1:9", "127.0.0.1:9", catalog) + + graph := filepath.Join(t.TempDir(), "validate.dot") + _ = os.WriteFile(graph, []byte(` +digraph G { + start [shape=Mdiamond] + exit [shape=Msquare] + start -> exit +} +`), 0o644) + + logsRoot := filepath.Join(t.TempDir(), "logs") + code, out := runKilroy(t, bin, "attractor", "run", "--graph", graph, "--config", cfg, "--run-id", "validate-flag", "--logs-root", logsRoot, "--no-cxdb", "--validate") + if code != 0 { + t.Fatalf("exit code: got %d want 0\n%s", code, out) + } + if !strings.Contains(out, "preflight=true") { + t.Fatalf("expected preflight marker, got:\n%s", out) + } +} + func TestAttractorRun_PreflightRejectsDetach(t *testing.T) { bin := buildKilroyBinary(t) repo := initTestRepo(t) @@ -650,7 +675,7 @@ digraph G { if code != 1 { t.Fatalf("exit code: got %d want 1\n%s", code, out) } - if !strings.Contains(out, "--preflight/--test-run cannot be combined with --detach") { + if !strings.Contains(out, "--validate/--preflight/--test-run cannot be combined with --detach") { t.Fatalf("expected incompatible flag error, got:\n%s", out) } } @@ -782,6 +807,9 @@ func TestUsage_IncludesPreflightFlags(t *testing.T) { if code != 1 { t.Fatalf("exit code: got %d want 1\n%s", code, out) } + if !strings.Contains(out, "--validate") { + t.Fatalf("usage should include --validate; output:\n%s", out) + } if !strings.Contains(out, "--preflight") { t.Fatalf("usage should include --preflight; output:\n%s", out) } diff --git a/demo/substack-spec-v01.dot b/demo/substack-spec-v01.dot index 486eb721..1bcc4e10 100644 --- a/demo/substack-spec-v01.dot +++ b/demo/substack-spec-v01.dot @@ -81,7 +81,7 @@ digraph substack_spec_v01 { max_agent_turns=300, max_tokens=32768, label="Implement", - prompt="Goal: $goal\n\nRead .ai/runs/$KILROY_RUN_ID/plan_final.md, .ai/runs/$KILROY_RUN_ID/spec.md, and .ai/runs/$KILROY_RUN_ID/definition_of_done.md. If the spec or DoD files do not exist at those paths, fall back to reading substack-spec-v01.md and substack-dod-v01.md directly.\n\nBEFORE ANYTHING ELSE: check if .ai/verify_errors.log exists. If it does, read it — it contains the exact commands that failed and their error output from the verify chain. Fix every error listed in that file, then delete .ai/verify_errors.log when all fixes are applied. Do NOT regenerate working code — only fix the specific errors.\n\nAlso check if .ai/runs/$KILROY_RUN_ID/verify_fidelity.md exists. If it does, read it — it contains per-AC pass/fail verdicts from the fidelity check. Fix every failing AC listed in that file.\n\nIf .ai/runs/$KILROY_RUN_ID/postmortem_latest.md exists, read it and fix ONLY identified gaps — do NOT regenerate working code. On repair passes, read and fix existing files rather than skipping them.\n\nImplement the complete Substack Creator Newsletter Engine as a single pass. On a fresh pass (no postmortem), check if target files already exist on disk and are non-empty — if so, skip those files. Implement each module with complete, functional code — no stubs, no placeholders, no TODO comments. Follow the plan and spec precisely.\n\nImplementation order (core infrastructure first, then features, then tests/deploy):\n\n1. Project scaffold — package.json, vite.config.ts, tsconfig.json, index.html, src/main.tsx, src/App.tsx. Install dependencies: react, react-dom, react-router-dom, idb, @google/generative-ai. Write ALL validation scripts:\n - scripts/validate-build.sh: runs npm run build, checks dist/ exists\n - scripts/validate-fmt.sh: runs npx prettier --check src/\n - scripts/validate-test.sh: runs integration scenarios first, then smoke, writes evidence + .ai/test-evidence/latest/manifest.json even on failure\n - scripts/validate-browser.sh: runs browser verification and captures artifacts\n - scripts/fix-fmt.sh: runs npx prettier --write src/\n - scripts/validate-artifacts.sh: verifies manifest scenario IDs match DoD integration scenarios\n All scripts: #!/bin/sh, set -e, POSIX sh failure trap.\n\n2. LLM client — src/lib/llm.ts, src/lib/llm-schemas.ts: structured JSON output with schema enforcement, retry with error feedback and intelligent backoff, model switching (gemini-3-flash-preview/gemini-3.1-pro-preview/gemini-2.5-flash-lite), client-side API key.\n\n3. Persistence — src/lib/db.ts, src/lib/types.ts: IndexedDB via idb with stores for configuration (API key, company, voice, guardrails), drafts, sessions (all inputs/LLM responses/intermediate state), post history (Markdown + attribution). All data persists unless user resets.\n\n4. Shared UI — src/components/RichInput.tsx, Card.tsx, ProgressBar.tsx, StepIndicator.tsx, src/styles/global.css.\n\n5. Setup flow — src/pages/Settings.tsx and step components: API key, company (rich input + gemini-3.1-pro-preview confirm + back), voice, guardrails. Parallel completion, status icons. Reset everything with confirmation.\n\n6. Dashboard — src/pages/Dashboard.tsx: New Post button, Trending Topics button, Settings link, post history, draft resume.\n\n7. Trending Topics — src/pages/TrendingTopics.tsx: parallel gemini-3-flash-preview search grounding queries, trend visualization, 3 gemini-3.1-pro-preview writing prompts, navigate to New Post prefilled.\n\n8. New Post — src/pages/NewPost.tsx with step components: Topic (rich input), Research (gemini-3-flash-preview search grounding, source cards with URL/title/author/date, highlight/delete), Outline (gemini-3.1-pro-preview one-shot, accept/back), Write (3 automatic gemini-3.1-pro-preview cycles: Write with citations, Edit for style, Guardrails-only), Complete (serif post with numbered footnotes, linked sources, attribution lineage).\n\n9. Demo mode — src/lib/demo.ts, src/pages/DemoMode.tsx, src/demo/bundled-session.json: session recording, replay through production path (fade-in prefills, highlight next button), bundled P&G session, cache-miss error with no API fallback.\n\n10. Test infrastructure — src/__tests__/: integration with canned data, smoke with gemini-2.5-flash-lite, manual mode option.\n\n11. Deploy config — railway.json or equivalent, deployment docs.\n\nEnsure App.tsx routing includes all pages. Verify imports/exports are consistent. Run npm install. Fix TypeScript errors.\n\nLog progress to .ai/runs/$KILROY_RUN_ID/implementation_log.md.\n\nPRE-EXIT VERIFICATION: if .ai/runs/$KILROY_RUN_ID/postmortem_latest.md exists, run sh scripts/validate-build.sh and re-read targeted files to confirm fixes.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH) ONLY on failure: status=fail with failure_reason, failure_class, and failure_signature=implement_fail." + prompt="Goal: $goal\n\nRead .ai/runs/$KILROY_RUN_ID/plan_final.md, .ai/runs/$KILROY_RUN_ID/spec.md, and .ai/runs/$KILROY_RUN_ID/definition_of_done.md. If the spec or DoD files do not exist at those paths, fall back to reading substack-spec-v01.md and substack-dod-v01.md directly.\n\nBEFORE ANYTHING ELSE: check if .ai/runs/$KILROY_RUN_ID/verify_errors.log exists. If it does, read it — it contains the exact commands that failed and their error output from the verify chain. Fix every error listed in that file, then delete .ai/runs/$KILROY_RUN_ID/verify_errors.log when all fixes are applied. Do NOT regenerate working code — only fix the specific errors.\n\nAlso check if .ai/runs/$KILROY_RUN_ID/verify_fidelity.md exists. If it does, read it — it contains per-AC pass/fail verdicts from the fidelity check. Fix every failing AC listed in that file.\n\nIf .ai/runs/$KILROY_RUN_ID/postmortem_latest.md exists, read it and fix ONLY identified gaps — do NOT regenerate working code. On repair passes, read and fix existing files rather than skipping them.\n\nImplement the complete Substack Creator Newsletter Engine as a single pass. On a fresh pass (no postmortem), check if target files already exist on disk and are non-empty — if so, skip those files. Implement each module with complete, functional code — no stubs, no placeholders, no TODO comments. Follow the plan and spec precisely.\n\nImplementation order (core infrastructure first, then features, then tests/deploy):\n\n1. Project scaffold — package.json, vite.config.ts, tsconfig.json, index.html, src/main.tsx, src/App.tsx. Install dependencies: react, react-dom, react-router-dom, idb, @google/generative-ai. Write ALL validation scripts:\n - scripts/validate-build.sh: runs npm run build, checks dist/ exists\n - scripts/validate-fmt.sh: runs npx prettier --check src/\n - scripts/validate-test.sh: runs integration scenarios first, then smoke, writes evidence + .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json even on failure\n - scripts/validate-browser.sh: runs browser verification and captures artifacts\n - scripts/fix-fmt.sh: runs npx prettier --write src/\n - scripts/validate-artifacts.sh: verifies manifest scenario IDs match DoD integration scenarios\n All scripts: #!/bin/sh, set -e, POSIX sh failure trap.\n\n2. LLM client — src/lib/llm.ts, src/lib/llm-schemas.ts: structured JSON output with schema enforcement, retry with error feedback and intelligent backoff, model switching (gemini-3-flash-preview/gemini-3.1-pro-preview/gemini-2.5-flash-lite), client-side API key.\n\n3. Persistence — src/lib/db.ts, src/lib/types.ts: IndexedDB via idb with stores for configuration (API key, company, voice, guardrails), drafts, sessions (all inputs/LLM responses/intermediate state), post history (Markdown + attribution). All data persists unless user resets.\n\n4. Shared UI — src/components/RichInput.tsx, Card.tsx, ProgressBar.tsx, StepIndicator.tsx, src/styles/global.css.\n\n5. Setup flow — src/pages/Settings.tsx and step components: API key, company (rich input + gemini-3.1-pro-preview confirm + back), voice, guardrails. Parallel completion, status icons. Reset everything with confirmation.\n\n6. Dashboard — src/pages/Dashboard.tsx: New Post button, Trending Topics button, Settings link, post history, draft resume.\n\n7. Trending Topics — src/pages/TrendingTopics.tsx: parallel gemini-3-flash-preview search grounding queries, trend visualization, 3 gemini-3.1-pro-preview writing prompts, navigate to New Post prefilled.\n\n8. New Post — src/pages/NewPost.tsx with step components: Topic (rich input), Research (gemini-3-flash-preview search grounding, source cards with URL/title/author/date, highlight/delete), Outline (gemini-3.1-pro-preview one-shot, accept/back), Write (3 automatic gemini-3.1-pro-preview cycles: Write with citations, Edit for style, Guardrails-only), Complete (serif post with numbered footnotes, linked sources, attribution lineage).\n\n9. Demo mode — src/lib/demo.ts, src/pages/DemoMode.tsx, src/demo/bundled-session.json: session recording, replay through production path (fade-in prefills, highlight next button), bundled P&G session, cache-miss error with no API fallback.\n\n10. Test infrastructure — src/__tests__/: integration with canned data, smoke with gemini-2.5-flash-lite, manual mode option.\n\n11. Deploy config — railway.json or equivalent, deployment docs.\n\nEnsure App.tsx routing includes all pages. Verify imports/exports are consistent. Run npm install. Fix TypeScript errors.\n\nLog progress to .ai/runs/$KILROY_RUN_ID/implementation_log.md.\n\nPRE-EXIT VERIFICATION: if .ai/runs/$KILROY_RUN_ID/postmortem_latest.md exists, run sh scripts/validate-build.sh and re-read targeted files to confirm fixes.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH) ONLY on failure: status=fail with failure_reason, failure_class, and failure_signature=implement_fail." ] } @@ -95,46 +95,46 @@ digraph substack_spec_v01 { fix_fmt [ shape=parallelogram, max_retries=0, - tool_command="sh scripts/fix-fmt.sh 2>&1 || { printf '\\n=== VERIFY FAILURE: sh scripts/fix-fmt.sh ===\\n%s\\n' \"$(cat /tmp/fix-fmt.log 2>/dev/null || echo 'script missing or produced no output')\" >> .ai/verify_errors.log; exit 1; }" + tool_command="sh scripts/fix-fmt.sh 2>&1 || { printf '\\n=== VERIFY FAILURE: sh scripts/fix-fmt.sh ===\\n%s\\n' \"$(cat /tmp/fix-fmt.log 2>/dev/null || echo 'script missing or produced no output')\" >> .ai/runs/$KILROY_RUN_ID/verify_errors.log; exit 1; }" ] verify_fmt [ shape=parallelogram, max_retries=0, - tool_command="sh scripts/validate-fmt.sh 2>&1 | tee /tmp/validate-fmt.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-fmt.sh ===\\n%s\\n' \"$(tail -30 /tmp/validate-fmt.log)\" >> .ai/verify_errors.log; exit 1; }" + tool_command="sh scripts/validate-fmt.sh 2>&1 | tee /tmp/validate-fmt.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-fmt.sh ===\\n%s\\n' \"$(tail -30 /tmp/validate-fmt.log)\" >> .ai/runs/$KILROY_RUN_ID/verify_errors.log; exit 1; }" ] check_fmt [shape=diamond, label="Fmt OK?"] verify_build [ shape=parallelogram, - tool_command="sh scripts/validate-build.sh 2>&1 | tee /tmp/validate-build.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-build.sh ===\\n%s\\n' \"$(tail -50 /tmp/validate-build.log)\" >> .ai/verify_errors.log; exit 1; }" + tool_command="sh scripts/validate-build.sh 2>&1 | tee /tmp/validate-build.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-build.sh ===\\n%s\\n' \"$(tail -50 /tmp/validate-build.log)\" >> .ai/runs/$KILROY_RUN_ID/verify_errors.log; exit 1; }" ] check_build [shape=diamond, label="Build OK?"] verify_test [ shape=parallelogram, - tool_command="sh scripts/validate-test.sh 2>&1 | tee /tmp/validate-test.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-test.sh ===\\n%s\\n' \"$(tail -50 /tmp/validate-test.log)\" >> .ai/verify_errors.log; exit 1; }" + tool_command="sh scripts/validate-test.sh 2>&1 | tee /tmp/validate-test.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-test.sh ===\\n%s\\n' \"$(tail -50 /tmp/validate-test.log)\" >> .ai/runs/$KILROY_RUN_ID/verify_errors.log; exit 1; }" ] check_test [shape=diamond, label="Tests OK?"] verify_browser [ shape=parallelogram, collect_browser_artifacts=true, - tool_command="sh scripts/validate-browser.sh 2>&1 | tee /tmp/validate-browser.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-browser.sh ===\\n%s\\n' \"$(tail -50 /tmp/validate-browser.log)\" >> .ai/verify_errors.log; exit 1; }" + tool_command="sh scripts/validate-browser.sh 2>&1 | tee /tmp/validate-browser.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-browser.sh ===\\n%s\\n' \"$(tail -50 /tmp/validate-browser.log)\" >> .ai/runs/$KILROY_RUN_ID/verify_errors.log; exit 1; }" ] check_browser [shape=diamond, label="Browser OK?"] verify_artifacts [ shape=parallelogram, max_retries=0, - tool_command="sh scripts/validate-artifacts.sh 2>&1 | tee /tmp/validate-artifacts.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-artifacts.sh ===\\n%s\\n' \"$(tail -30 /tmp/validate-artifacts.log)\" >> .ai/verify_errors.log; exit 1; }" + tool_command="sh scripts/validate-artifacts.sh 2>&1 | tee /tmp/validate-artifacts.log; test ${PIPESTATUS[0]} -eq 0 || { printf '\\n=== VERIFY FAILURE: sh scripts/validate-artifacts.sh ===\\n%s\\n' \"$(tail -30 /tmp/validate-artifacts.log)\" >> .ai/runs/$KILROY_RUN_ID/verify_errors.log; exit 1; }" ] check_artifacts [shape=diamond, label="Artifacts OK?"] verify_fidelity [ shape=box, class="verify", - prompt="Read .ai/runs/$KILROY_RUN_ID/spec.md, .ai/runs/$KILROY_RUN_ID/definition_of_done.md, .ai/runs/$KILROY_RUN_ID/verify_fidelity.md (if present), .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json, and relevant implementation files.\n\nEvaluate these grouped acceptance checks and map each to concrete file paths:\nAC1: src/**/settings* and src/**/router* and src/**/indexeddb* - first-run routing, setup flow, persistence.\nAC2: src/**/dashboard* and src/**/history* and src/**/draft* - dashboard actions and resume/view flows.\nAC3: src/**/trending* and src/**/research* - grounded research, deterministic trends, prompt handoff.\nAC4: src/**/new-post* and src/**/outline* and src/**/write* and src/**/complete* - full Topic->Complete pipeline with automatic write cycles.\nAC5: src/**/citation* and src/**/markdown* - citation lineage, footnote rendering, attribution persistence.\nAC6: src/**/demo* and src/demo/** - session picker/replay, bundled P&G demo, cache-miss no-fallback behavior.\nAC7: src/**/llm* and src/**/schema* - structured outputs, retry/backoff, production/test model intent.\nAC8: scripts/validate-build.sh and railway.json/Procfile/README* - build/deploy-readiness by static review only, no live deployment execution.\nAC9: scripts/validate-test.sh and scripts/validate-browser.sh and test sources - integration before smoke, manual mode option, browser evidence capture.\nAC10: .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json and .ai/runs/$KILROY_RUN_ID/test-evidence/latest/** - IT-1..IT-12 manifest coverage and required artifact types.\nAC11: src/**/style* and src/**/card* and src/**/progress* - Substack-like visual contract including bars, cards, and serif post preview.\n\nWrite .ai/runs/$KILROY_RUN_ID/verify_fidelity.md with pass/fail verdict and evidence per AC1..AC11.\n\nOn ANY failure: also append to .ai/verify_errors.log with the header '=== VERIFY FAILURE: verify_fidelity ===' followed by the list of failing ACs and their specific issues, so the implement node can read the consolidated error log.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH):\nAll pass: status=success.\nAny fail: status=fail with failure_reason listing failing ACs, failure_class=deterministic_agent_bug, failure_signature as sorted comma-separated failing AC IDs (e.g. AC1,AC10)." + prompt="Read .ai/runs/$KILROY_RUN_ID/spec.md, .ai/runs/$KILROY_RUN_ID/definition_of_done.md, .ai/runs/$KILROY_RUN_ID/verify_fidelity.md (if present), .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json, and relevant implementation files.\n\nEvaluate these grouped acceptance checks and map each to concrete file paths:\nAC1: src/**/settings* and src/**/router* and src/**/indexeddb* - first-run routing, setup flow, persistence.\nAC2: src/**/dashboard* and src/**/history* and src/**/draft* - dashboard actions and resume/view flows.\nAC3: src/**/trending* and src/**/research* - grounded research, deterministic trends, prompt handoff.\nAC4: src/**/new-post* and src/**/outline* and src/**/write* and src/**/complete* - full Topic->Complete pipeline with automatic write cycles.\nAC5: src/**/citation* and src/**/markdown* - citation lineage, footnote rendering, attribution persistence.\nAC6: src/**/demo* and src/demo/** - session picker/replay, bundled P&G demo, cache-miss no-fallback behavior.\nAC7: src/**/llm* and src/**/schema* - structured outputs, retry/backoff, production/test model intent.\nAC8: scripts/validate-build.sh and railway.json/Procfile/README* - build/deploy-readiness by static review only, no live deployment execution.\nAC9: scripts/validate-test.sh and scripts/validate-browser.sh and test sources - integration before smoke, manual mode option, browser evidence capture.\nAC10: .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json and .ai/runs/$KILROY_RUN_ID/test-evidence/latest/** - IT-1..IT-12 manifest coverage and required artifact types.\nAC11: src/**/style* and src/**/card* and src/**/progress* - Substack-like visual contract including bars, cards, and serif post preview.\n\nWrite .ai/runs/$KILROY_RUN_ID/verify_fidelity.md with pass/fail verdict and evidence per AC1..AC11.\n\nOn ANY failure: also append to .ai/runs/$KILROY_RUN_ID/verify_errors.log with the header '=== VERIFY FAILURE: verify_fidelity ===' followed by the list of failing ACs and their specific issues, so the implement node can read the consolidated error log.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH):\nAll pass: status=success.\nAny fail: status=fail with failure_reason listing failing ACs, failure_class=deterministic_agent_bug, failure_signature as sorted comma-separated failing AC IDs (e.g. AC1,AC10)." ] check_impl [shape=diamond, label="Impl OK?"] } @@ -151,12 +151,12 @@ digraph substack_spec_v01 { review_a [ class="branch-a", - prompt="Review the Substack Creator Newsletter Engine implementation against .ai/runs/$KILROY_RUN_ID/definition_of_done.md.\n\nRead the DoD for acceptance criteria. Read all implementation source files and .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json.\n\n## MANDATORY: Browser verification\nYou MUST verify the app works in a real browser. Do not trust code reading alone.\n1. Run: npm run build (must exit 0)\n2. Start the preview server: npx vite preview --port 4567 &\n3. Wait 2 seconds, then use curl to fetch http://localhost:4567/ and verify it returns HTML with a root div\n4. Check that the HTML references JS and CSS bundles\n5. Kill the preview server when done\n6. Check browser artifacts in .ai/test-evidence/latest/ — screenshots must be real rendered pages (not 1x1 placeholders). If screenshot files are under 5KB, they are fake. REJECT.\n7. Check that playwright-report or equivalent browser test output exists and shows real test execution\n\nIf browser verification fails or artifacts are fake, REJECT immediately.\n\n## Code and AC verification\nCheck every AC group (AC1 through AC11):\n\nAC1: Build exits 0, static assets produced, deployment config present and coherent (review only, no live deploy)\nAC2: IndexedDB persistence for API key, config, posts, drafts, sessions, attribution mappings across reloads\nAC3: Structured JSON output with retry/backoff, correct model routing (gemini-3-flash-preview/gemini-3.1-pro-preview/gemini-2.5-flash-lite), client-side key\nAC4: Parallel setup (any order), status icons, rich input (text/upload/link), gemini-3.1-pro-preview confirmation, back button\nAC5: Dashboard with New Post and Trending Topics buttons, Settings link, post history, draft resume\nAC6: Trending Topics: parallel gemini-3-flash-preview search research, trend visualization, 3 gemini-3.1-pro-preview writing prompts, navigate to New Post\nAC7: Full post pipeline with source metadata, highlight/delete, one-shot outline, 3 automatic write cycles, citations with attribution lineage\nAC8: Demo replay through production path, fade-in/highlight, bundled P&G session, cache-miss error\nAC9: Validation scripts and runtime evidence contract for build/test/browser checks\nAC10: IT-1..IT-12 evidence manifest coverage and artifact completeness\nAC11: Substack visual design: serif fonts, step dots, card primitive, accent progress bars, no spinners, footnoted post\n\nVerdict: APPROVED (all criteria met with evidence) or REJECTED (specific gaps by AC ID).\nWrite to .ai/runs/$KILROY_RUN_ID/review_a.md.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH): On success: status=success. On failure: status=fail with failure_reason and failure_class." + prompt="Review the Substack Creator Newsletter Engine implementation against .ai/runs/$KILROY_RUN_ID/definition_of_done.md.\n\nRead the DoD for acceptance criteria. Read all implementation source files and .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json.\n\n## MANDATORY: Browser verification\nYou MUST verify the app works in a real browser. Do not trust code reading alone.\n1. Run: npm run build (must exit 0)\n2. Start the preview server: npx vite preview --port 4567 &\n3. Wait 2 seconds, then use curl to fetch http://localhost:4567/ and verify it returns HTML with a root div\n4. Check that the HTML references JS and CSS bundles\n5. Kill the preview server when done\n6. Check browser artifacts in .ai/runs/$KILROY_RUN_ID/test-evidence/latest/ — screenshots must be real rendered pages (not 1x1 placeholders). If screenshot files are under 5KB, they are fake. REJECT.\n7. Check that playwright-report or equivalent browser test output exists and shows real test execution\n\nIf browser verification fails or artifacts are fake, REJECT immediately.\n\n## Code and AC verification\nCheck every AC group (AC1 through AC11):\n\nAC1: Build exits 0, static assets produced, deployment config present and coherent (review only, no live deploy)\nAC2: IndexedDB persistence for API key, config, posts, drafts, sessions, attribution mappings across reloads\nAC3: Structured JSON output with retry/backoff, correct model routing (gemini-3-flash-preview/gemini-3.1-pro-preview/gemini-2.5-flash-lite), client-side key\nAC4: Parallel setup (any order), status icons, rich input (text/upload/link), gemini-3.1-pro-preview confirmation, back button\nAC5: Dashboard with New Post and Trending Topics buttons, Settings link, post history, draft resume\nAC6: Trending Topics: parallel gemini-3-flash-preview search research, trend visualization, 3 gemini-3.1-pro-preview writing prompts, navigate to New Post\nAC7: Full post pipeline with source metadata, highlight/delete, one-shot outline, 3 automatic write cycles, citations with attribution lineage\nAC8: Demo replay through production path, fade-in/highlight, bundled P&G session, cache-miss error\nAC9: Validation scripts and runtime evidence contract for build/test/browser checks\nAC10: IT-1..IT-12 evidence manifest coverage and artifact completeness\nAC11: Substack visual design: serif fonts, step dots, card primitive, accent progress bars, no spinners, footnoted post\n\nVerdict: APPROVED (all criteria met with evidence) or REJECTED (specific gaps by AC ID).\nWrite to .ai/runs/$KILROY_RUN_ID/review_a.md.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH): On success: status=success. On failure: status=fail with failure_reason and failure_class." ] review_b [ class="branch-b", - prompt="Review the Substack Creator Newsletter Engine implementation against .ai/runs/$KILROY_RUN_ID/definition_of_done.md.\n\nRead the DoD for acceptance criteria. Read all implementation source files and .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json.\n\n## MANDATORY: Browser verification\nYou MUST verify the app works in a real browser. Do not trust code reading alone.\n1. Run: npm run build (must exit 0)\n2. Start the preview server: npx vite preview --port 4568 &\n3. Wait 2 seconds, then use curl to fetch http://localhost:4568/ and verify it returns HTML with a root div\n4. Check that the HTML references JS and CSS bundles\n5. Kill the preview server when done\n6. Check browser artifacts in .ai/test-evidence/latest/ — screenshots must be real rendered pages (not 1x1 placeholders). If screenshot files are under 5KB, they are fake. REJECT.\n7. Check that playwright-report or equivalent browser test output exists and shows real test execution\n\nIf browser verification fails or artifacts are fake, REJECT immediately.\n\n## Code and AC verification\nCheck every AC group (AC1 through AC11):\n\nAC1: Build exits 0, static assets produced, deployment config present and coherent (review only, no live deploy)\nAC2: IndexedDB persistence for API key, config, posts, drafts, sessions, attribution mappings across reloads\nAC3: Structured JSON output with retry/backoff, correct model routing (gemini-3-flash-preview/gemini-3.1-pro-preview/gemini-2.5-flash-lite), client-side key\nAC4: Parallel setup (any order), status icons, rich input (text/upload/link), gemini-3.1-pro-preview confirmation, back button\nAC5: Dashboard with New Post and Trending Topics buttons, Settings link, post history, draft resume\nAC6: Trending Topics: parallel gemini-3-flash-preview search research, trend visualization, 3 gemini-3.1-pro-preview writing prompts, navigate to New Post\nAC7: Full post pipeline with source metadata, highlight/delete, one-shot outline, 3 automatic write cycles, citations with attribution lineage\nAC8: Demo replay through production path, fade-in/highlight, bundled P&G session, cache-miss error\nAC9: Validation scripts and runtime evidence contract for build/test/browser checks\nAC10: IT-1..IT-12 evidence manifest coverage and artifact completeness\nAC11: Substack visual design: serif fonts, step dots, card primitive, accent progress bars, no spinners, footnoted post\n\nVerdict: APPROVED (all criteria met with evidence) or REJECTED (specific gaps by AC ID).\nWrite to .ai/runs/$KILROY_RUN_ID/review_b.md.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH): On success: status=success. On failure: status=fail with failure_reason and failure_class." + prompt="Review the Substack Creator Newsletter Engine implementation against .ai/runs/$KILROY_RUN_ID/definition_of_done.md.\n\nRead the DoD for acceptance criteria. Read all implementation source files and .ai/runs/$KILROY_RUN_ID/test-evidence/latest/manifest.json.\n\n## MANDATORY: Browser verification\nYou MUST verify the app works in a real browser. Do not trust code reading alone.\n1. Run: npm run build (must exit 0)\n2. Start the preview server: npx vite preview --port 4568 &\n3. Wait 2 seconds, then use curl to fetch http://localhost:4568/ and verify it returns HTML with a root div\n4. Check that the HTML references JS and CSS bundles\n5. Kill the preview server when done\n6. Check browser artifacts in .ai/runs/$KILROY_RUN_ID/test-evidence/latest/ — screenshots must be real rendered pages (not 1x1 placeholders). If screenshot files are under 5KB, they are fake. REJECT.\n7. Check that playwright-report or equivalent browser test output exists and shows real test execution\n\nIf browser verification fails or artifacts are fake, REJECT immediately.\n\n## Code and AC verification\nCheck every AC group (AC1 through AC11):\n\nAC1: Build exits 0, static assets produced, deployment config present and coherent (review only, no live deploy)\nAC2: IndexedDB persistence for API key, config, posts, drafts, sessions, attribution mappings across reloads\nAC3: Structured JSON output with retry/backoff, correct model routing (gemini-3-flash-preview/gemini-3.1-pro-preview/gemini-2.5-flash-lite), client-side key\nAC4: Parallel setup (any order), status icons, rich input (text/upload/link), gemini-3.1-pro-preview confirmation, back button\nAC5: Dashboard with New Post and Trending Topics buttons, Settings link, post history, draft resume\nAC6: Trending Topics: parallel gemini-3-flash-preview search research, trend visualization, 3 gemini-3.1-pro-preview writing prompts, navigate to New Post\nAC7: Full post pipeline with source metadata, highlight/delete, one-shot outline, 3 automatic write cycles, citations with attribution lineage\nAC8: Demo replay through production path, fade-in/highlight, bundled P&G session, cache-miss error\nAC9: Validation scripts and runtime evidence contract for build/test/browser checks\nAC10: IT-1..IT-12 evidence manifest coverage and artifact completeness\nAC11: Substack visual design: serif fonts, step dots, card primitive, accent progress bars, no spinners, footnoted post\n\nVerdict: APPROVED (all criteria met with evidence) or REJECTED (specific gaps by AC ID).\nWrite to .ai/runs/$KILROY_RUN_ID/review_b.md.\n\nWrite to $KILROY_STAGE_STATUS_PATH (fallback: $KILROY_STAGE_STATUS_FALLBACK_PATH): On success: status=success. On failure: status=fail with failure_reason and failure_class." ] review_consensus [ @@ -204,7 +204,7 @@ digraph substack_spec_v01 { // Implement -> Verify chain implement -> fix_fmt - // Verify chain — failures go directly back to implement (errors logged to .ai/verify_errors.log) + // Verify chain — failures go directly back to implement (errors logged to .ai/runs/$KILROY_RUN_ID/verify_errors.log) fix_fmt -> verify_fmt verify_fmt -> check_fmt check_fmt -> verify_build [condition="outcome=success"] diff --git a/internal/attractor/engine/cli_only_models.go b/internal/attractor/engine/cli_only_models.go index 2ff9de6a..2feb2bff 100644 --- a/internal/attractor/engine/cli_only_models.go +++ b/internal/attractor/engine/cli_only_models.go @@ -5,7 +5,8 @@ import "strings" // cliOnlyModelIDs lists models that MUST route through CLI backend regardless // of provider backend configuration. These models have no API endpoint. var cliOnlyModelIDs = map[string]bool{ - "gpt-5.4-spark": true, + "gpt-5.3-codex-spark": true, + "gpt-5.4-spark": true, } // isCLIOnlyModel returns true if the given model ID (with or without provider diff --git a/internal/attractor/engine/cli_only_models_test.go b/internal/attractor/engine/cli_only_models_test.go index 4e03c289..503144f8 100644 --- a/internal/attractor/engine/cli_only_models_test.go +++ b/internal/attractor/engine/cli_only_models_test.go @@ -8,7 +8,7 @@ func TestIsCLIOnlyModel(t *testing.T) { want bool }{ {"gpt-5.4-spark", true}, - {"GPT-5.4-SPARK", true}, // case-insensitive + {"GPT-5.4-SPARK", true}, // case-insensitive {"openai/gpt-5.4-spark", true}, // with provider prefix {"gpt-5.4", false}, // regular codex {"gpt-5.4", false}, diff --git a/internal/attractor/engine/codergen_heartbeat_test.go b/internal/attractor/engine/codergen_heartbeat_test.go index 408e8a7e..531b1a62 100644 --- a/internal/attractor/engine/codergen_heartbeat_test.go +++ b/internal/attractor/engine/codergen_heartbeat_test.go @@ -350,7 +350,9 @@ digraph G { // TestRunWithConfig_APIBackend_StallWatchdogFiresDespiteHeartbeatGoroutine verifies // that the stall watchdog still fires when the API agent_loop session is truly // stalled (no new session events) even though the heartbeat goroutine is running. -// The conditional heartbeat should NOT emit progress when event_count is static. +// Heartbeat events are emitted unconditionally for observability but use +// appendProgressLivenessOnly when no new events are produced, which does not +// reset the stall watchdog timer. func TestRunWithConfig_APIBackend_StallWatchdogFiresDespiteHeartbeatGoroutine(t *testing.T) { repo := initTestRepo(t) logsRoot := t.TempDir() @@ -417,7 +419,9 @@ digraph G { // TestRunWithConfig_CLIBackend_StallWatchdogFiresDespiteHeartbeatGoroutine verifies // that the stall watchdog still fires when the CLI codergen process is truly // stalled (no stdout/stderr output) even though the heartbeat goroutine is running. -// The conditional heartbeat should NOT emit progress when file sizes are static. +// Heartbeat events are emitted unconditionally for observability but use +// appendProgressLivenessOnly when no output growth is detected, which does not +// reset the stall watchdog timer. func TestRunWithConfig_CLIBackend_StallWatchdogFiresDespiteHeartbeatGoroutine(t *testing.T) { repo := initTestRepo(t) logsRoot := t.TempDir() @@ -472,6 +476,95 @@ digraph G { t.Logf("stall watchdog fired as expected: %v", err) } +func TestRunWithConfig_HeartbeatEmitsDuringQuietPeriods(t *testing.T) { + repo := initTestRepo(t) + logsRoot := t.TempDir() + + pinned := writePinnedCatalog(t) + cxdbSrv := newCXDBTestServer(t) + + // Create a mock codex CLI that produces initial output, then goes quiet, + // then produces more output. The quiet period should still produce heartbeats. + cli := filepath.Join(t.TempDir(), "codex") + if err := os.WriteFile(cli, []byte(`#!/usr/bin/env bash +set -euo pipefail +echo '{"item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"starting"}]}}' >&1 +# Quiet period: no output for 3 seconds. +sleep 3 +echo '{"item":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"done"}]}}' >&1 +`), 0o755); err != nil { + t.Fatal(err) + } + + t.Setenv("KILROY_CODERGEN_HEARTBEAT_INTERVAL", "1s") + t.Setenv("KILROY_CODEX_IDLE_TIMEOUT", "10s") + + cfg := &RunConfigFile{Version: 1} + cfg.Repo.Path = repo + cfg.CXDB.BinaryAddr = cxdbSrv.BinaryAddr() + cfg.CXDB.HTTPBaseURL = cxdbSrv.URL() + cfg.LLM.CLIProfile = "test_shim" + cfg.LLM.Providers = map[string]ProviderConfig{ + "openai": {Backend: BackendCLI, Executable: cli}, + } + cfg.ModelDB.OpenRouterModelInfoPath = pinned + cfg.ModelDB.OpenRouterModelInfoUpdatePolicy = "pinned" + cfg.Git.RunBranchPrefix = "attractor/run" + + dot := []byte(` +digraph G { + graph [goal="test quiet period heartbeats"] + start [shape=Mdiamond] + exit [shape=Msquare] + a [shape=box, llm_provider=openai, llm_model=gpt-5.4, prompt="do something quiet"] + start -> a -> exit +} +`) + + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + res, err := RunWithConfig(ctx, dot, cfg, RunOptions{RunID: "quiet-heartbeat-test", LogsRoot: logsRoot, AllowTestShim: true}) + if err != nil { + t.Fatalf("RunWithConfig: %v", err) + } + + progressPath := filepath.Join(res.LogsRoot, "progress.ndjson") + data, err := os.ReadFile(progressPath) + if err != nil { + t.Fatalf("read progress.ndjson: %v", err) + } + + heartbeats := 0 + var hasQuietHeartbeat bool + for _, line := range strings.Split(string(data), "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + var ev map[string]any + if err := json.Unmarshal([]byte(line), &ev); err != nil { + continue + } + if ev["event"] == "stage_heartbeat" && ev["node_id"] == "a" { + heartbeats++ + if _, ok := ev["since_last_output_s"]; !ok { + t.Error("heartbeat missing since_last_output_s field") + } + sinceOutput, _ := ev["since_last_output_s"].(float64) + if sinceOutput >= 1 { + hasQuietHeartbeat = true + } + } + } + if heartbeats < 2 { + t.Fatalf("expected at least 2 heartbeat events (some during quiet period), got %d", heartbeats) + } + if !hasQuietHeartbeat { + t.Fatal("expected at least one heartbeat with since_last_output_s >= 1 (quiet period heartbeat)") + } + t.Logf("found %d heartbeat events, quiet period heartbeats present", heartbeats) +} + func TestRunWithConfig_HeartbeatStopsAfterProcessExit(t *testing.T) { events := runHeartbeatFixture(t) endIdx := findEventIndex(events, "stage_attempt_end", "a") diff --git a/internal/attractor/engine/codergen_router.go b/internal/attractor/engine/codergen_router.go index 769cc809..4b817e46 100644 --- a/internal/attractor/engine/codergen_router.go +++ b/internal/attractor/engine/codergen_router.go @@ -328,21 +328,30 @@ func (r *CodergenRouter) runAPI(ctx context.Context, execCtx *Execution, node *m ticker := time.NewTicker(interval) defer ticker.Stop() var lastCount int + lastEventTime := time.Now() for { select { case <-ticker.C: eventsMu.Lock() count := len(events) eventsMu.Unlock() - if count > lastCount { + eventsGrew := count > lastCount + if eventsGrew { lastCount = count - if execCtx != nil && execCtx.Engine != nil { - execCtx.Engine.appendProgress(map[string]any{ - "event": "stage_heartbeat", - "node_id": node.ID, - "elapsed_s": int(time.Since(apiStart).Seconds()), - "event_count": count, - }) + lastEventTime = time.Now() + } + if execCtx != nil && execCtx.Engine != nil { + ev := map[string]any{ + "event": "stage_heartbeat", + "node_id": node.ID, + "elapsed_s": int(time.Since(apiStart).Seconds()), + "event_count": count, + "since_last_output_s": int(time.Since(lastEventTime).Seconds()), + } + if eventsGrew { + execCtx.Engine.appendProgress(ev) + } else { + execCtx.Engine.appendProgressLivenessOnly(ev) } } case <-heartbeatStop: @@ -1159,22 +1168,31 @@ func (r *CodergenRouter) runCLI(ctx context.Context, execCtx *Execution, node *m ticker := time.NewTicker(interval) defer ticker.Stop() var lastStdoutSz, lastStderrSz int64 + lastOutputTime := time.Now() for { select { case <-ticker.C: stdoutSz, _ := fileSize(stdoutPath) stderrSz, _ := fileSize(stderrPath) - if stdoutSz > lastStdoutSz || stderrSz > lastStderrSz { + outputGrew := stdoutSz > lastStdoutSz || stderrSz > lastStderrSz + if outputGrew { lastStdoutSz = stdoutSz lastStderrSz = stderrSz - if execCtx != nil && execCtx.Engine != nil { - execCtx.Engine.appendProgress(map[string]any{ - "event": "stage_heartbeat", - "node_id": node.ID, - "elapsed_s": int(time.Since(start).Seconds()), - "stdout_bytes": stdoutSz, - "stderr_bytes": stderrSz, - }) + lastOutputTime = time.Now() + } + if execCtx != nil && execCtx.Engine != nil { + ev := map[string]any{ + "event": "stage_heartbeat", + "node_id": node.ID, + "elapsed_s": int(time.Since(start).Seconds()), + "stdout_bytes": stdoutSz, + "stderr_bytes": stderrSz, + "since_last_output_s": int(time.Since(lastOutputTime).Seconds()), + } + if outputGrew { + execCtx.Engine.appendProgress(ev) + } else { + execCtx.Engine.appendProgressLivenessOnly(ev) } } case <-heartbeatStop: @@ -1845,6 +1863,24 @@ func emitCXDBToolTurns(ctx context.Context, eng *Engine, nodeID string, ev agent } runID := eng.Options.RunID switch ev.Kind { + case agent.EventAssistantTextEnd: + text := strings.TrimSpace(fmt.Sprint(ev.Data["text"])) + // Keep a queryable assistant turn even when the model turn is tool-only. + if text == "" { + text = "[tool_use]" + } + if _, _, err := eng.CXDB.Append(ctx, "com.kilroy.attractor.AssistantMessage", 1, map[string]any{ + "run_id": runID, + "node_id": nodeID, + "text": truncate(text, 8_000), + "model": "", + "input_tokens": uint64(0), + "output_tokens": uint64(0), + "tool_use_count": uint32(0), + "timestamp_ms": nowMS(), + }); err != nil { + eng.Warn(fmt.Sprintf("cxdb append AssistantMessage failed (node=%s): %v", nodeID, err)) + } case agent.EventToolCallStart: toolName := strings.TrimSpace(fmt.Sprint(ev.Data["tool_name"])) callID := strings.TrimSpace(fmt.Sprint(ev.Data["call_id"])) diff --git a/internal/attractor/engine/codergen_router_cxdb_test.go b/internal/attractor/engine/codergen_router_cxdb_test.go new file mode 100644 index 00000000..0a7b6369 --- /dev/null +++ b/internal/attractor/engine/codergen_router_cxdb_test.go @@ -0,0 +1,118 @@ +package engine + +import ( + "context" + "testing" + "time" + + "github.com/danshapiro/kilroy/internal/agent" +) + +func TestEmitCXDBToolTurns_EmitsAssistantToolCallAndToolResult(t *testing.T) { + srv := newCXDBTestServer(t) + eng := newTestEngineWithCXDB(t, srv) + ctx := context.Background() + + emitCXDBToolTurns(ctx, eng, "node_api", agent.SessionEvent{ + Kind: agent.EventAssistantTextEnd, + Timestamp: time.Now(), + Data: map[string]any{ + "text": "I will read the file.", + }, + }) + emitCXDBToolTurns(ctx, eng, "node_api", agent.SessionEvent{ + Kind: agent.EventToolCallStart, + Timestamp: time.Now(), + Data: map[string]any{ + "tool_name": "Read", + "call_id": "toolu_123", + "arguments_json": `{"file_path":"/tmp/a.txt"}`, + }, + }) + emitCXDBToolTurns(ctx, eng, "node_api", agent.SessionEvent{ + Kind: agent.EventToolCallEnd, + Timestamp: time.Now(), + Data: map[string]any{ + "tool_name": "Read", + "call_id": "toolu_123", + "full_output": "hello world", + "is_error": false, + }, + }) + + turns := srv.Turns(eng.CXDB.ContextID) + if len(turns) != 3 { + t.Fatalf("expected 3 turns, got %d", len(turns)) + } + + if turns[0]["type_id"] != "com.kilroy.attractor.AssistantMessage" { + t.Fatalf("turn[0] type_id: got %q", turns[0]["type_id"]) + } + assistant := turns[0]["payload"].(map[string]any) + if assistant["text"] != "I will read the file." { + t.Fatalf("AssistantMessage text: got %q", assistant["text"]) + } + if assistant["node_id"] != "node_api" { + t.Fatalf("AssistantMessage node_id: got %q", assistant["node_id"]) + } + if assistant["run_id"] != "test-run" { + t.Fatalf("AssistantMessage run_id: got %q", assistant["run_id"]) + } + if _, ok := assistant["timestamp_ms"]; !ok { + t.Fatal("AssistantMessage missing timestamp_ms") + } + + if turns[1]["type_id"] != "com.kilroy.attractor.ToolCall" { + t.Fatalf("turn[1] type_id: got %q", turns[1]["type_id"]) + } + toolCall := turns[1]["payload"].(map[string]any) + if toolCall["tool_name"] != "Read" { + t.Fatalf("ToolCall tool_name: got %q", toolCall["tool_name"]) + } + if toolCall["call_id"] != "toolu_123" { + t.Fatalf("ToolCall call_id: got %q", toolCall["call_id"]) + } + + if turns[2]["type_id"] != "com.kilroy.attractor.ToolResult" { + t.Fatalf("turn[2] type_id: got %q", turns[2]["type_id"]) + } + toolResult := turns[2]["payload"].(map[string]any) + if toolResult["tool_name"] != "Read" { + t.Fatalf("ToolResult tool_name: got %q", toolResult["tool_name"]) + } + if toolResult["call_id"] != "toolu_123" { + t.Fatalf("ToolResult call_id: got %q", toolResult["call_id"]) + } + if toolResult["output"] != "hello world" { + t.Fatalf("ToolResult output: got %q", toolResult["output"]) + } + if toolResult["is_error"] != false { + t.Fatalf("ToolResult is_error: got %v", toolResult["is_error"]) + } +} + +func TestEmitCXDBToolTurns_AssistantTextFallbackForToolOnlyTurn(t *testing.T) { + srv := newCXDBTestServer(t) + eng := newTestEngineWithCXDB(t, srv) + ctx := context.Background() + + emitCXDBToolTurns(ctx, eng, "node_api", agent.SessionEvent{ + Kind: agent.EventAssistantTextEnd, + Timestamp: time.Now(), + Data: map[string]any{ + "text": " ", + }, + }) + + turns := srv.Turns(eng.CXDB.ContextID) + if len(turns) != 1 { + t.Fatalf("expected 1 turn, got %d", len(turns)) + } + if turns[0]["type_id"] != "com.kilroy.attractor.AssistantMessage" { + t.Fatalf("type_id: got %q", turns[0]["type_id"]) + } + payload := turns[0]["payload"].(map[string]any) + if payload["text"] != "[tool_use]" { + t.Fatalf("fallback text: got %q", payload["text"]) + } +} diff --git a/internal/attractor/engine/engine.go b/internal/attractor/engine/engine.go index 43aa0a2d..66e3ebfd 100644 --- a/internal/attractor/engine/engine.go +++ b/internal/attractor/engine/engine.go @@ -373,10 +373,14 @@ func (e *Engine) run(ctx context.Context) (res *Result, err error) { runCtx, cancelRun := context.WithCancelCause(ctx) defer cancelRun(nil) + var releaseOwnership func() defer func() { - if err != nil { + if err != nil && !isRunOwnershipConflict(err) { e.persistFatalOutcome(ctx, err) } + if releaseOwnership != nil { + releaseOwnership() + } }() if e.Options.RepoPath == "" { @@ -403,6 +407,10 @@ func (e *Engine) run(ctx context.Context) (res *Result, err error) { if err := os.MkdirAll(e.LogsRoot, 0o755); err != nil { return nil, err } + releaseOwnership, err = acquireRunOwnership(e.LogsRoot, e.Options.RunID) + if err != nil { + return nil, err + } // Record PID so attractor status can detect a running process. _ = os.WriteFile(filepath.Join(e.LogsRoot, "run.pid"), []byte(strconv.Itoa(os.Getpid())), 0o644) // Snapshot the run config for repeatability and resume. @@ -1223,7 +1231,7 @@ func (e *Engine) executeWithRetry(ctx context.Context, node *model.Node, retries _ = writeJSON(filepath.Join(stageDir, "status.json"), fo) return fo, nil } - if out.Status == runtime.StatusSuccess || out.Status == runtime.StatusPartialSuccess || out.Status == runtime.StatusSkipped { + if out.Status == runtime.StatusSuccess || out.Status == runtime.StatusDegradedSuccess || out.Status == runtime.StatusPartialSuccess || out.Status == runtime.StatusSkipped { retries[node.ID] = 0 return out, nil } @@ -1957,7 +1965,7 @@ func checkGoalGates(g *model.Graph, outcomes map[string]runtime.Outcome) (bool, if !strings.EqualFold(n.Attr("goal_gate", "false"), "true") { continue } - if out.Status != runtime.StatusSuccess && out.Status != runtime.StatusPartialSuccess { + if out.Status != runtime.StatusSuccess && out.Status != runtime.StatusDegradedSuccess && out.Status != runtime.StatusPartialSuccess { return false, id } } diff --git a/internal/attractor/engine/handlers.go b/internal/attractor/engine/handlers.go index 8bcd9813..9567c2d1 100644 --- a/internal/attractor/engine/handlers.go +++ b/internal/attractor/engine/handlers.go @@ -4,7 +4,9 @@ import ( "bytes" "context" "encoding/json" + "errors" "fmt" + "io" "os" "os/exec" "path/filepath" @@ -235,25 +237,128 @@ type fallbackStatusPath struct { source statusSource } -func copyFirstValidFallbackStatus(stageStatusPath string, fallbackPaths []fallbackStatusPath) (statusSource, error) { +type fallbackStatusFailureMode string + +const ( + fallbackFailureModeNone fallbackStatusFailureMode = "" + fallbackFailureModeMissing fallbackStatusFailureMode = "missing" + fallbackFailureModeUnreadable fallbackStatusFailureMode = "unreadable" + fallbackFailureModeCorrupt fallbackStatusFailureMode = "corrupt" + fallbackFailureModeInvalidPayload fallbackStatusFailureMode = "invalid_payload" +) + +const ( + fallbackStatusDecodeMaxAttempts = 3 + fallbackStatusDecodeBaseDelay = 25 * time.Millisecond +) + +func copyFirstValidFallbackStatus(stageStatusPath string, fallbackPaths []fallbackStatusPath) (statusSource, string, error) { if _, err := os.Stat(stageStatusPath); err == nil { - return statusSourceCanonical, nil + return statusSourceCanonical, "", nil } + issues := make([]string, 0, len(fallbackPaths)) for _, fallback := range fallbackPaths { - b, err := os.ReadFile(fallback.path) + b, mode, err := readAndDecodeFallbackStatusWithRetry(fallback.path) if err != nil { - continue - } - if _, err := runtime.DecodeOutcomeJSON(b); err != nil { + issues = append(issues, formatFallbackStatusIssue(fallback, mode, err)) continue } if err := runtime.WriteFileAtomic(stageStatusPath, b); err != nil { - return statusSourceNone, err + return statusSourceNone, strings.Join(issues, "; "), err } _ = os.Remove(fallback.path) - return fallback.source, nil + return fallback.source, "", nil + } + return statusSourceNone, strings.Join(issues, "; "), nil +} + +func readAndDecodeFallbackStatusWithRetry(path string) ([]byte, fallbackStatusFailureMode, error) { + var lastErr error + mode := fallbackFailureModeMissing + for attempt := 1; attempt <= fallbackStatusDecodeMaxAttempts; attempt++ { + b, err := os.ReadFile(path) + if err != nil { + lastErr = err + if os.IsNotExist(err) { + mode = fallbackFailureModeMissing + } else { + mode = fallbackFailureModeUnreadable + } + if attempt < fallbackStatusDecodeMaxAttempts && shouldRetryFallbackRead(mode, err) { + time.Sleep(backoffDelay(attempt)) + continue + } + return nil, mode, lastErr + } + if _, err := runtime.DecodeOutcomeJSON(b); err != nil { + lastErr = err + mode = classifyFallbackDecodeError(b, err) + if attempt < fallbackStatusDecodeMaxAttempts && shouldRetryFallbackDecode(mode, b, err) { + time.Sleep(backoffDelay(attempt)) + continue + } + return nil, mode, lastErr + } + return b, fallbackFailureModeNone, nil + } + return nil, mode, lastErr +} + +func classifyFallbackDecodeError(raw []byte, err error) fallbackStatusFailureMode { + if isCorruptFallbackPayload(raw, err) { + return fallbackFailureModeCorrupt + } + return fallbackFailureModeInvalidPayload +} + +func shouldRetryFallbackRead(mode fallbackStatusFailureMode, err error) bool { + if mode == fallbackFailureModeMissing { + // Missing fallback files are expected in normal flows; retrying them + // just adds latency before trying the next candidate path. + return false + } + return errors.Is(err, io.ErrUnexpectedEOF) +} + +func shouldRetryFallbackDecode(mode fallbackStatusFailureMode, raw []byte, err error) bool { + return mode == fallbackFailureModeCorrupt && isCorruptFallbackPayload(raw, err) +} + +func isCorruptFallbackPayload(raw []byte, err error) bool { + if len(strings.TrimSpace(string(raw))) == 0 { + return true + } + var syntaxErr *json.SyntaxError + if errors.As(err, &syntaxErr) { + return true + } + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + return true + } + msg := strings.ToLower(strings.TrimSpace(err.Error())) + return strings.Contains(msg, "unexpected end of json input") || strings.Contains(msg, "invalid character") +} + +func backoffDelay(attempt int) time.Duration { + if attempt <= 0 { + attempt = 1 + } + return time.Duration(attempt) * fallbackStatusDecodeBaseDelay +} + +func formatFallbackStatusIssue(fallback fallbackStatusPath, mode fallbackStatusFailureMode, err error) string { + switch mode { + case fallbackFailureModeMissing: + return fmt.Sprintf("fallback[%s] missing status artifact: %s", fallback.source, fallback.path) + case fallbackFailureModeUnreadable: + return fmt.Sprintf("fallback[%s] unreadable status artifact: %s (%v)", fallback.source, fallback.path, err) + case fallbackFailureModeCorrupt: + return fmt.Sprintf("fallback[%s] corrupt status artifact: %s (%v)", fallback.source, fallback.path, err) + case fallbackFailureModeInvalidPayload: + return fmt.Sprintf("fallback[%s] invalid status payload: %s (%v)", fallback.source, fallback.path, err) + default: + return fmt.Sprintf("fallback[%s] status ingestion error: %s (%v)", fallback.source, fallback.path, err) } - return statusSourceNone, nil } func buildManualBoxFanInPromptPreamble(exec *Execution, node *model.Node) string { @@ -468,20 +573,29 @@ func (h *CodergenHandler) Execute(ctx context.Context, exec *Execution, node *mo // If the backend/agent wrote a worktree status.json, surface it to the engine by // copying it into the authoritative stage directory location. source := statusSourceNone + ingestionDiagnostic := "" if len(worktreeStatusPaths) > 0 { var err error - source, err = copyFirstValidFallbackStatus(stageStatusPath, worktreeStatusPaths) + source, ingestionDiagnostic, err = copyFirstValidFallbackStatus(stageStatusPath, worktreeStatusPaths) if err != nil { - return runtime.Outcome{Status: runtime.StatusFail, FailureReason: err.Error()}, nil + reason := err.Error() + if strings.TrimSpace(ingestionDiagnostic) != "" { + reason = reason + "; " + strings.TrimSpace(ingestionDiagnostic) + } + return runtime.Outcome{Status: runtime.StatusFail, FailureReason: reason}, nil } } if exec != nil && exec.Engine != nil { - exec.Engine.appendProgress(map[string]any{ + progress := map[string]any{ "event": "status_ingestion_decision", "node_id": node.ID, "source": string(source), "copied": source == statusSourceWorktree || source == statusSourceDotAI, - }) + } + if strings.TrimSpace(ingestionDiagnostic) != "" { + progress["diagnostic"] = strings.TrimSpace(ingestionDiagnostic) + } + exec.Engine.appendProgress(progress) } if out != nil { @@ -523,9 +637,13 @@ func (h *CodergenHandler) Execute(ctx context.Context, exec *Execution, node *mo }, }, nil } + reason := "missing status.json (auto_status=false)" + if strings.TrimSpace(ingestionDiagnostic) != "" { + reason = reason + "; " + strings.TrimSpace(ingestionDiagnostic) + } return runtime.Outcome{ Status: runtime.StatusFail, - FailureReason: "missing status.json (auto_status=false)", + FailureReason: reason, Notes: "codergen completed without an outcome or status.json", ContextUpdates: map[string]any{ "last_stage": node.ID, diff --git a/internal/attractor/engine/input_manifest_contract_test.go b/internal/attractor/engine/input_manifest_contract_test.go index 210b4657..2cf0948e 100644 --- a/internal/attractor/engine/input_manifest_contract_test.go +++ b/internal/attractor/engine/input_manifest_contract_test.go @@ -73,6 +73,7 @@ digraph G { func TestInputManifestContract_DisabledSuppressesEnvPreambleAndEvents(t *testing.T) { repo := initTestRepo(t) logsRoot := t.TempDir() + t.Setenv(inputsManifestEnvKey, "/tmp/ambient-inputs-manifest.json") mustWriteInputFile(t, filepath.Join(repo, ".ai", "definition_of_done.md"), "line by line") cli := writeInputManifestProbeCLI(t, false) diff --git a/internal/attractor/engine/input_materialization.go b/internal/attractor/engine/input_materialization.go index 932c24e5..83bc2aa7 100644 --- a/internal/attractor/engine/input_materialization.go +++ b/internal/attractor/engine/input_materialization.go @@ -752,7 +752,7 @@ func isLikelyArtifactInputPath(path string) bool { segments := strings.Split(normalized, "/") for _, seg := range segments { switch seg { - case ".git", ".jj", "logs", "benchmarks", "worktree", "node_modules", ".pnpm-store", ".venv", "venv", "__pycache__", ".pytest_cache", "dist-info", "managed": + case ".git", ".jj", "logs", "benchmarks", "worktree", ".worktrees", "node_modules", ".pnpm-store", ".venv", "venv", "__pycache__", ".pytest_cache", "dist-info", "managed", ".cargo-target": return true } } diff --git a/internal/attractor/engine/input_reference_scan.go b/internal/attractor/engine/input_reference_scan.go index 761aabaa..16a61b3b 100644 --- a/internal/attractor/engine/input_reference_scan.go +++ b/internal/attractor/engine/input_reference_scan.go @@ -163,13 +163,17 @@ func looksLikeReferenceToken(token string) bool { if strings.ContainsAny(token, "<>|") { return false } + // Reject tokens with spaces - they are natural language, not paths/globs. + if strings.ContainsAny(token, " \t") { + return false + } if strings.Contains(token, "/") || strings.Contains(token, "\\") { return true } if windowsAbsPathRE.MatchString(token) { return true } - if strings.ContainsAny(token, "*?[") { + if looksLikeGlobPattern(token) { return true } return false @@ -182,7 +186,7 @@ func looksLikeStructuredReferenceToken(token string) bool { if strings.Contains(token, "](") || strings.ContainsAny(token, "<>|") { return false } - if strings.ContainsAny(token, "*?[") { + if looksLikeGlobPattern(token) { return true } // Structured captures (markdown links/quoted tokens) may contain local file @@ -194,8 +198,44 @@ func looksLikeStructuredReferenceToken(token string) bool { } func classifyReferenceKind(pattern string) InputReferenceKind { - if strings.ContainsAny(pattern, "*?[") { + if looksLikeGlobPattern(pattern) { return InputReferenceKindGlob } return InputReferenceKindPath } + +// looksLikeGlobPattern returns true if token contains valid glob metacharacters. +// Tokens containing * or ? are always treated as globs. Tokens containing [ +// are only treated as globs if the bracket forms a valid character class (i.e. +// has a matching ]) AND the token also has path-like context (a path separator, +// * or ?) - this prevents natural language fragments like +// "DEFAULT_TOOL_LIMITS[tool_name" and programming constructs like +// "map[string]any" from being classified as globs. +func looksLikeGlobPattern(token string) bool { + if strings.ContainsAny(token, "*?") { + return true + } + if !strings.Contains(token, "[") { + return false + } + // Validate that every [ has a matching ] (valid character class). + hasValidBracket := false + for i := 0; i < len(token); i++ { + if token[i] == '[' { + j := strings.IndexByte(token[i+1:], ']') + if j < 0 { + // Unmatched [ - not a valid glob. + return false + } + hasValidBracket = true + // Skip past the matched ] + i += j + 1 + } + } + if !hasValidBracket { + return false + } + // Brackets alone (e.g. "map[string]any") are not enough to be a glob. + // Require a path separator to disambiguate from programming constructs. + return strings.ContainsAny(token, "/\\") +} diff --git a/internal/attractor/engine/input_reference_scan_test.go b/internal/attractor/engine/input_reference_scan_test.go index 99425ff4..7850b19a 100644 --- a/internal/attractor/engine/input_reference_scan_test.go +++ b/internal/attractor/engine/input_reference_scan_test.go @@ -44,6 +44,59 @@ func TestInputReferenceScan_IgnoresURLsAndParsesWindowsGlobToken(t *testing.T) { requireRefKind(t, got, "docs/spec.md", InputReferenceKindPath) } +func TestInputReferenceScan_RejectsNaturalLanguageWithBrackets(t *testing.T) { + scanner := deterministicInputReferenceScanner{} + // These tokens from issue #48 should NOT be classified as globs. + badTokens := []string{ + `you are wielding ([ch]) [weapon`, + `DEFAULT_TOOL_LIMITS[tool_name`, + `array[index`, + `map[string]any`, + } + for _, token := range badTokens { + refs := scanner.Scan("test.md", []byte(token)) + for _, ref := range refs { + if ref.Kind == InputReferenceKindGlob { + t.Errorf("token %q should not be classified as glob, but got pattern=%q kind=%q", token, ref.Pattern, ref.Kind) + } + } + } +} + +func TestInputReferenceScan_AcceptsValidGlobBrackets(t *testing.T) { + scanner := deterministicInputReferenceScanner{} + // These ARE valid globs with matched brackets. + content := strings.Join([]string{ + `"src/[abc]/*.go"`, + `"docs/[a-z]*.md"`, + }, "\n") + refs := scanner.Scan("test.md", []byte(content)) + got := map[string]InputReferenceKind{} + for _, ref := range refs { + got[ref.Pattern] = ref.Kind + } + requireRefKind(t, got, "src/[abc]/*.go", InputReferenceKindGlob) + requireRefKind(t, got, "docs/[a-z]*.md", InputReferenceKindGlob) +} + +func TestIsLikelyArtifactInputPath_ExcludesWorktrees(t *testing.T) { + cases := []struct { + path string + want bool + }{ + {".worktrees/rogue-logs/worktree/.cargo-target/foo.rs", true}, + {".worktrees/some-branch/src/main.go", true}, + {"src/.cargo-target/debug/build/foo.o", true}, + {"src/main.go", false}, + {"docs/spec.md", false}, + } + for _, tc := range cases { + if got := isLikelyArtifactInputPath(tc.path); got != tc.want { + t.Errorf("isLikelyArtifactInputPath(%q) = %v, want %v", tc.path, got, tc.want) + } + } +} + func requireRefKind(t *testing.T, got map[string]InputReferenceKind, pattern string, want InputReferenceKind) { t.Helper() kind, ok := got[pattern] diff --git a/internal/attractor/engine/main_test.go b/internal/attractor/engine/main_test.go new file mode 100644 index 00000000..18227096 --- /dev/null +++ b/internal/attractor/engine/main_test.go @@ -0,0 +1,27 @@ +package engine + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestMain(m *testing.M) { + stateRoot, err := os.MkdirTemp("", "kilroy-engine-test-state-*") + if err != nil { + _, _ = os.Stderr.WriteString("failed to create temp state root: " + err.Error() + "\n") + os.Exit(2) + } + + if strings.TrimSpace(os.Getenv("XDG_STATE_HOME")) == "" { + _ = os.Setenv("XDG_STATE_HOME", stateRoot) + } + if strings.TrimSpace(os.Getenv("KILROY_CODEX_STATE_BASE")) == "" { + _ = os.Setenv("KILROY_CODEX_STATE_BASE", filepath.Join(stateRoot, "kilroy", "attractor", "codex-state")) + } + + code := m.Run() + _ = os.RemoveAll(stateRoot) + os.Exit(code) +} diff --git a/internal/attractor/engine/node_env.go b/internal/attractor/engine/node_env.go index 317bf6a2..b21479b2 100644 --- a/internal/attractor/engine/node_env.go +++ b/internal/attractor/engine/node_env.go @@ -15,12 +15,27 @@ const ( inputsManifestEnvKey = "KILROY_INPUTS_MANIFEST_PATH" ) +var baseNodeEnvStripKeys = []string{ + "CLAUDECODE", + runIDEnvKey, + nodeIDEnvKey, + logsRootEnvKey, + stageLogsDirEnvKey, + worktreeDirEnvKey, + inputsManifestEnvKey, + stageStatusPathEnvKey, + stageStatusFallbackPathEnvKey, +} + // buildBaseNodeEnv constructs the base environment for any node execution. // It starts from os.Environ(), strips CLAUDECODE, then applies resolved // artifact policy environment variables. func buildBaseNodeEnv(rp ResolvedArtifactPolicy) []string { base := os.Environ() - base = stripEnvKey(base, "CLAUDECODE") + // Scrub inherited process state that can corrupt stage-local contracts. + for _, key := range baseNodeEnvStripKeys { + base = stripEnvKey(base, key) + } overrides := make(map[string]string, len(rp.Env.Vars)) for k, v := range rp.Env.Vars { diff --git a/internal/attractor/engine/node_env_test.go b/internal/attractor/engine/node_env_test.go index 94f770bf..01fe8304 100644 --- a/internal/attractor/engine/node_env_test.go +++ b/internal/attractor/engine/node_env_test.go @@ -59,6 +59,29 @@ func TestBuildBaseNodeEnv_StripsClaudeCode(t *testing.T) { } } +func TestBuildBaseNodeEnv_StripsKilroyContractEnvKeys(t *testing.T) { + outerValues := map[string]string{ + runIDEnvKey: "outer-run", + nodeIDEnvKey: "outer-node", + logsRootEnvKey: "/tmp/outer-logs", + stageLogsDirEnvKey: "/tmp/outer-logs/outer-node", + worktreeDirEnvKey: "/tmp/outer-worktree", + inputsManifestEnvKey: "/tmp/outer-inputs-manifest.json", + stageStatusPathEnvKey: "/tmp/outer-status.json", + stageStatusFallbackPathEnvKey: "/tmp/outer-fallback-status.json", + } + for key, value := range outerValues { + t.Setenv(key, value) + } + + env := buildBaseNodeEnv(ResolvedArtifactPolicy{}) + for key := range outerValues { + if envHasKey(env, key) { + t.Fatalf("%s should be stripped from base env", key) + } + } +} + func TestBuildBaseNodeEnv_PreservesExplicitToolchainPaths(t *testing.T) { home := t.TempDir() cargoHome := filepath.Join(home, ".cargo") diff --git a/internal/attractor/engine/progress.go b/internal/attractor/engine/progress.go index ba25b9f1..07a5c636 100644 --- a/internal/attractor/engine/progress.go +++ b/internal/attractor/engine/progress.go @@ -17,6 +17,17 @@ import ( // // This is best-effort: progress logging must never block or fail a run. func (e *Engine) appendProgress(ev map[string]any) { + e.appendProgressImpl(ev, true) +} + +// appendProgressLivenessOnly writes a progress event to progress.ndjson and +// live.json for observability but does NOT reset the stall watchdog timer. +// Use this for unconditional heartbeat emissions that should not mask true stalls. +func (e *Engine) appendProgressLivenessOnly(ev map[string]any) { + e.appendProgressImpl(ev, false) +} + +func (e *Engine) appendProgressImpl(ev map[string]any, updateStallTimer bool) { if e == nil { return } @@ -47,7 +58,9 @@ func (e *Engine) appendProgress(ev map[string]any) { e.progressMu.Lock() defer e.progressMu.Unlock() - e.lastProgressAt = now + if updateStallTimer { + e.lastProgressAt = now + } // Append to progress.ndjson. // Intentionally open/close on each event so writes are immediately flushed diff --git a/internal/attractor/engine/prompts/stage_status_contract_preamble.tmpl b/internal/attractor/engine/prompts/stage_status_contract_preamble.tmpl index 5522ce6b..eb48bef4 100644 --- a/internal/attractor/engine/prompts/stage_status_contract_preamble.tmpl +++ b/internal/attractor/engine/prompts/stage_status_contract_preamble.tmpl @@ -4,3 +4,6 @@ ${{.StageStatusFallbackPathEnvKey}} = {{.FallbackPath}} - Write your status JSON to ${{.StageStatusPathEnvKey}}. If that write fails, use run-scoped fallback ${{.StageStatusFallbackPathEnvKey}}. - Do not write status.json to nested module directories. - To end this stage: write the status file, then return your response. Do NOT call close_agent or any session-management tool. +- Verification reporting: if you ran verification commands (tests, linters, type checks) and any failed or were blocked by infra issues (missing tools, DNS errors, network failures), you MUST set status to "degraded_success" instead of "success" and include a "verification" object: + {"status": "degraded_success", "verification": {"status": "blocked", "blocked_reason": "npm registry DNS failure", "commands": [{"command": "npx tsc", "exit_code": 1, "blocked": true, "reason": "EAI_AGAIN"}]}, ...} +- Use "success" only when all verification commands actually executed and passed, or when no verification was needed. diff --git a/internal/attractor/engine/resume.go b/internal/attractor/engine/resume.go index bb71bcdc..eed7708f 100644 --- a/internal/attractor/engine/resume.go +++ b/internal/attractor/engine/resume.go @@ -70,26 +70,26 @@ func resumeFromLogsRoot(ctx context.Context, logsRoot string, ov ResumeOverrides runID string checkpointSHA string eng *Engine + releaseLock func() ) defer func() { - if err == nil { - return - } - if eng != nil { - eng.persistFatalOutcome(ctx, err) - return - } - if strings.TrimSpace(logsRoot) == "" || strings.TrimSpace(runID) == "" { - return + if err != nil && !isRunOwnershipConflict(err) { + if eng != nil { + eng.persistFatalOutcome(ctx, err) + } else if strings.TrimSpace(logsRoot) != "" && strings.TrimSpace(runID) != "" { + final := runtime.FinalOutcome{ + Timestamp: time.Now().UTC(), + Status: runtime.FinalFail, + RunID: runID, + FinalGitCommitSHA: strings.TrimSpace(checkpointSHA), + FailureReason: strings.TrimSpace(err.Error()), + } + _ = final.Save(filepath.Join(logsRoot, "final.json")) + } } - final := runtime.FinalOutcome{ - Timestamp: time.Now().UTC(), - Status: runtime.FinalFail, - RunID: runID, - FinalGitCommitSHA: strings.TrimSpace(checkpointSHA), - FailureReason: strings.TrimSpace(err.Error()), + if releaseLock != nil { + releaseLock() } - _ = final.Save(filepath.Join(logsRoot, "final.json")) }() m, err := loadManifest(filepath.Join(logsRoot, "manifest.json")) @@ -97,6 +97,10 @@ func resumeFromLogsRoot(ctx context.Context, logsRoot string, ov ResumeOverrides return nil, err } runID = strings.TrimSpace(m.RunID) + releaseLock, err = acquireRunOwnership(logsRoot, runID) + if err != nil { + return nil, err + } cp, err := runtime.LoadCheckpoint(filepath.Join(logsRoot, "checkpoint.json")) if err != nil { return nil, err diff --git a/internal/attractor/engine/run_ownership_lock.go b/internal/attractor/engine/run_ownership_lock.go new file mode 100644 index 00000000..cdddfbd3 --- /dev/null +++ b/internal/attractor/engine/run_ownership_lock.go @@ -0,0 +1,185 @@ +package engine + +import ( + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/danshapiro/kilroy/internal/attractor/procutil" +) + +const ( + runOwnershipLockFile = "run.lock.json" + runOwnershipAcquireAttempts = 20 + runOwnershipRetryDelay = 25 * time.Millisecond +) + +type runOwnershipRecord struct { + PID int `json:"pid"` + PIDStartTime uint64 `json:"pid_start_time,omitempty"` + RunID string `json:"run_id,omitempty"` + AcquiredAt string `json:"acquired_at,omitempty"` +} + +type runOwnershipConflictError struct { + LogsRoot string + LockPath string + ExistingPID int + ExistingRun string + Reason string +} + +func (e *runOwnershipConflictError) Error() string { + if e == nil { + return "run ownership conflict" + } + msg := fmt.Sprintf("logs_root %q is already owned", e.LogsRoot) + if e.ExistingPID > 0 { + msg += fmt.Sprintf(" by a live process (pid=%d)", e.ExistingPID) + } else { + msg += " by another process" + } + if strings.TrimSpace(e.ExistingRun) != "" { + msg += fmt.Sprintf(" run_id=%q", e.ExistingRun) + } + if strings.TrimSpace(e.LockPath) != "" { + msg += fmt.Sprintf(" lock=%q", e.LockPath) + } + if strings.TrimSpace(e.Reason) != "" { + msg += fmt.Sprintf(" (%s)", strings.TrimSpace(e.Reason)) + } + return msg +} + +func isRunOwnershipConflict(err error) bool { + var ownershipErr *runOwnershipConflictError + return errors.As(err, &ownershipErr) +} + +func acquireRunOwnership(logsRoot, runID string) (func(), error) { + root := strings.TrimSpace(logsRoot) + if root == "" { + return nil, fmt.Errorf("logs_root is required") + } + lockPath := filepath.Join(root, runOwnershipLockFile) + record := runOwnershipRecord{ + PID: os.Getpid(), + RunID: strings.TrimSpace(runID), + AcquiredAt: time.Now().UTC().Format(time.RFC3339Nano), + } + if startTime, err := procutil.ReadPIDStartTime(record.PID); err == nil && startTime > 0 { + record.PIDStartTime = startTime + } + payload, err := json.MarshalIndent(record, "", " ") + if err != nil { + return nil, fmt.Errorf("encode run ownership: %w", err) + } + // Keep payload newline-friendly for manual inspection. + payload = append(payload, '\n') + + var lastReadErr error + for attempts := 0; attempts < runOwnershipAcquireAttempts; attempts++ { + f, openErr := os.OpenFile(lockPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o644) + if openErr == nil { + if _, writeErr := f.Write(payload); writeErr != nil { + _ = f.Close() + _ = os.Remove(lockPath) + return nil, fmt.Errorf("write run ownership lock %q: %w", lockPath, writeErr) + } + if closeErr := f.Close(); closeErr != nil { + _ = os.Remove(lockPath) + return nil, fmt.Errorf("close run ownership lock %q: %w", lockPath, closeErr) + } + pid := record.PID + start := record.PIDStartTime + return func() { + releaseRunOwnership(lockPath, pid, start) + }, nil + } + if !errors.Is(openErr, os.ErrExist) { + return nil, fmt.Errorf("create run ownership lock %q: %w", lockPath, openErr) + } + + existing, readErr := readRunOwnership(lockPath) + if readErr != nil { + lastReadErr = readErr + time.Sleep(runOwnershipRetryDelay) + continue + } + + if runOwnershipMatchesLiveProcess(existing) { + return nil, &runOwnershipConflictError{ + LogsRoot: root, + LockPath: lockPath, + ExistingPID: existing.PID, + ExistingRun: strings.TrimSpace(existing.RunID), + } + } + // Stale ownership record, best-effort cleanup and retry. + if removeErr := os.Remove(lockPath); removeErr != nil && !errors.Is(removeErr, os.ErrNotExist) { + return nil, fmt.Errorf("remove stale run ownership lock %q: %w", lockPath, removeErr) + } + } + if lastReadErr != nil { + return nil, &runOwnershipConflictError{ + LogsRoot: root, + LockPath: lockPath, + Reason: fmt.Sprintf("ownership lock unreadable after retries: %v", lastReadErr), + } + } + return nil, fmt.Errorf("acquire run ownership lock %q: exhausted retries", lockPath) +} + +func readRunOwnership(lockPath string) (runOwnershipRecord, error) { + b, err := os.ReadFile(lockPath) + if err != nil { + return runOwnershipRecord{}, err + } + if strings.TrimSpace(string(b)) == "" { + return runOwnershipRecord{}, fmt.Errorf("empty lock payload") + } + var rec runOwnershipRecord + if err := json.Unmarshal(b, &rec); err != nil { + return runOwnershipRecord{}, err + } + if rec.PID <= 0 { + return runOwnershipRecord{}, fmt.Errorf("invalid pid %d", rec.PID) + } + return rec, nil +} + +func runOwnershipMatchesLiveProcess(rec runOwnershipRecord) bool { + if rec.PID <= 0 || !procutil.PIDAlive(rec.PID) { + return false + } + if rec.PIDStartTime == 0 { + return true + } + start, err := procutil.ReadPIDStartTime(rec.PID) + if err != nil || start == 0 { + // Cannot disambiguate; keep conservative conflict behavior. + return true + } + return start == rec.PIDStartTime +} + +func releaseRunOwnership(lockPath string, ownerPID int, ownerStartTime uint64) { + if strings.TrimSpace(lockPath) == "" || ownerPID <= 0 { + return + } + rec, err := readRunOwnership(lockPath) + if err != nil { + return + } + if rec.PID != ownerPID { + return + } + if ownerStartTime > 0 && rec.PIDStartTime > 0 && rec.PIDStartTime != ownerStartTime { + return + } + _ = os.Remove(lockPath) +} diff --git a/internal/attractor/engine/run_ownership_lock_test.go b/internal/attractor/engine/run_ownership_lock_test.go new file mode 100644 index 00000000..e6c286a6 --- /dev/null +++ b/internal/attractor/engine/run_ownership_lock_test.go @@ -0,0 +1,297 @@ +package engine + +import ( + "context" + "encoding/json" + "errors" + "os" + "os/exec" + "path/filepath" + "testing" + "time" + + "github.com/danshapiro/kilroy/internal/attractor/procutil" +) + +func TestAcquireRunOwnership_RejectsLiveOwner(t *testing.T) { + t.Parallel() + + logsRoot := t.TempDir() + lockPath := filepath.Join(logsRoot, runOwnershipLockFile) + ownerPID, ownerStart := startSleepingProcess(t) + owner := runOwnershipRecord{ + PID: ownerPID, + PIDStartTime: ownerStart, + RunID: "existing-run", + } + if err := writeOwnershipRecord(lockPath, owner); err != nil { + t.Fatalf("writeOwnershipRecord: %v", err) + } + + _, err := acquireRunOwnership(logsRoot, "new-run") + if err == nil { + t.Fatalf("expected ownership conflict error, got nil") + } + var ownershipErr *runOwnershipConflictError + if !errors.As(err, &ownershipErr) { + t.Fatalf("expected runOwnershipConflictError, got %T (%v)", err, err) + } + if ownershipErr.ExistingPID != ownerPID { + t.Fatalf("existing pid: got %d want %d", ownershipErr.ExistingPID, ownerPID) + } +} + +func TestAcquireRunOwnership_ReclaimsStaleOwner(t *testing.T) { + t.Parallel() + + logsRoot := t.TempDir() + lockPath := filepath.Join(logsRoot, runOwnershipLockFile) + owner := runOwnershipRecord{ + PID: 999999, // best-effort stale PID + PIDStartTime: 123456, + RunID: "stale-run", + } + if err := writeOwnershipRecord(lockPath, owner); err != nil { + t.Fatalf("writeOwnershipRecord: %v", err) + } + + release, err := acquireRunOwnership(logsRoot, "new-run") + if err != nil { + t.Fatalf("acquireRunOwnership: %v", err) + } + defer release() + + got, err := readRunOwnership(lockPath) + if err != nil { + t.Fatalf("readRunOwnership: %v", err) + } + if got.PID != os.Getpid() { + t.Fatalf("owner pid: got %d want %d", got.PID, os.Getpid()) + } + if got.RunID != "new-run" { + t.Fatalf("owner run_id: got %q want %q", got.RunID, "new-run") + } +} + +func TestAcquireRunOwnership_UnreadableLockConflicts(t *testing.T) { + t.Parallel() + + logsRoot := t.TempDir() + lockPath := filepath.Join(logsRoot, runOwnershipLockFile) + if err := os.WriteFile(lockPath, []byte("{"), 0o644); err != nil { + t.Fatalf("write unreadable lock: %v", err) + } + + _, err := acquireRunOwnership(logsRoot, "new-run") + if err != nil { + var ownershipErr *runOwnershipConflictError + if !errors.As(err, &ownershipErr) { + t.Fatalf("expected runOwnershipConflictError, got %T (%v)", err, err) + } + if ownershipErr.Reason == "" { + t.Fatalf("expected conflict reason for unreadable lock, got empty reason") + } + } else { + t.Fatalf("expected ownership conflict for unreadable lock, got nil") + } + if _, statErr := os.Stat(lockPath); statErr != nil { + t.Fatalf("expected unreadable lock to be preserved, stat err: %v", statErr) + } +} + +func TestAcquireRunOwnership_RetriesUnreadableLockUntilConflict(t *testing.T) { + t.Parallel() + + logsRoot := t.TempDir() + lockPath := filepath.Join(logsRoot, runOwnershipLockFile) + owner := ownerRecordForCurrentPID("existing-run") + + created := make(chan struct{}) + done := make(chan error, 1) + go func() { + f, err := os.OpenFile(lockPath, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o644) + if err != nil { + close(created) + done <- err + return + } + close(created) + defer func() { + _ = f.Close() + }() + time.Sleep(120 * time.Millisecond) + b, err := json.Marshal(owner) + if err != nil { + done <- err + return + } + _, err = f.Write(b) + done <- err + }() + <-created + + _, err := acquireRunOwnership(logsRoot, "new-run") + if err == nil { + t.Fatalf("expected ownership conflict error, got nil") + } + if !isRunOwnershipConflict(err) { + t.Fatalf("expected run ownership conflict, got: %v", err) + } + if writeErr := <-done; writeErr != nil { + t.Fatalf("slow owner write failed: %v", writeErr) + } +} + +func TestAcquireRunOwnership_ReclaimsPIDStartTimeMismatch(t *testing.T) { + t.Parallel() + + currentStart, err := procutil.ReadPIDStartTime(os.Getpid()) + if err != nil || currentStart == 0 { + t.Skip("pid start time unavailable on this platform") + } + + logsRoot := t.TempDir() + lockPath := filepath.Join(logsRoot, runOwnershipLockFile) + owner := runOwnershipRecord{ + PID: os.Getpid(), + PIDStartTime: currentStart + 1, // force mismatch with current process identity + RunID: "stale-owner", + } + if err := writeOwnershipRecord(lockPath, owner); err != nil { + t.Fatalf("writeOwnershipRecord: %v", err) + } + + release, err := acquireRunOwnership(logsRoot, "new-run") + if err != nil { + t.Fatalf("acquireRunOwnership: %v", err) + } + defer release() + + got, err := readRunOwnership(lockPath) + if err != nil { + t.Fatalf("readRunOwnership: %v", err) + } + if got.RunID != "new-run" { + t.Fatalf("owner run_id: got %q want %q", got.RunID, "new-run") + } +} + +func TestReleaseRunOwnership_DoesNotRemoveForeignOwner(t *testing.T) { + t.Parallel() + + logsRoot := t.TempDir() + lockPath := filepath.Join(logsRoot, runOwnershipLockFile) + owner := ownerRecordForCurrentPID("owner-run") + if err := writeOwnershipRecord(lockPath, owner); err != nil { + t.Fatalf("writeOwnershipRecord: %v", err) + } + + releaseRunOwnership(lockPath, os.Getpid()+1, 0) + if _, err := os.Stat(lockPath); err != nil { + t.Fatalf("expected lock to remain for foreign owner, stat err: %v", err) + } +} + +func TestReleaseRunOwnership_DoesNotRemoveMismatchedStartTime(t *testing.T) { + t.Parallel() + + currentStart, err := procutil.ReadPIDStartTime(os.Getpid()) + if err != nil || currentStart == 0 { + t.Skip("pid start time unavailable on this platform") + } + + logsRoot := t.TempDir() + lockPath := filepath.Join(logsRoot, runOwnershipLockFile) + owner := runOwnershipRecord{ + PID: os.Getpid(), + PIDStartTime: currentStart + 1, + RunID: "owner-run", + } + if err := writeOwnershipRecord(lockPath, owner); err != nil { + t.Fatalf("writeOwnershipRecord: %v", err) + } + + releaseRunOwnership(lockPath, os.Getpid(), currentStart) + if _, err := os.Stat(lockPath); err != nil { + t.Fatalf("expected lock to remain when start time mismatches, stat err: %v", err) + } +} + +func TestRun_OwnershipConflict_DoesNotWriteFinalJSON(t *testing.T) { + t.Parallel() + + repo := initTestRepo(t) + logsRoot := t.TempDir() + lockPath := filepath.Join(logsRoot, runOwnershipLockFile) + ownerPID, ownerStart := startSleepingProcess(t) + owner := runOwnershipRecord{ + PID: ownerPID, + PIDStartTime: ownerStart, + RunID: "existing-run", + } + if err := writeOwnershipRecord(lockPath, owner); err != nil { + t.Fatalf("writeOwnershipRecord: %v", err) + } + + dot := []byte(` +digraph G { + start [shape=Mdiamond] + exit [shape=Msquare] + start -> exit +} +`) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + _, err := Run(ctx, dot, RunOptions{ + RepoPath: repo, + RunID: "ownership-conflict", + LogsRoot: logsRoot, + }) + if err == nil { + t.Fatalf("expected ownership conflict error, got nil") + } + if !isRunOwnershipConflict(err) { + t.Fatalf("expected ownership conflict, got: %v", err) + } + if _, statErr := os.Stat(filepath.Join(logsRoot, "final.json")); !errors.Is(statErr, os.ErrNotExist) { + t.Fatalf("expected no final.json for ownership conflict, stat err=%v", statErr) + } +} + +func writeOwnershipRecord(path string, rec runOwnershipRecord) error { + b, err := json.Marshal(rec) + if err != nil { + return err + } + return os.WriteFile(path, b, 0o644) +} + +func ownerRecordForCurrentPID(runID string) runOwnershipRecord { + rec := runOwnershipRecord{ + PID: os.Getpid(), + RunID: runID, + } + if start, err := procutil.ReadPIDStartTime(rec.PID); err == nil && start > 0 { + rec.PIDStartTime = start + } + return rec +} + +func startSleepingProcess(t *testing.T) (int, uint64) { + t.Helper() + cmd := exec.Command("sleep", "60") + if err := cmd.Start(); err != nil { + t.Fatalf("start sleep process: %v", err) + } + pid := cmd.Process.Pid + var start uint64 + if s, err := procutil.ReadPIDStartTime(pid); err == nil { + start = s + } + t.Cleanup(func() { + _ = cmd.Process.Kill() + _, _ = cmd.Process.Wait() + }) + return pid, start +} diff --git a/internal/attractor/engine/status_ingestion_retry_test.go b/internal/attractor/engine/status_ingestion_retry_test.go new file mode 100644 index 00000000..ce730c31 --- /dev/null +++ b/internal/attractor/engine/status_ingestion_retry_test.go @@ -0,0 +1,181 @@ +package engine + +import ( + "errors" + "io" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/danshapiro/kilroy/internal/attractor/runtime" +) + +func TestCopyFirstValidFallbackStatus_CanonicalStageStatusWins(t *testing.T) { + tmp := t.TempDir() + stageStatusPath := filepath.Join(tmp, "logs", "a", "status.json") + fallbackPath := filepath.Join(tmp, "status.json") + + if err := runtime.WriteFileAtomic(stageStatusPath, []byte(`{"status":"success","notes":"canonical"}`)); err != nil { + t.Fatalf("write canonical status: %v", err) + } + if err := os.WriteFile(fallbackPath, []byte(`{"status":"fail","failure_reason":"fallback"}`), 0o644); err != nil { + t.Fatalf("write fallback status: %v", err) + } + + source, diagnostic, err := copyFirstValidFallbackStatus(stageStatusPath, []fallbackStatusPath{ + {path: fallbackPath, source: statusSourceWorktree}, + }) + if err != nil { + t.Fatalf("copyFirstValidFallbackStatus: %v", err) + } + if source != statusSourceCanonical { + t.Fatalf("source=%q want %q", source, statusSourceCanonical) + } + if strings.TrimSpace(diagnostic) != "" { + t.Fatalf("diagnostic=%q want empty", diagnostic) + } + + b, err := os.ReadFile(stageStatusPath) + if err != nil { + t.Fatalf("read stage status: %v", err) + } + out, err := runtime.DecodeOutcomeJSON(b) + if err != nil { + t.Fatalf("decode stage status: %v", err) + } + if out.Status != runtime.StatusSuccess { + t.Fatalf("status=%q want %q", out.Status, runtime.StatusSuccess) + } +} + +func TestCopyFirstValidFallbackStatus_MissingFallbackIsDiagnosed(t *testing.T) { + tmp := t.TempDir() + stageStatusPath := filepath.Join(tmp, "logs", "a", "status.json") + missingPath := filepath.Join(tmp, "missing-status.json") + + source, diagnostic, err := copyFirstValidFallbackStatus(stageStatusPath, []fallbackStatusPath{ + {path: missingPath, source: statusSourceWorktree}, + }) + if err != nil { + t.Fatalf("copyFirstValidFallbackStatus: %v", err) + } + if source != statusSourceNone { + t.Fatalf("source=%q want empty", source) + } + if !strings.Contains(diagnostic, "missing status artifact") { + t.Fatalf("diagnostic=%q want mention of missing status artifact", diagnostic) + } +} + +func TestCopyFirstValidFallbackStatus_PermanentCorruptFallbackIsDiagnosed(t *testing.T) { + tmp := t.TempDir() + stageStatusPath := filepath.Join(tmp, "logs", "a", "status.json") + fallbackPath := filepath.Join(tmp, "status.json") + + if err := os.WriteFile(fallbackPath, []byte(`{ this is invalid json }`), 0o644); err != nil { + t.Fatalf("write corrupt fallback: %v", err) + } + + source, diagnostic, err := copyFirstValidFallbackStatus(stageStatusPath, []fallbackStatusPath{ + {path: fallbackPath, source: statusSourceWorktree}, + }) + if err != nil { + t.Fatalf("copyFirstValidFallbackStatus: %v", err) + } + if source != statusSourceNone { + t.Fatalf("source=%q want empty", source) + } + if !strings.Contains(diagnostic, "corrupt status artifact") { + t.Fatalf("diagnostic=%q want mention of corrupt status artifact", diagnostic) + } +} + +func TestCopyFirstValidFallbackStatus_RetryDecodeSucceedsAfterTransientCorruption(t *testing.T) { + tmp := t.TempDir() + stageStatusPath := filepath.Join(tmp, "logs", "a", "status.json") + fallbackPath := filepath.Join(tmp, "status.json") + + if err := os.WriteFile(fallbackPath, []byte(`{ this is invalid json }`), 0o644); err != nil { + t.Fatalf("seed transient corrupt fallback: %v", err) + } + + go func() { + time.Sleep(fallbackStatusDecodeBaseDelay + 10*time.Millisecond) + _ = os.WriteFile(fallbackPath, []byte(`{"status":"fail","failure_reason":"transient decode retry success"}`), 0o644) + }() + + source, diagnostic, err := copyFirstValidFallbackStatus(stageStatusPath, []fallbackStatusPath{ + {path: fallbackPath, source: statusSourceWorktree}, + }) + if err != nil { + t.Fatalf("copyFirstValidFallbackStatus: %v", err) + } + if source != statusSourceWorktree { + t.Fatalf("source=%q want %q", source, statusSourceWorktree) + } + if strings.TrimSpace(diagnostic) != "" { + t.Fatalf("diagnostic=%q want empty", diagnostic) + } + + b, err := os.ReadFile(stageStatusPath) + if err != nil { + t.Fatalf("read copied stage status: %v", err) + } + out, err := runtime.DecodeOutcomeJSON(b) + if err != nil { + t.Fatalf("decode copied stage status: %v", err) + } + if out.Status != runtime.StatusFail { + t.Fatalf("status=%q want %q", out.Status, runtime.StatusFail) + } + if out.FailureReason != "transient decode retry success" { + t.Fatalf("failure_reason=%q want %q", out.FailureReason, "transient decode retry success") + } +} + +func TestCopyFirstValidFallbackStatus_TypeMismatchIsInvalidPayload(t *testing.T) { + tmp := t.TempDir() + stageStatusPath := filepath.Join(tmp, "logs", "a", "status.json") + fallbackPath := filepath.Join(tmp, "status.json") + + // Deterministic schema/type violation: status must be a string. + if err := os.WriteFile(fallbackPath, []byte(`{"status":123}`), 0o644); err != nil { + t.Fatalf("write invalid payload fallback: %v", err) + } + + source, diagnostic, err := copyFirstValidFallbackStatus(stageStatusPath, []fallbackStatusPath{ + {path: fallbackPath, source: statusSourceWorktree}, + }) + if err != nil { + t.Fatalf("copyFirstValidFallbackStatus: %v", err) + } + if source != statusSourceNone { + t.Fatalf("source=%q want empty", source) + } + if !strings.Contains(diagnostic, "invalid status payload") { + t.Fatalf("diagnostic=%q want mention of invalid status payload", diagnostic) + } + if strings.Contains(diagnostic, "corrupt status artifact") { + t.Fatalf("diagnostic=%q should not classify type mismatch as corrupt artifact", diagnostic) + } +} + +func TestShouldRetryFallbackRead_DoesNotRetryMissing(t *testing.T) { + if shouldRetryFallbackRead(fallbackFailureModeMissing, os.ErrNotExist) { + t.Fatalf("missing fallback paths should not be retried") + } +} + +func TestShouldRetryFallbackRead_RetriesUnexpectedEOF(t *testing.T) { + if !shouldRetryFallbackRead(fallbackFailureModeUnreadable, io.ErrUnexpectedEOF) { + t.Fatalf("unexpected EOF should remain retryable for unreadable fallback artifacts") + } +} + +func TestShouldRetryFallbackRead_DoesNotRetryRegularUnreadable(t *testing.T) { + if shouldRetryFallbackRead(fallbackFailureModeUnreadable, errors.New("permission denied")) { + t.Fatalf("regular unreadable fallback artifacts should not be retried") + } +} diff --git a/internal/attractor/engine/status_json_test.go b/internal/attractor/engine/status_json_test.go index df90a1f0..6bdc5579 100644 --- a/internal/attractor/engine/status_json_test.go +++ b/internal/attractor/engine/status_json_test.go @@ -74,8 +74,21 @@ func TestCodergenStatusIngestion_FallbackOnlyWhenCanonicalMissing(t *testing.T) } func TestCodergenStatusIngestion_InvalidFallbackIsRejected(t *testing.T) { - _, source := runStatusIngestionFixture(t, false, false, true) + out, source := runStatusIngestionFixture(t, false, false, true) if source != "" { t.Fatalf("source=%q want empty", source) } + if out.Status != runtime.StatusFail { + t.Fatalf("status=%q want %q", out.Status, runtime.StatusFail) + } +} + +func TestCodergenStatusIngestion_MissingFallbackIsDiagnosed(t *testing.T) { + out, source := runStatusIngestionFixture(t, false, false, false) + if source != "" { + t.Fatalf("source=%q want empty", source) + } + if out.Status != runtime.StatusFail { + t.Fatalf("status=%q want %q", out.Status, runtime.StatusFail) + } } diff --git a/internal/attractor/runtime/status.go b/internal/attractor/runtime/status.go index a33f1f92..ef479bce 100644 --- a/internal/attractor/runtime/status.go +++ b/internal/attractor/runtime/status.go @@ -9,17 +9,20 @@ import ( type StageStatus string const ( - StatusSuccess StageStatus = "success" - StatusPartialSuccess StageStatus = "partial_success" - StatusRetry StageStatus = "retry" - StatusFail StageStatus = "fail" - StatusSkipped StageStatus = "skipped" + StatusSuccess StageStatus = "success" + StatusDegradedSuccess StageStatus = "degraded_success" + StatusPartialSuccess StageStatus = "partial_success" + StatusRetry StageStatus = "retry" + StatusFail StageStatus = "fail" + StatusSkipped StageStatus = "skipped" ) func ParseStageStatus(s string) (StageStatus, error) { switch strings.ToLower(strings.TrimSpace(s)) { case "success", "ok": return StatusSuccess, nil + case "degraded_success", "degradedsuccess", "degraded-success": + return StatusDegradedSuccess, nil case "partial_success", "partialsuccess", "partial-success": return StatusPartialSuccess, nil case "retry": @@ -49,20 +52,36 @@ func (s StageStatus) Valid() bool { // (success, partial_success, retry, fail, skipped) rather than a custom routing value. func (s StageStatus) IsCanonical() bool { switch s { - case StatusSuccess, StatusPartialSuccess, StatusRetry, StatusFail, StatusSkipped: + case StatusSuccess, StatusDegradedSuccess, StatusPartialSuccess, StatusRetry, StatusFail, StatusSkipped: return true default: return false } } +// VerificationResult captures the outcome of verification commands run during a stage. +type VerificationResult struct { + Status string `json:"status"` // "passed", "failed", or "blocked" + BlockedReason string `json:"blocked_reason,omitempty"` // why verification could not run + Commands []VerificationEntry `json:"commands,omitempty"` // individual command results +} + +// VerificationEntry records the result of a single verification command. +type VerificationEntry struct { + Command string `json:"command"` + ExitCode int `json:"exit_code"` + Blocked bool `json:"blocked,omitempty"` + Reason string `json:"reason,omitempty"` +} + type Outcome struct { - Status StageStatus `json:"status"` - PreferredLabel string `json:"preferred_label,omitempty"` - SuggestedNextIDs []string `json:"suggested_next_ids,omitempty"` - ContextUpdates map[string]any `json:"context_updates,omitempty"` - Notes string `json:"notes,omitempty"` - FailureReason string `json:"failure_reason,omitempty"` + Status StageStatus `json:"status"` + PreferredLabel string `json:"preferred_label,omitempty"` + SuggestedNextIDs []string `json:"suggested_next_ids,omitempty"` + ContextUpdates map[string]any `json:"context_updates,omitempty"` + Notes string `json:"notes,omitempty"` + FailureReason string `json:"failure_reason,omitempty"` + Verification *VerificationResult `json:"verification,omitempty"` // Details is optional structured information for failures (or for debugging). // The engine does not use it for routing, but it must be preserved when present. Details any `json:"details,omitempty"` diff --git a/internal/attractor/runtime/status_test.go b/internal/attractor/runtime/status_test.go index b62e4d1c..7c6e7e30 100644 --- a/internal/attractor/runtime/status_test.go +++ b/internal/attractor/runtime/status_test.go @@ -11,12 +11,15 @@ func TestParseStageStatus_CanonicalAndLegacy(t *testing.T) { want StageStatus }{ {"success", StatusSuccess}, + {"degraded_success", StatusDegradedSuccess}, {"partial_success", StatusPartialSuccess}, {"retry", StatusRetry}, {"fail", StatusFail}, {"skipped", StatusSkipped}, // Compatibility aliases. {"ok", StatusSuccess}, + {"degraded-success", StatusDegradedSuccess}, + {"degradedsuccess", StatusDegradedSuccess}, {"error", StatusFail}, {"SUCCESS", StatusSuccess}, {"FAIL", StatusFail}, @@ -68,6 +71,9 @@ func TestStageStatus_IsCanonical(t *testing.T) { if !StatusSuccess.IsCanonical() { t.Fatalf("StatusSuccess should be canonical") } + if !StatusDegradedSuccess.IsCanonical() { + t.Fatalf("StatusDegradedSuccess should be canonical") + } if !StatusFail.IsCanonical() { t.Fatalf("StatusFail should be canonical") } @@ -164,6 +170,72 @@ func TestDecodeOutcomeJSON_LegacyFailDetails_PopulatesFailureReason(t *testing.T } } +func TestDecodeOutcomeJSON_DegradedSuccessWithVerification(t *testing.T) { + input := `{ + "status": "degraded_success", + "notes": "implementation complete but tsc blocked by DNS failure", + "verification": { + "status": "blocked", + "blocked_reason": "npm registry DNS failure", + "commands": [ + {"command": "npx tsc", "exit_code": 1, "blocked": true, "reason": "EAI_AGAIN"} + ] + } + }` + o, err := DecodeOutcomeJSON([]byte(input)) + if err != nil { + t.Fatalf("DecodeOutcomeJSON: %v", err) + } + if o.Status != StatusDegradedSuccess { + t.Fatalf("status: got %q want %q", o.Status, StatusDegradedSuccess) + } + if o.Verification == nil { + t.Fatal("expected non-nil verification") + } + if o.Verification.Status != "blocked" { + t.Fatalf("verification status: got %q want %q", o.Verification.Status, "blocked") + } + if o.Verification.BlockedReason != "npm registry DNS failure" { + t.Fatalf("verification blocked_reason: got %q", o.Verification.BlockedReason) + } + if len(o.Verification.Commands) != 1 { + t.Fatalf("expected 1 verification command, got %d", len(o.Verification.Commands)) + } + cmd := o.Verification.Commands[0] + if cmd.Command != "npx tsc" || cmd.ExitCode != 1 || !cmd.Blocked { + t.Fatalf("unexpected verification command: %+v", cmd) + } +} + +func TestDecodeOutcomeJSON_SuccessWithPassedVerification(t *testing.T) { + input := `{ + "status": "success", + "verification": { + "status": "passed", + "commands": [ + {"command": "go test ./...", "exit_code": 0}, + {"command": "go vet ./...", "exit_code": 0} + ] + } + }` + o, err := DecodeOutcomeJSON([]byte(input)) + if err != nil { + t.Fatalf("DecodeOutcomeJSON: %v", err) + } + if o.Status != StatusSuccess { + t.Fatalf("status: got %q want %q", o.Status, StatusSuccess) + } + if o.Verification == nil { + t.Fatal("expected non-nil verification") + } + if o.Verification.Status != "passed" { + t.Fatalf("verification status: got %q want %q", o.Verification.Status, "passed") + } + if len(o.Verification.Commands) != 2 { + t.Fatalf("expected 2 verification commands, got %d", len(o.Verification.Commands)) + } +} + func TestDecodeOutcomeJSON_LegacyRetryDetails_PopulatesFailureReason(t *testing.T) { o, err := DecodeOutcomeJSON([]byte(`{"outcome":"retry","details":"transient timeout"}`)) if err != nil {