From 255e0bab61499762a7dbbfbbf039ca5a114475de Mon Sep 17 00:00:00 2001 From: Ryder Freeman Date: Tue, 31 Mar 2026 06:59:27 -0700 Subject: [PATCH 1/4] feat(daemon): make daemon mode default for csa run, add session kill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three behavioral changes: 1. `csa run` now launches in daemon mode by default (returns SID immediately). Use `--no-daemon` to opt back into blocking mode. The old `--daemon` flag is kept as a hidden no-op for compatibility. 2. `csa session wait` timeout is hardcoded to 250s. The `--timeout` parameter has been removed — callers no longer choose wait duration. 3. New `csa session kill` subcommand sends SIGTERM to the daemon process group, with 5s grace period before SIGKILL escalation. All 10 pattern families updated: removed `--daemon` flags and `session wait --timeout` parameters to match the new defaults. Co-Authored-By: Claude Opus 4.6 (1M context) --- Cargo.lock | 32 ++++---- Cargo.toml | 2 +- crates/cli-sub-agent/src/cli.rs | 9 ++- crates/cli-sub-agent/src/cli_session.rs | 16 +++- crates/cli-sub-agent/src/main.rs | 7 +- crates/cli-sub-agent/src/run_cmd_daemon.rs | 9 ++- crates/cli-sub-agent/src/session_cmds.rs | 6 +- .../cli-sub-agent/src/session_cmds_daemon.rs | 80 +++++++++++++++++-- crates/cli-sub-agent/src/session_dispatch.rs | 11 ++- patterns/ai-reviewed-commit/PATTERN.md | 4 +- patterns/csa-review/PATTERN.md | 12 +-- .../csa-review/skills/csa-review/SKILL.md | 4 +- patterns/csa-review/workflow.toml | 12 +-- patterns/debate/PATTERN.md | 20 ++--- patterns/debate/skills/debate/SKILL.md | 16 ++-- patterns/debate/workflow.toml | 16 ++-- patterns/pr-bot/PATTERN.md | 16 ++-- patterns/pr-bot/workflow.toml | 16 ++-- patterns/sa/PATTERN.md | 12 +-- patterns/sa/skills/sa/SKILL.md | 12 +-- patterns/sa/workflow.toml | 12 +-- patterns/security-audit/PATTERN.md | 4 +- 22 files changed, 205 insertions(+), 123 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 
8a6c35ea..4e4d2bf3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -515,7 +515,7 @@ checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "cli-sub-agent" -version = "0.1.197" +version = "0.1.198" dependencies = [ "anyhow", "chrono", @@ -703,7 +703,7 @@ dependencies = [ [[package]] name = "csa-acp" -version = "0.1.197" +version = "0.1.198" dependencies = [ "agent-client-protocol", "anyhow", @@ -723,7 +723,7 @@ dependencies = [ [[package]] name = "csa-config" -version = "0.1.197" +version = "0.1.198" dependencies = [ "anyhow", "chrono", @@ -739,7 +739,7 @@ dependencies = [ [[package]] name = "csa-core" -version = "0.1.197" +version = "0.1.198" dependencies = [ "agent-teams", "chrono", @@ -754,7 +754,7 @@ dependencies = [ [[package]] name = "csa-eval" -version = "0.1.197" +version = "0.1.198" dependencies = [ "anyhow", "chrono", @@ -768,7 +768,7 @@ dependencies = [ [[package]] name = "csa-executor" -version = "0.1.197" +version = "0.1.198" dependencies = [ "agent-teams", "anyhow", @@ -794,7 +794,7 @@ dependencies = [ [[package]] name = "csa-hooks" -version = "0.1.197" +version = "0.1.198" dependencies = [ "anyhow", "chrono", @@ -811,7 +811,7 @@ dependencies = [ [[package]] name = "csa-lock" -version = "0.1.197" +version = "0.1.198" dependencies = [ "anyhow", "chrono", @@ -823,7 +823,7 @@ dependencies = [ [[package]] name = "csa-mcp-hub" -version = "0.1.197" +version = "0.1.198" dependencies = [ "anyhow", "axum", @@ -845,7 +845,7 @@ dependencies = [ [[package]] name = "csa-memory" -version = "0.1.197" +version = "0.1.198" dependencies = [ "anyhow", "async-trait", @@ -863,7 +863,7 @@ dependencies = [ [[package]] name = "csa-process" -version = "0.1.197" +version = "0.1.198" dependencies = [ "anyhow", "chrono", @@ -881,7 +881,7 @@ dependencies = [ [[package]] name = "csa-resource" -version = "0.1.197" +version = "0.1.198" dependencies = [ "anyhow", "csa-core", @@ -897,7 +897,7 @@ dependencies = [ [[package]] name = "csa-scheduler" 
-version = "0.1.197" +version = "0.1.198" dependencies = [ "anyhow", "chrono", @@ -915,7 +915,7 @@ dependencies = [ [[package]] name = "csa-session" -version = "0.1.197" +version = "0.1.198" dependencies = [ "anyhow", "chrono", @@ -936,7 +936,7 @@ dependencies = [ [[package]] name = "csa-todo" -version = "0.1.197" +version = "0.1.198" dependencies = [ "anyhow", "chrono", @@ -4325,7 +4325,7 @@ dependencies = [ [[package]] name = "weave" -version = "0.1.197" +version = "0.1.198" dependencies = [ "anyhow", "clap", diff --git a/Cargo.toml b/Cargo.toml index 5301ef98..927c402e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,7 @@ members = ["crates/*"] resolver = "2" [workspace.package] -version = "0.1.197" +version = "0.1.198" edition = "2024" rust-version = "1.88" license = "Apache-2.0" diff --git a/crates/cli-sub-agent/src/cli.rs b/crates/cli-sub-agent/src/cli.rs index ad728996..4aa75f92 100644 --- a/crates/cli-sub-agent/src/cli.rs +++ b/crates/cli-sub-agent/src/cli.rs @@ -228,11 +228,14 @@ pub enum Commands { #[arg(long, value_delimiter = ',', value_name = "PATH")] extra_writable: Vec, - /// Detach as a background daemon; prints session ID and exits immediately. - /// Use `csa session wait/attach/result` to interact with the daemon. - #[arg(long)] + /// [DEPRECATED] Daemon mode is now the default. This flag is a no-op. + #[arg(long, hide = true)] daemon: bool, + /// Run in foreground blocking mode instead of the default daemon mode. + #[arg(long)] + no_daemon: bool, + /// Internal flag: this process IS the daemon child. Skip re-spawning. 
#[arg(long, hide = true)] daemon_child: bool, diff --git a/crates/cli-sub-agent/src/cli_session.rs b/crates/cli-sub-agent/src/cli_session.rs index 60ee0b92..c366f886 100644 --- a/crates/cli-sub-agent/src/cli_session.rs +++ b/crates/cli-sub-agent/src/cli_session.rs @@ -191,15 +191,23 @@ pub enum SessionCommands { cd: Option, }, - /// Wait for a daemon session to complete (poll until result.toml exists) + /// Wait for a daemon session to complete (poll until result.toml exists). + /// Hardcoded timeout: 250 seconds. Wait { /// Session ID to wait for #[arg(long)] session: String, - /// Timeout in seconds (0 = wait forever) - #[arg(long, default_value = "0")] - timeout: u64, + /// Working directory + #[arg(long)] + cd: Option, + }, + + /// Kill a running daemon session (SIGTERM, then SIGKILL after grace period) + Kill { + /// Session ID to kill + #[arg(long)] + session: String, /// Working directory #[arg(long)] diff --git a/crates/cli-sub-agent/src/main.rs b/crates/cli-sub-agent/src/main.rs index a8ee4bc2..8662af18 100644 --- a/crates/cli-sub-agent/src/main.rs +++ b/crates/cli-sub-agent/src/main.rs @@ -407,12 +407,13 @@ async fn run() -> Result<()> { force_ignore_tier_setting, no_fs_sandbox, extra_writable, - daemon, + daemon: _daemon, + no_daemon, daemon_child, session_id, } => { - // Daemon spawn: when --daemon is set and not already the child, fork and exit. - if daemon && !daemon_child { + // Daemon spawn: daemon mode is the default; --no-daemon opts out. + if !no_daemon && !daemon_child { if let Some(ref _id) = session_id { anyhow::bail!("--session-id is an internal flag and must not be used directly"); } diff --git a/crates/cli-sub-agent/src/run_cmd_daemon.rs b/crates/cli-sub-agent/src/run_cmd_daemon.rs index 282af82b..a32591da 100644 --- a/crates/cli-sub-agent/src/run_cmd_daemon.rs +++ b/crates/cli-sub-agent/src/run_cmd_daemon.rs @@ -1,4 +1,4 @@ -//! Daemon spawn logic for `csa run --daemon`. +//! Daemon spawn logic for `csa run` (daemon mode is the default). 
//! //! Extracted from main.rs to keep the dispatch function under the //! monolith file limit. @@ -26,7 +26,12 @@ pub(crate) fn spawn_and_exit(cd: Option<&str>) -> Result<()> { // that may appear before the subcommand (e.g. `csa --format json run ...`). let all_args: Vec = std::env::args().collect(); let run_pos = all_args.iter().position(|a| a == "run").unwrap_or(1); - let forwarded_args: Vec = all_args.iter().skip(run_pos + 1).cloned().collect(); + let forwarded_args: Vec = all_args + .iter() + .skip(run_pos + 1) + .filter(|a| *a != "--daemon") // daemon is now default; strip no-op flag + .cloned() + .collect(); let csa_binary = std::env::current_exe().unwrap_or_else(|_| std::path::PathBuf::from("csa")); diff --git a/crates/cli-sub-agent/src/session_cmds.rs b/crates/cli-sub-agent/src/session_cmds.rs index 611fb0f8..503103ee 100644 --- a/crates/cli-sub-agent/src/session_cmds.rs +++ b/crates/cli-sub-agent/src/session_cmds.rs @@ -759,8 +759,10 @@ pub(crate) fn handle_session_checkpoints(cd: Option) -> Result<()> { Ok(()) } -// Daemon-specific commands (wait, attach) are in session_cmds_daemon.rs. -pub(crate) use crate::session_cmds_daemon::{handle_session_attach, handle_session_wait}; +// Daemon-specific commands (wait, attach, kill) are in session_cmds_daemon.rs. +pub(crate) use crate::session_cmds_daemon::{ + handle_session_attach, handle_session_kill, handle_session_wait, +}; #[cfg(test)] #[path = "session_cmds_tests.rs"] diff --git a/crates/cli-sub-agent/src/session_cmds_daemon.rs b/crates/cli-sub-agent/src/session_cmds_daemon.rs index 408b875b..814d1cb7 100644 --- a/crates/cli-sub-agent/src/session_cmds_daemon.rs +++ b/crates/cli-sub-agent/src/session_cmds_daemon.rs @@ -1,4 +1,4 @@ -//! Daemon-specific session commands: wait and attach. +//! Daemon-specific session commands: wait, attach, and kill. //! //! Extracted from session_cmds.rs to stay under the monolith file limit. 
@@ -37,11 +37,11 @@ fn read_daemon_pid(session_dir: &std::path::Path) -> Option {
 ///
 /// Exits 0 when result.toml appears (streams stdout.log), exits 124 on timeout,
 /// exits 1 if the daemon process died without producing a result.
-pub(crate) fn handle_session_wait(
-    session: String,
-    timeout_secs: u64,
-    cd: Option<String>,
-) -> Result<i32> {
+pub(crate) fn handle_session_wait(session: String, cd: Option<String>) -> Result<i32> {
+    // Hardcoded wait timeout in seconds. Declared inside the function so the doc
+    // comment above stays attached to `handle_session_wait` instead of this const.
+    const WAIT_TIMEOUT_SECS: u64 = 250;
+
     let project_root = crate::pipeline::determine_project_root(cd.as_deref())?;
     let resolved = resolve_session_prefix_with_fallback(&project_root, &session)?;
     let session_dir = get_session_dir(&project_root, &resolved.session_id)?;
@@ -73,10 +73,10 @@ pub(crate) fn handle_session_wait(
             return Ok(1);
         }
 
-        if timeout_secs > 0 && start.elapsed().as_secs() >= timeout_secs {
+        if start.elapsed().as_secs() >= WAIT_TIMEOUT_SECS {
             eprintln!(
                 "Timeout: session {} did not complete within {}s",
-                resolved.session_id, timeout_secs
+                resolved.session_id, WAIT_TIMEOUT_SECS
             );
             return Ok(124);
         }
@@ -193,3 +193,127 @@ pub(crate) fn handle_session_attach(
         }
     }
 }
+
+/// Kill a running daemon session.
+///
+/// Sends SIGTERM to the daemon's process group and polls the daemon PID for up
+/// to 5 seconds; if it is still alive, escalates to SIGKILL and polls for up to
+/// 2 more seconds. Only the daemon PID itself is polled, not the other members
+/// of its process group.
+///
+/// Returns `Ok(())` once the daemon PID is gone, including when it was already
+/// dead before any signal was sent.
+///
+/// # Errors
+///
+/// Fails when the session cannot be resolved, no daemon PID is recorded, the
+/// recorded PID is not a safe signal target, a signal cannot be delivered while
+/// the daemon is still alive, or the daemon survives SIGKILL.
+pub(crate) fn handle_session_kill(session: String, cd: Option<String>) -> Result<()> {
+    // 100 ms poll interval: 50 polls = 5 s SIGTERM grace, 20 polls = 2 s after SIGKILL.
+    const POLL_INTERVAL: std::time::Duration = std::time::Duration::from_millis(100);
+    const TERM_GRACE_POLLS: u32 = 50;
+    const KILL_REAP_POLLS: u32 = 20;
+
+    let project_root = crate::pipeline::determine_project_root(cd.as_deref())?;
+    let resolved = resolve_session_prefix_with_fallback(&project_root, &session)?;
+    let session_dir = get_session_dir(&project_root, &resolved.session_id)?;
+
+    let pid = read_daemon_pid(&session_dir).ok_or_else(|| {
+        anyhow::anyhow!(
+            "No daemon PID found for session {} — may not be a daemon session",
+            resolved.session_id,
+        )
+    })?;
+
+    if !is_pid_alive(pid) {
+        eprintln!(
+            "Session {} (PID {}) is already dead",
+            resolved.session_id, pid,
+        );
+        return Ok(());
+    }
+
+    // kill(2) treats non-positive targets specially: 0 is the caller's own process
+    // group and -1 is every process the caller may signal. Refuse any recorded PID
+    // that would negate to one of those, or that does not fit in pid_t.
+    let pgid = libc::pid_t::try_from(pid)
+        .ok()
+        .filter(|candidate| *candidate > 1)
+        .ok_or_else(|| {
+            anyhow::anyhow!(
+                "Refusing to signal session {}: recorded PID {} is not a valid daemon PID",
+                resolved.session_id,
+                pid,
+            )
+        })?;
+
+    // Sends `signal` to the daemon's whole process group (negative kill target).
+    // A delivery failure is reported only if the daemon is still alive, since it
+    // may have exited between the last liveness check and the signal.
+    let signal_group = |signal: libc::c_int| -> std::io::Result<()> {
+        // SAFETY: kill(2) takes plain integers and dereferences no memory; `pgid` was
+        // checked above to be > 1, so `-pgid` can only name that one process group.
+        if unsafe { libc::kill(-pgid, signal) } == 0 {
+            return Ok(());
+        }
+        // Capture errno before any later call can overwrite it.
+        let err = std::io::Error::last_os_error();
+        if is_pid_alive(pid) {
+            Err(err)
+        } else {
+            Ok(())
+        }
+    };
+
+    // Polls the daemon PID; true once it is gone within `polls` polls.
+    let exited_within = |polls: u32| -> bool {
+        for _ in 0..polls {
+            if !is_pid_alive(pid) {
+                return true;
+            }
+            std::thread::sleep(POLL_INTERVAL);
+        }
+        !is_pid_alive(pid)
+    };
+
+    eprintln!(
+        "Sending SIGTERM to session {} (PID {})...",
+        resolved.session_id, pid,
+    );
+    signal_group(libc::SIGTERM).map_err(|err| {
+        anyhow::anyhow!(
+            "Failed to send SIGTERM to session {} (PID {}): {}",
+            resolved.session_id,
+            pid,
+            err,
+        )
+    })?;
+    if exited_within(TERM_GRACE_POLLS) {
+        eprintln!("Session {} terminated", resolved.session_id);
+        return Ok(());
+    }
+
+    eprintln!(
+        "Session {} still alive after 5s, sending SIGKILL...",
+        resolved.session_id,
+    );
+    signal_group(libc::SIGKILL).map_err(|err| {
+        anyhow::anyhow!(
+            "Failed to send SIGKILL to session {} (PID {}): {}",
+            resolved.session_id,
+            pid,
+            err,
+        )
+    })?;
+    if !exited_within(KILL_REAP_POLLS) {
+        anyhow::bail!(
+            "Failed to kill session {} (PID {})",
+            resolved.session_id,
+            pid,
+        );
+    }
+
+    eprintln!("Session {} killed", resolved.session_id);
+    Ok(())
+}
diff --git a/crates/cli-sub-agent/src/session_dispatch.rs b/crates/cli-sub-agent/src/session_dispatch.rs
index 8afb3a8d..8cc173ae 100644
--- a/crates/cli-sub-agent/src/session_dispatch.rs
+++ b/crates/cli-sub-agent/src/session_dispatch.rs
@@ -89,16 +89,15 @@ pub(crate) fn dispatch(cmd: SessionCommands, output_format: OutputFormat) -> Res
         } => {
             session_cmds::handle_session_tool_output(session, index, list, cd)?;
         }
-        SessionCommands::Wait {
-            session,
-            timeout,
-            cd,
-        } => {
-            let exit_code = session_cmds::handle_session_wait(session, timeout, cd)?;
+        SessionCommands::Wait { session, cd } => {
+            let exit_code = session_cmds::handle_session_wait(session, cd)?;
             let _ = std::io::stdout().flush();
             let _ = std::io::stderr().flush();
             std::process::exit(exit_code);
         }
+        SessionCommands::Kill { session, cd } => {
+            session_cmds::handle_session_kill(session, cd)?;
+        }
         SessionCommands::Attach {
             session,
             stderr,
diff --git a/patterns/ai-reviewed-commit/PATTERN.md
b/patterns/ai-reviewed-commit/PATTERN.md index a1b4b298..17dd6e58 100644 --- a/patterns/ai-reviewed-commit/PATTERN.md +++ b/patterns/ai-reviewed-commit/PATTERN.md @@ -107,8 +107,8 @@ Tier: tier-1-quick Delegate commit message generation to cheaper tool. ```bash -SID=$(csa run --daemon "Run 'git diff --staged' and generate a Conventional Commits message") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run "Run 'git diff --staged' and generate a Conventional Commits message") +csa session wait --session "$SID" ``` ## Step 10: Commit diff --git a/patterns/csa-review/PATTERN.md b/patterns/csa-review/PATTERN.md index e5616c73..1bbd98dc 100644 --- a/patterns/csa-review/PATTERN.md +++ b/patterns/csa-review/PATTERN.md @@ -93,8 +93,8 @@ implementation context. Note: `csa review` does not yet support `--fork-from` directly. Use `csa run --fork-from` with a review prompt instead: ```bash -SID=$(csa run --daemon --fork-from "Review the uncommitted changes: $(git diff)") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --fork-from "Review the uncommitted changes: $(git diff)") +csa session wait --session "$SID" ``` **Benefits**: The reviewer inherits the implementer's context (files read, @@ -110,8 +110,8 @@ are immediately visible. Also saves tokens by avoiding redundant file reads. When no implementation session is available, use standard review: ```bash -SID=$(csa run --daemon --force-ignore-tier-setting --tool ${REVIEW_TOOL} --description "code-review: ${SCOPE}" "${REVIEW_PROMPT}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --force-ignore-tier-setting --tool ${REVIEW_TOOL} --description "code-review: ${SCOPE}" "${REVIEW_PROMPT}") +csa session wait --session "$SID" ``` ## Step 6: Present Results @@ -131,8 +131,8 @@ Generate fix-summary.md and post-fix-review-findings.json. Mark remaining P0/P1 as incomplete. 
```bash -SID=$(csa run --daemon --force-ignore-tier-setting --tool ${REVIEW_TOOL} --session ${SESSION_ID} "${FIX_PROMPT}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --force-ignore-tier-setting --tool ${REVIEW_TOOL} --session ${SESSION_ID} "${FIX_PROMPT}") +csa session wait --session "$SID" ``` ## ENDIF diff --git a/patterns/csa-review/skills/csa-review/SKILL.md b/patterns/csa-review/skills/csa-review/SKILL.md index 9a3aca00..896af83c 100644 --- a/patterns/csa-review/skills/csa-review/SKILL.md +++ b/patterns/csa-review/skills/csa-review/SKILL.md @@ -104,10 +104,10 @@ The review prompt instructs the agent to: read project context (CLAUDE.md + AGEN ### Step 3: Execute Review via CSA ```bash -SID=$(csa run --daemon --sa-mode true --force-ignore-tier-setting --tool {review_tool} \ +SID=$(csa run --sa-mode true --force-ignore-tier-setting --tool {review_tool} \ --description "code-review: {scope}" \ "{REVIEW_PROMPT}") -csa session wait --session "$SID" --timeout 1800 +csa session wait --session "$SID" ``` Key behaviors: diff --git a/patterns/csa-review/workflow.toml b/patterns/csa-review/workflow.toml index a250c26f..9446b74f 100644 --- a/patterns/csa-review/workflow.toml +++ b/patterns/csa-review/workflow.toml @@ -110,8 +110,8 @@ implementation context. Note: `csa review` does not yet support `--fork-from` directly. Use `csa run --fork-from` with a review prompt instead: ```bash -SID=$(csa run --daemon --fork-from "Review the uncommitted changes: $(git diff)") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --fork-from "Review the uncommitted changes: $(git diff)") +csa session wait --session "$SID" ``` **Benefits**: The reviewer inherits the implementer's context (files read, @@ -127,8 +127,8 @@ are immediately visible. Also saves tokens by avoiding redundant file reads. 
When no implementation session is available, use standard review: ```bash -SID=$(csa run --daemon --force-ignore-tier-setting --tool ${REVIEW_TOOL} --description "code-review: ${SCOPE}" "${REVIEW_PROMPT}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --force-ignore-tier-setting --tool ${REVIEW_TOOL} --description "code-review: ${SCOPE}" "${REVIEW_PROMPT}") +csa session wait --session "$SID" ```""" tier = "tier-3-complex" on_fail = "abort" @@ -151,8 +151,8 @@ Generate fix-summary.md and post-fix-review-findings.json. Mark remaining P0/P1 as incomplete. ```bash -SID=$(csa run --daemon --force-ignore-tier-setting --tool ${REVIEW_TOOL} --session ${SESSION_ID} "${FIX_PROMPT}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --force-ignore-tier-setting --tool ${REVIEW_TOOL} --session ${SESSION_ID} "${FIX_PROMPT}") +csa session wait --session "$SID" ```""" tier = "tier-3-complex" on_fail = "abort" diff --git a/patterns/debate/PATTERN.md b/patterns/debate/PATTERN.md index 1f60de1f..15944c1e 100644 --- a/patterns/debate/PATTERN.md +++ b/patterns/debate/PATTERN.md @@ -54,8 +54,8 @@ the research context into the debate prompt: ```bash # Gather context via forked session, then feed into debate -SID=$(csa run --daemon --fork-from "Summarize findings for debate context") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --fork-from "Summarize findings for debate context") +csa session wait --session "$SID" csa debate "question (with research context above)" ``` @@ -78,8 +78,8 @@ Proposer presents concrete, actionable strategy with: 4. 
Anticipated Weaknesses (honest limitations) ```bash -SID=$(csa run --daemon --model-spec "${PROPOSER_MODEL}" --ephemeral "${PROPOSAL_PROMPT}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --model-spec "${PROPOSER_MODEL}" --ephemeral "${PROPOSAL_PROMPT}") +csa session wait --session "$SID" ``` ## Step 6: Round 1 — Critique @@ -94,8 +94,8 @@ Critic rigorously evaluates the proposal: 4. Strongest Counter-Arguments ```bash -SID=$(csa run --daemon --model-spec "${CRITIC_MODEL}" --ephemeral "${CRITIQUE_PROMPT}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --model-spec "${CRITIC_MODEL}" --ephemeral "${CRITIQUE_PROMPT}") +csa session wait --session "$SID" ``` ## Step 7: Round 1 — Response @@ -109,8 +109,8 @@ Proposer responds to each criticism: 3. Present revised strategy ```bash -SID=$(csa run --daemon --model-spec "${PROPOSER_MODEL}" --ephemeral "${RESPONSE_PROMPT}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --model-spec "${PROPOSER_MODEL}" --ephemeral "${RESPONSE_PROMPT}") +csa session wait --session "$SID" ``` ## Step 8: Convergence Evaluation @@ -130,8 +130,8 @@ Restart debate loop with higher tier models. Max 2 escalations. 
```bash -SID=$(csa run --daemon --model-spec "${HIGHER_TIER_MODEL}" --ephemeral "${ESCALATION_PROMPT}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --model-spec "${HIGHER_TIER_MODEL}" --ephemeral "${ESCALATION_PROMPT}") +csa session wait --session "$SID" ``` ## ENDIF diff --git a/patterns/debate/skills/debate/SKILL.md b/patterns/debate/skills/debate/SKILL.md index d31544ba..516b100f 100644 --- a/patterns/debate/skills/debate/SKILL.md +++ b/patterns/debate/skills/debate/SKILL.md @@ -186,7 +186,7 @@ Within the selected tier, models alternate via round-robin: **Round N (Proposal)**: ```bash -SID=$(csa run --daemon --model-spec "{models[proposer_index]}" --ephemeral \ +SID=$(csa run --model-spec "{models[proposer_index]}" --ephemeral \ "Question: {question} You are the PROPOSER in an adversarial debate. {context_from_previous_rounds} @@ -196,12 +196,12 @@ Provide a concrete, actionable strategy. Structure your response as: 2. Key Arguments (numbered, with evidence/reasoning) 3. Implementation Steps (concrete actions) 4. Anticipated Weaknesses (acknowledge limitations honestly)") -csa session wait --session "$SID" --timeout 1800 +csa session wait --session "$SID" ``` **Round N (Critique)**: ```bash -SID=$(csa run --daemon --model-spec "{models[critic_index]}" --ephemeral \ +SID=$(csa run --model-spec "{models[critic_index]}" --ephemeral \ "Question: {question} You are the CRITIC in an adversarial debate. @@ -216,12 +216,12 @@ Rigorously critique this proposal: 4. Strongest Counter-Arguments (the best case AGAINST this proposal) Be intellectually honest: acknowledge strengths before attacking weaknesses.") -csa session wait --session "$SID" --timeout 1800 +csa session wait --session "$SID" ``` **Round N (Response)**: ```bash -SID=$(csa run --daemon --model-spec "{models[responder_index]}" --ephemeral \ +SID=$(csa run --model-spec "{models[responder_index]}" --ephemeral \ "Question: {question} You are the PROPOSER responding to criticism. 
@@ -238,7 +238,7 @@ Respond to each criticism: 3. Present your REVISED STRATEGY incorporating lessons learned If the critique fundamentally undermines your approach, propose a new strategy.") -csa session wait --session "$SID" --timeout 1800 +csa session wait --session "$SID" ``` ### Step 4: Convergence Evaluation @@ -266,7 +266,7 @@ When escalation is triggered: ```bash # Example: escalating from tier-1-quick to tier-2-standard -SID=$(csa run --daemon --model-spec "{higher_tier_models[0]}" --ephemeral \ +SID=$(csa run --model-spec "{higher_tier_models[0]}" --ephemeral \ "Question: {question} PREVIOUS DEBATE SUMMARY (lower-tier models could not resolve): @@ -276,7 +276,7 @@ You have been escalated to provide deeper analysis. Build on the previous debate 1. Identify what the previous debaters missed 2. Propose a superior strategy with stronger reasoning 3. Address all unresolved criticisms") -csa session wait --session "$SID" --timeout 1800 +csa session wait --session "$SID" ``` ### Step 6: Final Synthesis diff --git a/patterns/debate/workflow.toml b/patterns/debate/workflow.toml index 9c611bbc..dc060a10 100644 --- a/patterns/debate/workflow.toml +++ b/patterns/debate/workflow.toml @@ -86,8 +86,8 @@ Proposer presents concrete, actionable strategy with: 4. Anticipated Weaknesses (honest limitations) ```bash -SID=$(csa run --daemon --model-spec "${PROPOSER_MODEL}" --ephemeral "${PROPOSAL_PROMPT}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --model-spec "${PROPOSER_MODEL}" --ephemeral "${PROPOSAL_PROMPT}") +csa session wait --session "$SID" ```""" tier = "${CURRENT_TIER}" on_fail = "abort" @@ -104,8 +104,8 @@ Critic rigorously evaluates the proposal: 4. 
Strongest Counter-Arguments ```bash -SID=$(csa run --daemon --model-spec "${CRITIC_MODEL}" --ephemeral "${CRITIQUE_PROMPT}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --model-spec "${CRITIC_MODEL}" --ephemeral "${CRITIQUE_PROMPT}") +csa session wait --session "$SID" ```""" tier = "${CURRENT_TIER}" on_fail = "abort" @@ -121,8 +121,8 @@ Proposer responds to each criticism: 3. Present revised strategy ```bash -SID=$(csa run --daemon --model-spec "${PROPOSER_MODEL}" --ephemeral "${RESPONSE_PROMPT}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --model-spec "${PROPOSER_MODEL}" --ephemeral "${RESPONSE_PROMPT}") +csa session wait --session "$SID" ```""" tier = "${CURRENT_TIER}" on_fail = "abort" @@ -147,8 +147,8 @@ Restart debate loop with higher tier models. Max 2 escalations. ```bash -SID=$(csa run --daemon --model-spec "${HIGHER_TIER_MODEL}" --ephemeral "${ESCALATION_PROMPT}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --model-spec "${HIGHER_TIER_MODEL}" --ephemeral "${ESCALATION_PROMPT}") +csa session wait --session "$SID" ```""" on_fail = "abort" condition = "${NEEDS_ESCALATION}" diff --git a/patterns/pr-bot/PATTERN.md b/patterns/pr-bot/PATTERN.md index f1ff8589..6b5b36d9 100644 --- a/patterns/pr-bot/PATTERN.md +++ b/patterns/pr-bot/PATTERN.md @@ -355,7 +355,7 @@ BOT_UNAVAILABLE=true FALLBACK_REVIEW_HAS_ISSUES=false BOT_HAS_ISSUES=false set +e -WAIT_SID="$(csa run --daemon --sa-mode true --force-ignore-tier-setting --tool auto --timeout 1800 --idle-timeout 650 "Bounded wait task only. Do NOT invoke pr-bot skill or any full PR workflow. Operate on PR #${PR_NUM} in repo ${REPO}. Wait for @${CLOUD_BOT_NAME} review on HEAD ${CURRENT_SHA}. Check for a review EVENT via 'gh api repos/${REPO}/pulls/${PR_NUM}/reviews' with submitted_at after ${WAIT_BASE_TS} and user.login matching the bot. Also check issue comments for bot activity. Max wait 10 minutes (5-minute quiet wait already elapsed before this step). 
Do not edit code. Return exactly one marker line: BOT_REPLY=received or BOT_REPLY=timeout.")" +WAIT_SID="$(csa run --sa-mode true --force-ignore-tier-setting --tool auto --timeout 1800 --idle-timeout 650 "Bounded wait task only. Do NOT invoke pr-bot skill or any full PR workflow. Operate on PR #${PR_NUM} in repo ${REPO}. Wait for @${CLOUD_BOT_NAME} review on HEAD ${CURRENT_SHA}. Check for a review EVENT via 'gh api repos/${REPO}/pulls/${PR_NUM}/reviews' with submitted_at after ${WAIT_BASE_TS} and user.login matching the bot. Also check issue comments for bot activity. Max wait 10 minutes (5-minute quiet wait already elapsed before this step). Do not edit code. Return exactly one marker line: BOT_REPLY=received or BOT_REPLY=timeout.")" DAEMON_RC=$? set -e if [ "${DAEMON_RC}" -ne 0 ] || [ -z "${WAIT_SID}" ]; then @@ -363,7 +363,7 @@ if [ "${DAEMON_RC}" -ne 0 ] || [ -z "${WAIT_SID}" ]; then BOT_UNAVAILABLE=true else set +e - WAIT_RESULT="$(csa session wait --session "${WAIT_SID}" --timeout 1800)" + WAIT_RESULT="$(csa session wait --session "${WAIT_SID}")" WAIT_RC=$? set -e if [ "${WAIT_RC}" -ne 0 ]; then @@ -541,7 +541,7 @@ timeout branch. Steps 7-10 are structurally inside the `BOT_UNAVAILABLE=false` branch and are NOT reachable from here. Delegate this cycle to CSA as a single operation and enforce hard bounds: -- CSA daemon + session wait with `--timeout 1800` +- CSA daemon + session wait (hardcoded 250s timeout) - no `|| true` silent downgrade - success requires marker `FALLBACK_FIX=clean` - on success, orchestrator explicitly sets `FALLBACK_REVIEW_HAS_ISSUES=false` @@ -549,7 +549,7 @@ Delegate this cycle to CSA as a single operation and enforce hard bounds: ```bash set -euo pipefail set +e -FIX_SID="$(csa run --daemon --sa-mode true --force-ignore-tier-setting --tool auto --timeout 1800 --idle-timeout 1800 "Bounded fallback-fix task only. Do NOT invoke pr-bot skill or any full PR workflow. Operate on PR #${PR_NUM} in repo ${REPO}. 
Bot is unavailable and fallback local review found issues. Run a self-contained max-3-round fix cycle: read latest findings from csa review --range main...HEAD, apply fixes with commits, re-run review, repeat until clean. Return exactly one marker line FALLBACK_FIX=clean when clean; otherwise return FALLBACK_FIX=failed and exit non-zero.")" +FIX_SID="$(csa run --sa-mode true --force-ignore-tier-setting --tool auto --timeout 1800 --idle-timeout 1800 "Bounded fallback-fix task only. Do NOT invoke pr-bot skill or any full PR workflow. Operate on PR #${PR_NUM} in repo ${REPO}. Bot is unavailable and fallback local review found issues. Run a self-contained max-3-round fix cycle: read latest findings from csa review --range main...HEAD, apply fixes with commits, re-run review, repeat until clean. Return exactly one marker line FALLBACK_FIX=clean when clean; otherwise return FALLBACK_FIX=failed and exit non-zero.")" DAEMON_RC=$? set -e if [ "${DAEMON_RC}" -ne 0 ] || [ -z "${FIX_SID}" ]; then @@ -557,7 +557,7 @@ if [ "${DAEMON_RC}" -ne 0 ] || [ -z "${FIX_SID}" ]; then exit 1 fi set +e -FIX_RESULT="$(csa session wait --session "${FIX_SID}" --timeout 1800)" +FIX_RESULT="$(csa session wait --session "${FIX_SID}")" FIX_RC=$? set -e @@ -1030,7 +1030,7 @@ wait/fix/review loop to a single CSA-managed step. - CSA delegated step handles both paths: - Bot responds with P0/P1/P2 badges → CSA runs bounded fix/review retries (max 3 rounds), using the same 15-minute wait policy for each trigger (5-minute quiet wait + 10-minute polling). - Bot times out → CSA runs fallback `csa review --range main...HEAD` and bounded fix/review retries (max 3 rounds). -- CSA daemon + session wait with `--timeout 5400` enforces the hard timeout. +- CSA daemon + session wait (hardcoded 250s timeout) enforces the hard timeout. - delegated execution failures are hard failures (no `|| true` silent downgrade). 
- On delegated gate failure (timeout, non-zero, or non-PASS marker), set `REBASE_REVIEW_HAS_ISSUES=true` (and `FALLBACK_REVIEW_HAS_ISSUES=true` when appropriate), then block merge. - On success, both `REBASE_REVIEW_HAS_ISSUES` and `FALLBACK_REVIEW_HAS_ISSUES` must be false. @@ -1079,7 +1079,7 @@ if [ "${COMMIT_COUNT}" -gt 3 ]; then echo "Triggered post-rebase review via '${CLOUD_BOT_RETRIGGER_CMD}' for HEAD ${REBASE_CURRENT_SHA}." set +e - GATE_SID="$(csa run --daemon --sa-mode true --force-ignore-tier-setting --tool auto --timeout 5400 --idle-timeout 5400 "Bounded post-rebase gate task only. Do NOT invoke pr-bot skill or any full PR workflow. Operate on PR #${PR_NUM} in repo ${REPO} (branch ${WORKFLOW_BRANCH}). Complete the post-rebase review gate end-to-end. For each cloud bot trigger, wait 5 minutes quietly, then poll up to 10 minutes for a response. If response contains P0/P1/P2 findings, iteratively fix/commit/push/re-trigger and re-check with the same 15-minute wait policy (max 3 rounds). If bot times out, abort and report to user; return exactly one marker line REBASE_GATE=PASS when clean, otherwise REBASE_GATE=FAIL and exit non-zero.")" + GATE_SID="$(csa run --sa-mode true --force-ignore-tier-setting --tool auto --timeout 5400 --idle-timeout 5400 "Bounded post-rebase gate task only. Do NOT invoke pr-bot skill or any full PR workflow. Operate on PR #${PR_NUM} in repo ${REPO} (branch ${WORKFLOW_BRANCH}). Complete the post-rebase review gate end-to-end. For each cloud bot trigger, wait 5 minutes quietly, then poll up to 10 minutes for a response. If response contains P0/P1/P2 findings, iteratively fix/commit/push/re-trigger and re-check with the same 15-minute wait policy (max 3 rounds). If bot times out, abort and report to user; return exactly one marker line REBASE_GATE=PASS when clean, otherwise REBASE_GATE=FAIL and exit non-zero.")" DAEMON_RC=$? 
set -e if [ "${DAEMON_RC}" -ne 0 ] || [ -z "${GATE_SID}" ]; then @@ -1091,7 +1091,7 @@ if [ "${COMMIT_COUNT}" -gt 3 ]; then exit 1 fi set +e - GATE_RESULT="$(csa session wait --session "${GATE_SID}" --timeout 5400)" + GATE_RESULT="$(csa session wait --session "${GATE_SID}")" GATE_RC=$? set -e if [ "${GATE_RC}" -ne 0 ]; then diff --git a/patterns/pr-bot/workflow.toml b/patterns/pr-bot/workflow.toml index 78fe6d4c..80b75aa1 100644 --- a/patterns/pr-bot/workflow.toml +++ b/patterns/pr-bot/workflow.toml @@ -372,7 +372,7 @@ BOT_UNAVAILABLE=true FALLBACK_REVIEW_HAS_ISSUES=false BOT_HAS_ISSUES=false set +e -WAIT_SID="$(csa run --daemon --sa-mode true --force-ignore-tier-setting --tool auto --timeout 1800 --idle-timeout 650 "Bounded wait task only. Do NOT invoke pr-bot skill or any full PR workflow. Operate on PR #${PR_NUM} in repo ${REPO}. Wait for @${CLOUD_BOT_NAME} review on HEAD ${CURRENT_SHA}. Check for a review EVENT via 'gh api repos/${REPO}/pulls/${PR_NUM}/reviews' with submitted_at after ${WAIT_BASE_TS} and user.login matching the bot. Also check issue comments for bot activity. Max wait 10 minutes (5-minute quiet wait already elapsed before this step). Do not edit code. Return exactly one marker line: BOT_REPLY=received or BOT_REPLY=timeout.")" +WAIT_SID="$(csa run --sa-mode true --force-ignore-tier-setting --tool auto --timeout 1800 --idle-timeout 650 "Bounded wait task only. Do NOT invoke pr-bot skill or any full PR workflow. Operate on PR #${PR_NUM} in repo ${REPO}. Wait for @${CLOUD_BOT_NAME} review on HEAD ${CURRENT_SHA}. Check for a review EVENT via 'gh api repos/${REPO}/pulls/${PR_NUM}/reviews' with submitted_at after ${WAIT_BASE_TS} and user.login matching the bot. Also check issue comments for bot activity. Max wait 10 minutes (5-minute quiet wait already elapsed before this step). Do not edit code. Return exactly one marker line: BOT_REPLY=received or BOT_REPLY=timeout.")" DAEMON_RC=$? 
set -e if [ "${DAEMON_RC}" -ne 0 ] || [ -z "${WAIT_SID}" ]; then @@ -380,7 +380,7 @@ if [ "${DAEMON_RC}" -ne 0 ] || [ -z "${WAIT_SID}" ]; then BOT_UNAVAILABLE=true else set +e - WAIT_RESULT="$(csa session wait --session "${WAIT_SID}" --timeout 1800)" + WAIT_RESULT="$(csa session wait --session "${WAIT_SID}")" WAIT_RC=$? set -e if [ "${WAIT_RC}" -ne 0 ]; then @@ -560,7 +560,7 @@ DONE WHEN: ```bash set -euo pipefail set +e -FIX_SID="$(csa run --daemon --sa-mode true --force-ignore-tier-setting --tool auto --timeout 1800 --idle-timeout 1800 "Bounded fallback-fix task only. Do NOT invoke pr-bot skill or any full PR workflow. Operate on PR #${PR_NUM} in repo ${REPO}. Bot is unavailable and fallback local review found issues. Run a self-contained max-3-round fix cycle: read latest findings from csa review --range main...HEAD, apply fixes with commits, re-run review, repeat until clean. Return exactly one marker line FALLBACK_FIX=clean when clean; otherwise return FALLBACK_FIX=failed and exit non-zero.")" +FIX_SID="$(csa run --sa-mode true --force-ignore-tier-setting --tool auto --timeout 1800 --idle-timeout 1800 "Bounded fallback-fix task only. Do NOT invoke pr-bot skill or any full PR workflow. Operate on PR #${PR_NUM} in repo ${REPO}. Bot is unavailable and fallback local review found issues. Run a self-contained max-3-round fix cycle: read latest findings from csa review --range main...HEAD, apply fixes with commits, re-run review, repeat until clean. Return exactly one marker line FALLBACK_FIX=clean when clean; otherwise return FALLBACK_FIX=failed and exit non-zero.")" DAEMON_RC=$? set -e if [ "${DAEMON_RC}" -ne 0 ] || [ -z "${FIX_SID}" ]; then @@ -568,7 +568,7 @@ if [ "${DAEMON_RC}" -ne 0 ] || [ -z "${FIX_SID}" ]; then exit 1 fi set +e -FIX_RESULT="$(csa session wait --session "${FIX_SID}" --timeout 1800)" +FIX_RESULT="$(csa session wait --session "${FIX_SID}")" FIX_RC=$? 
set -e @@ -1042,7 +1042,7 @@ sleep 250 # --- Delegate remaining polling to CSA via daemon+wait (max 10 min) --- BOT_CLEAN=false set +e -WAIT_SID="$(csa run --daemon --sa-mode true --force-ignore-tier-setting --tool auto --timeout 1800 --idle-timeout 650 \ +WAIT_SID="$(csa run --sa-mode true --force-ignore-tier-setting --tool auto --timeout 1800 --idle-timeout 650 \ "Bounded post-fix re-review gate. Do NOT invoke pr-bot skill or any full PR workflow. \ Operate on PR #${PR_NUM} in repo ${REPO}. Wait for @${CLOUD_BOT_NAME} review on HEAD ${CURRENT_SHA}. \ Check for a review EVENT via 'gh api repos/${REPO}/pulls/${PR_NUM}/reviews' with submitted_at after \ @@ -1059,7 +1059,7 @@ if [ "${DAEMON_RC}" -ne 0 ] || [ -z "${WAIT_SID}" ]; then exit 1 fi set +e -WAIT_RESULT="$(csa session wait --session "${WAIT_SID}" --timeout 1800)" +WAIT_RESULT="$(csa session wait --session "${WAIT_SID}")" WAIT_RC=$? set -e @@ -1255,7 +1255,7 @@ if [ "${COMMIT_COUNT}" -gt 3 ]; then # 7. Delegate post-rebase wait/fix/review loop to CSA via daemon+wait. set +e - GATE_SID="$(csa run --daemon --sa-mode true --force-ignore-tier-setting --tool auto --timeout 5400 --idle-timeout 5400 "Bounded post-rebase gate task only. Do NOT invoke pr-bot skill or any full PR workflow. Operate on PR #${PR_NUM} in repo ${REPO} (branch ${WORKFLOW_BRANCH}). Complete the post-rebase review gate end-to-end. For each cloud bot trigger, wait 5 minutes quietly, then poll up to 10 minutes for a response. If response contains P0/P1/P2 findings, iteratively fix/commit/push/re-trigger and re-check with the same 15-minute wait policy (max 3 rounds). If bot times out, abort and report to user; return exactly one marker line REBASE_GATE=PASS when clean, otherwise REBASE_GATE=FAIL and exit non-zero.")" + GATE_SID="$(csa run --sa-mode true --force-ignore-tier-setting --tool auto --timeout 5400 --idle-timeout 5400 "Bounded post-rebase gate task only. Do NOT invoke pr-bot skill or any full PR workflow. 
Operate on PR #${PR_NUM} in repo ${REPO} (branch ${WORKFLOW_BRANCH}). Complete the post-rebase review gate end-to-end. For each cloud bot trigger, wait 5 minutes quietly, then poll up to 10 minutes for a response. If response contains P0/P1/P2 findings, iteratively fix/commit/push/re-trigger and re-check with the same 15-minute wait policy (max 3 rounds). If bot times out, abort and report to user; return exactly one marker line REBASE_GATE=PASS when clean, otherwise REBASE_GATE=FAIL and exit non-zero.")" DAEMON_RC=$? set -e if [ "${DAEMON_RC}" -ne 0 ] || [ -z "${GATE_SID}" ]; then @@ -1267,7 +1267,7 @@ if [ "${COMMIT_COUNT}" -gt 3 ]; then exit 1 fi set +e - GATE_RESULT="$(csa session wait --session "${GATE_SID}" --timeout 5400)" + GATE_RESULT="$(csa session wait --session "${GATE_SID}")" GATE_RC=$? set -e if [ "${GATE_RC}" -ne 0 ]; then diff --git a/patterns/sa/PATTERN.md b/patterns/sa/PATTERN.md index 11ac2063..cbc7f394 100644 --- a/patterns/sa/PATTERN.md +++ b/patterns/sa/PATTERN.md @@ -70,8 +70,8 @@ Layer 1 (claude-code) will: 4. Write `result.toml` to `$CSA_SESSION_DIR/result.toml` (with `todo_path = "$CSA_SESSION_DIR/artifacts/TODO.md"`) ```bash -SID=$(csa run --daemon --prompt-file "${PROMPT_FILE}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --prompt-file "${PROMPT_FILE}") +csa session wait --session "$SID" ``` ## Step 5: Parse Planning Result @@ -132,8 +132,8 @@ Fix the underlying issues to ensure codebase integrity. ```bash IMPL_FILE=$(mktemp /tmp/sa-impl-XXXXXX.txt) echo "CSA_VAR:IMPL_FILE=$IMPL_FILE" -SID=$(csa run --daemon --session "${SESSION_ID}" --prompt-file "${IMPL_FILE}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --session "${SESSION_ID}" --prompt-file "${IMPL_FILE}") +csa session wait --session "$SID" ``` ## ELSE @@ -149,8 +149,8 @@ Resume Layer 1 with user's revision feedback. 
```bash RESUME_FILE=$(mktemp /tmp/sa-resume-XXXXXX.txt) echo "CSA_VAR:RESUME_FILE=$RESUME_FILE" -SID=$(csa run --daemon --session "${SESSION_ID}" --prompt-file "${RESUME_FILE}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --session "${SESSION_ID}" --prompt-file "${RESUME_FILE}") +csa session wait --session "$SID" ``` ## ELSE diff --git a/patterns/sa/skills/sa/SKILL.md b/patterns/sa/skills/sa/SKILL.md index dc610743..3b26caa2 100644 --- a/patterns/sa/skills/sa/SKILL.md +++ b/patterns/sa/skills/sa/SKILL.md @@ -299,8 +299,8 @@ DONE WHEN: - $CSA_SESSION_DIR/result.toml contains [result], [report], [timing], [tool], [artifacts] PLAN_EOF -SID=$(csa run --daemon --sa-mode true --prompt-file "$PROMPT_FILE") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --sa-mode true --prompt-file "$PROMPT_FILE") +csa session wait --session "$SID" ``` ### Template B: Implementation Dispatch (Manager -> Layer 1) @@ -341,8 +341,8 @@ DONE WHEN: - $CSA_SESSION_DIR/result.toml exists and is self-contained for manager decision IMPL_EOF -SID=$(csa run --daemon --sa-mode true --session "$SESSION_ID" --prompt-file "$PROMPT_FILE") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --sa-mode true --session "$SESSION_ID" --prompt-file "$PROMPT_FILE") +csa session wait --session "$SID" ``` ### Template C: Trust Verification Dispatch (Manager -> Reviewer Employee) @@ -377,8 +377,8 @@ DONE WHEN: - $CSA_SESSION_DIR/result.toml includes clear verdict in summary/report VERIFY_EOF -SID=$(csa run --daemon --sa-mode true --prompt-file "$PROMPT_FILE") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --sa-mode true --prompt-file "$PROMPT_FILE") +csa session wait --session "$SID" ``` ## Model Selection Guidelines diff --git a/patterns/sa/workflow.toml b/patterns/sa/workflow.toml index af2efec0..2f65671c 100644 --- a/patterns/sa/workflow.toml +++ b/patterns/sa/workflow.toml @@ -121,8 +121,8 @@ Layer 1 (claude-code) will: 4. 
Write `result.toml` to `$CSA_SESSION_DIR/result.toml` (with `todo_path = "$CSA_SESSION_DIR/artifacts/TODO.md"`) ```bash -SID=$(csa run --daemon --prompt-file "${PROMPT_FILE}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --prompt-file "${PROMPT_FILE}") +csa session wait --session "$SID" ```""" on_fail = "abort" @@ -186,8 +186,8 @@ Fix the underlying issues to ensure codebase integrity. ```bash IMPL_FILE=$(mktemp /tmp/sa-impl-XXXXXX.txt) echo "CSA_VAR:IMPL_FILE=$IMPL_FILE" -SID=$(csa run --daemon --session "${SESSION_ID}" --prompt-file "${IMPL_FILE}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --session "${SESSION_ID}" --prompt-file "${IMPL_FILE}") +csa session wait --session "$SID" ```""" on_fail = "abort" condition = "${USER_APPROVES}" @@ -202,8 +202,8 @@ Resume Layer 1 with user's revision feedback. ```bash RESUME_FILE=$(mktemp /tmp/sa-resume-XXXXXX.txt) echo "CSA_VAR:RESUME_FILE=$RESUME_FILE" -SID=$(csa run --daemon --session "${SESSION_ID}" --prompt-file "${RESUME_FILE}") -csa session wait --session "$SID" --timeout 1800 +SID=$(csa run --session "${SESSION_ID}" --prompt-file "${RESUME_FILE}") +csa session wait --session "$SID" ```""" on_fail = "abort" condition = "(!(${USER_APPROVES})) && (${USER_MODIFIES})" diff --git a/patterns/security-audit/PATTERN.md b/patterns/security-audit/PATTERN.md index 88f9b3aa..5348b4f3 100644 --- a/patterns/security-audit/PATTERN.md +++ b/patterns/security-audit/PATTERN.md @@ -45,11 +45,11 @@ OnFail: abort Module too large for local audit. Delegate entire audit to CSA. ```bash -SID=$(csa run --daemon "Perform security audit following security-audit skill protocol. +SID=$(csa run "Perform security audit following security-audit skill protocol. Review changed files and associated tests. Three phases: test completeness, vulnerability scan, code quality. 
Output structured audit report.") -csa session wait --session "$SID" --timeout 1800 +csa session wait --session "$SID" ``` ## ELSE From f6a2ce0983ccdeb6004264e23998b06940a78dfd Mon Sep 17 00:00:00 2001 From: Ryder Freeman Date: Tue, 31 Mar 2026 07:03:46 -0700 Subject: [PATCH 2/4] fix(daemon): write daemon.pid file and fix PID resolution for session kill spawn_daemon now writes a `daemon.pid` file to the session directory, which `read_daemon_pid` reads as the primary PID source. The previous approach (parsing stderr.log for the RPJ directive) failed because the directive is written to the parent's stderr, not the session's. Keeps the stderr fallback for legacy sessions spawned before this fix. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../cli-sub-agent/src/session_cmds_daemon.rs | 28 +++++++++++-------- crates/csa-process/src/daemon.rs | 5 ++++ 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/crates/cli-sub-agent/src/session_cmds_daemon.rs b/crates/cli-sub-agent/src/session_cmds_daemon.rs index 814d1cb7..f2f4d59d 100644 --- a/crates/cli-sub-agent/src/session_cmds_daemon.rs +++ b/crates/cli-sub-agent/src/session_cmds_daemon.rs @@ -16,19 +16,25 @@ fn is_pid_alive(pid: u32) -> bool { unsafe { libc::kill(pid as libc::pid_t, 0) == 0 } } -/// Read the daemon PID from the session directory's spool metadata. -/// Returns None if the PID file doesn't exist or can't be parsed. +/// Read the daemon PID from the session directory. +/// Primary source: `daemon.pid` file written by `spawn_daemon`. +/// Fallback: parse the `CSA:SESSION_STARTED` directive from stderr.log (legacy). fn read_daemon_pid(session_dir: &std::path::Path) -> Option { - // The daemon parent writes the PID to stdout.log's parent dir as pid.txt - // (not yet implemented); fall back to parsing stderr for the RPJ directive. + // Primary: daemon.pid file (written by spawn_daemon since v0.1.198). 
+ let pid_path = session_dir.join("daemon.pid"); + if let Ok(content) = fs::read_to_string(&pid_path) + && let Ok(pid) = content.trim().parse() + { + return Some(pid); + } + // Fallback: parse stderr for the RPJ directive (legacy sessions). let stderr_path = session_dir.join("stderr.log"); - if let Ok(content) = fs::read_to_string(&stderr_path) { - // Parse "CSA:SESSION_STARTED id=... pid= ..." - if let Some(pid_start) = content.find("pid=") { - let rest = &content[pid_start + 4..]; - let pid_str: String = rest.chars().take_while(|c| c.is_ascii_digit()).collect(); - return pid_str.parse().ok(); - } + if let Ok(content) = fs::read_to_string(&stderr_path) + && let Some(pid_start) = content.find("pid=") + { + let rest = &content[pid_start + 4..]; + let pid_str: String = rest.chars().take_while(|c| c.is_ascii_digit()).collect(); + return pid_str.parse().ok(); } None } diff --git a/crates/csa-process/src/daemon.rs b/crates/csa-process/src/daemon.rs index febc3599..135664f9 100644 --- a/crates/csa-process/src/daemon.rs +++ b/crates/csa-process/src/daemon.rs @@ -80,6 +80,11 @@ pub fn spawn_daemon(config: DaemonSpawnConfig) -> Result { let pid = child.id(); + // Write daemon PID file for `csa session kill` and `wait` liveness checks. + let pid_path = config.session_dir.join("daemon.pid"); + std::fs::write(&pid_path, pid.to_string()) + .with_context(|| format!("failed to write {}", pid_path.display()))?; + // Detach: the daemon child will outlive us. We must not leave a // zombie, so `try_wait` reaps it if it already exited (unlikely) // and `forget` prevents the Drop impl from killing the child. From 4ac6da6dcb093575d8968ca58a7e797d4f0f3834 Mon Sep 17 00:00:00 2001 From: Ryder Freeman Date: Tue, 31 Mar 2026 07:11:05 -0700 Subject: [PATCH 3/4] fix(session-kill): use pid_t type and check kill() return value Address review findings R3/R4: use libc::pid_t instead of bare i32 cast, and check kill() return value with error reporting. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/cli-sub-agent/src/session_cmds_daemon.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/crates/cli-sub-agent/src/session_cmds_daemon.rs b/crates/cli-sub-agent/src/session_cmds_daemon.rs index f2f4d59d..1a8544e7 100644 --- a/crates/cli-sub-agent/src/session_cmds_daemon.rs +++ b/crates/cli-sub-agent/src/session_cmds_daemon.rs @@ -227,8 +227,11 @@ pub(crate) fn handle_session_kill(session: String, cd: Option) -> Result resolved.session_id, pid, ); // SAFETY: kill(-pid, SIGTERM) sends to the entire process group. - unsafe { - libc::kill(-(pid as i32), libc::SIGTERM); + let pgid = -(pid as libc::pid_t); + let rc = unsafe { libc::kill(pgid, libc::SIGTERM) }; + if rc != 0 { + let err = std::io::Error::last_os_error(); + eprintln!("Warning: SIGTERM failed for PID {pid}: {err}"); } // Grace period: wait up to 5 seconds for clean shutdown. @@ -246,8 +249,10 @@ pub(crate) fn handle_session_kill(session: String, cd: Option) -> Result resolved.session_id, ); // SAFETY: kill(-pid, SIGKILL) force-kills the entire process group. - unsafe { - libc::kill(-(pid as i32), libc::SIGKILL); + let rc = unsafe { libc::kill(pgid, libc::SIGKILL) }; + if rc != 0 { + let err = std::io::Error::last_os_error(); + eprintln!("Warning: SIGKILL failed for PID {pid}: {err}"); } // Wait for reaping. From 1aa4a6a17f28631940e0dcbc7b27b450e3d2151a Mon Sep 17 00:00:00 2001 From: Ryder Freeman Date: Tue, 31 Mar 2026 07:25:44 -0700 Subject: [PATCH 4/4] fix(session-kill): validate PID > 1 before sending signals Guard against invalid daemon PIDs (0 or 1) which would send signals to the caller's process group or init. Addresses gemini-code-assist security finding on PR #532. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- crates/cli-sub-agent/src/session_cmds_daemon.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/cli-sub-agent/src/session_cmds_daemon.rs b/crates/cli-sub-agent/src/session_cmds_daemon.rs index 1a8544e7..63be4e35 100644 --- a/crates/cli-sub-agent/src/session_cmds_daemon.rs +++ b/crates/cli-sub-agent/src/session_cmds_daemon.rs @@ -213,6 +213,13 @@ pub(crate) fn handle_session_kill(session: String, cd: Option) -> Result ) })?; + if pid <= 1 { + anyhow::bail!( + "Refusing to kill PID {} — invalid daemon PID (would target init or caller's process group)", + pid, + ); + } + if !is_pid_alive(pid) { eprintln!( "Session {} (PID {}) is already dead",