From 283d535ae226b8623ba4d4130647a2771acb5810 Mon Sep 17 00:00:00 2001 From: jw Date: Fri, 27 Mar 2026 23:49:46 -0700 Subject: [PATCH 1/2] feat(agent): introduce POP dispatcher and Cheapest-First comms library --- bin/kp-post-office | 113 ++++++++++++++++++++++++++++++++++++++++++ lib/comms.py | 65 ++++++++++++++++++++++++ specs/pop-protocol.md | 76 ++++++++++++++++++++++++++++ 3 files changed, 254 insertions(+) create mode 100755 bin/kp-post-office create mode 100644 lib/comms.py create mode 100644 specs/pop-protocol.md diff --git a/bin/kp-post-office b/bin/kp-post-office new file mode 100755 index 0000000..837d0f3 --- /dev/null +++ b/bin/kp-post-office @@ -0,0 +1,113 @@ +#!/usr/bin/env bash +set -e + +# kp-post-office +# A POSIX Maildir-compliant dispatcher for kinder·powers. +# Usage: +# kp-post-office init +# kp-post-office drop "Task Description" +# kp-post-office watch + +COMMAND=$1 +ROLE=$2 +MAIL_ROOT="${MAILDIR:-$HOME/.beads/mail}" +ROLE_DIR="$MAIL_ROOT/$ROLE" + +show_help() { + echo "kp-post-office: POSIX Maildir agent dispatcher" + echo "Usage:" + echo " kp-post-office init " + echo " kp-post-office drop \"Task context or instructions\"" + echo " kp-post-office watch " + exit 1 +} + +if [ -z "$COMMAND" ] || [ -z "$ROLE" ]; then + show_help +fi + +case "$COMMAND" in + init) + mkdir -p "$ROLE_DIR"/{new,cur,tmp,done} + chmod 1777 "$MAIL_ROOT" 2>/dev/null || true # Best effort sticky bit + echo "✓ Maildir initialized at $ROLE_DIR" + ;; + + drop) + PAYLOAD=$3 + if [ -z "$PAYLOAD" ]; then + echo "Error: Must provide a task payload to drop." + exit 1 + fi + mkdir -p "$ROLE_DIR"/{new,cur,tmp,done} + + # Maildir delivery protocol: write to tmp, then link/rename to new + MSG_ID="$(date +%s).$$" + TMP_FILE="$ROLE_DIR/tmp/$MSG_ID" + NEW_FILE="$ROLE_DIR/new/$MSG_ID" + + echo "$PAYLOAD" > "$TMP_FILE" + mv "$TMP_FILE" "$NEW_FILE" + echo "✓ Mail dropped for $ROLE: $MSG_ID" + ;; + + watch) + REPO=$3 + if [ -z "$REPO" ] || [ ! 
-d "$REPO" ]; then + echo "Error: Must provide a valid for the worker to execute in." + exit 1 + fi + + mkdir -p "$ROLE_DIR"/{new,cur,tmp,done} + echo "ℹ Watching $ROLE_DIR/new for role: $ROLE..." + echo "ℹ Workers will be forked into: $REPO" + + while true; do + # Use find to cleanly iterate over files + for msg in "$ROLE_DIR"/new/*; do + if [ -f "$msg" ]; then + base=$(basename "$msg") + CUR_FILE="$ROLE_DIR/cur/$base" + + # Atomic Claim: mv from new to cur + if mv "$msg" "$CUR_FILE" 2>/dev/null; then + echo "✓ Claimed mail $base. Forking Claude worker..." + + # FORK WORKER (runs in subshell) + ( + # 1. Switch to working repo + cd "$REPO" || exit 1 + + # 2. Read mail + TASK_CONTENT=$(cat "$CUR_FILE") + + echo "[Worker $base] Starting in $REPO..." + + # 3. Execute Claude (The Agent) + # Using the standard claude CLI, injecting the task + # We pipe yes to avoid it blocking forever if it expects stdin, + # or just run it as a command. + if command -v claude &> /dev/null; then + claude -p "You are the '$ROLE' agent. Execute the following task. When finished, summarize your work. Task: $TASK_CONTENT" + else + echo "[Worker $base] 'claude' command not found. Mocking execution..." + sleep 2 + fi + + # 4. Mark Done + mv "$CUR_FILE" "$ROLE_DIR/done/$base" + echo "[Worker $base] Finished and archived." + ) & + + # We don't wait for the background worker; we go back to polling mail. + fi + fi + done + sleep 2 + done + ;; + + *) + show_help + ;; +esac diff --git a/lib/comms.py b/lib/comms.py new file mode 100644 index 0000000..875b703 --- /dev/null +++ b/lib/comms.py @@ -0,0 +1,65 @@ +import httpx +import json +import subprocess +import shutil +import logging + +logger = logging.getLogger(__name__) + +def vm_generate(prompt: str, port: int = 8765, model: str = "default", max_tokens: int = 4096, timeout: float = 30.0, json_schema: dict = None) -> str | None: + """Talk to any OpenAI-compatible local server. 
Returns str or None.""" + body = {"model": model, "messages": [{"role": "user", "content": prompt}], "max_tokens": max_tokens} + if json_schema: + body["response_format"] = {"type": "json_schema", "json_schema": json_schema} + try: + r = httpx.post(f"http://127.0.0.1:{port}/v1/chat/completions", json=body, timeout=timeout) + r.raise_for_status() + return r.json()["choices"][0]["message"]["content"] + except Exception as e: + logger.debug(f"vm_generate failed on port {port}: {e}") + return None + +def gemini_generate(prompt: str, max_tokens: int = 4096) -> str | None: + """Gemini CLI (free OAuth). Returns str or None.""" + if not shutil.which("gemini"): + return None + try: + r = subprocess.run(["gemini", "generate", "--max-tokens", str(max_tokens), prompt], + capture_output=True, text=True, timeout=60) + return r.stdout.strip() if r.returncode == 0 else None + except Exception as e: + logger.debug(f"gemini_generate failed: {e}") + return None + +def generate(prompt: str, max_tokens: int = 4096, json_schema: dict = None) -> str: + """Cheapest-first: gemini CLI ($0) → GPU ($0) → cmax ($$$).""" + # 1. Gemini CLI (free) - CLI doesn't support json_schema + if not json_schema: + result = gemini_generate(prompt, max_tokens) + if result: + return result + + # 2. Local GPU + result = vm_generate(prompt, port=8765, max_tokens=max_tokens, json_schema=json_schema) + if result: + return result + + # 3. 
cmax (costs money) + result = vm_generate(prompt, port=8889, model="claude-sonnet-4-20250514", max_tokens=max_tokens, json_schema=json_schema) + if result: + return result + + raise RuntimeError("All inference backends failed") + +def health(port: int = 8765) -> bool: + """Check if a local VM is alive.""" + try: + r = httpx.get(f"http://127.0.0.1:{port}/v1/models", timeout=5) + return r.status_code == 200 + except Exception: + return False + +def discover() -> dict[str, int]: + """Find all live local inference servers.""" + ports = {"gpu": 8765, "cmax": 8889, "ollama": 11434, "lmstudio": 1234, "vllm": 8000} + return {name: port for name, port in ports.items() if health(port)} diff --git a/specs/pop-protocol.md b/specs/pop-protocol.md new file mode 100644 index 0000000..69a18a2 --- /dev/null +++ b/specs/pop-protocol.md @@ -0,0 +1,76 @@ +# Post Office Protocol (POP) Spec + +*Status: Draft*
+*Level: L3/L4 Orchestration* + +## 1. Vision +**Asynchronous agent coordination without RPC.** + +Agents communicate via the POSIX filesystem (`/var/mail` or `.beads/mail`). This protocol prioritizes **agency-preservation**, **atomicity**, and **observability**. Every task is a file; every claim is a `rename`. + +--- + +## 2. Mailbox Structure + +Based on the Maildir format (`man maildir`), which relies on the atomicity of POSIX `rename` for lock-free deliveries. + +```text +~/.beads/mail/ +└── role-{name}/ # Role-based mailbox + ├── tmp/ # Incomplete deliveries + ├── new/ # Pending tasks (Dropped here) + ├── cur/ # In-progress tasks (Claimed here) + └── done/ # Completed tasks (Archived) +``` + +--- + +## 3. Operations + +### 3.1 `Drop` (Tasking) +- **Actor:** Human or Dispatcher Agent. +- **Action:** Write task to `tmp/{id}`. Once fully written, `mv tmp/{id} new/{id}`. +- **Atomic Requirement:** The `mv` ensures the mail watcher never reads a partially written file. + +### 3.2 `Peek` (Observation) +- **Actor:** Any Agent. +- **Action:** List files in `new/`. +- **Note:** Non-destructive. + +### 3.3 `Claim` (Locking) +- **Actor:** The Mail Watcher. +- **Action:** `mv new/{id} cur/{id}`. +- **Atomic Requirement:** POSIX `rename` is atomic. If multiple watchers run simultaneously, only one succeeds. + +### 3.4 `Fork` (Execution) +- **Actor:** Parent Dispatcher. +- **Action:** Upon successful claim, spawn a child sub-shell. +- **Context Switch:** The child process uses `cd {repo}` to move into the working directory, leaving the parent watching the mailbox. + +### 3.5 `Close` (Completion) +- **Actor:** The Worker Child. +- **Action:** `mv cur/{id} done/{id}` upon completion. + +--- + +## 4. Mail JSON Schema + +```json +{ + "bead_id": "meshly-123", + "rig": "meshly-backend", + "role": "auditor", + "context": { + "files": ["core/main.py", "api/routes/"], + "priority": 2, + "instructions": "Audit all audit emission sites." + }, + "invariants": ["ADR-010"], + "deadline": "2026-03-27T23:59:59Z" +} +``` + +--- + +## 5. 
The Witness (Monitoring) +The **Witness Agent** (`meshly-82x`) polls all `working/` directories. If a file's `mtime` is older than the `last_activity` heartbeat in the corresponding `bead`, the Witness moves the file to `dead/` and alerts the team. From 3e94a0c82382d65489347e3127b2abb633c14044 Mon Sep 17 00:00:00 2001 From: jw Date: Sat, 28 Mar 2026 01:46:54 -0700 Subject: [PATCH 2/2] =?UTF-8?q?fix:=20adversarial=20hardening=20=E2=80=94?= =?UTF-8?q?=209=20security,=20reliability,=20and=20correctness=20fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wave 1 (P0-P2): - skills-core.js: path traversal via resolveSkillPath — validateSkillName() rejects ../, slashes, non-alphanumeric before path.join - comms.py: stale model ID → claude-sonnet-4-6-20250514, retry with backoff for ConnectError/TimeoutException, exception class in logs - agent-outcome-logger.py: 10MB rotation guard, crash-proof outer wrapper - test_scanner.py: temp file leak → TemporaryDirectory, +nested dir test Wave 2 (P1-P2, 5-agent team): - scanner.py: ReDoS in MUST/NEVER lookahead → capped .{0,120}, skip .git/node_modules/__pycache__ in scan_directory - setup.sh: auto-register PostToolUse hook via jq (idempotent), validate GSD dir exists before symlinking - install.sh: unsafe cd → subshells, platform detection for prebuilt binaries, --build flag, better error context from cargo - superpowers.js: sanitize OPENCODE_CONFIG_DIR against prompt injection, CRLF support in frontmatter regex, race condition guard on readFileSync - analyze-token-usage.py: bare except → typed catch, MODEL_PRICING table (opus/sonnet/haiku), cache read 90% discount, type-safe dicts --- .opencode/plugins/superpowers.js | 18 ++++- hooks/agent-outcome-logger.py | 20 ++++- lib/comms.py | 40 +++++++--- lib/skills-core.js | 28 ++++++- mcp-servers/install.sh | 93 +++++++++++++++++------- scanner.py | 17 ++++- setup.sh | 52 ++++++++++--- test_scanner.py | 50 +++++++------ 
tests/claude-code/analyze-token-usage.py | 80 ++++++++++++-------- 9 files changed, 286 insertions(+), 112 deletions(-) diff --git a/.opencode/plugins/superpowers.js b/.opencode/plugins/superpowers.js index 8ac9934..f1e4947 100644 --- a/.opencode/plugins/superpowers.js +++ b/.opencode/plugins/superpowers.js @@ -14,7 +14,7 @@ const __dirname = path.dirname(fileURLToPath(import.meta.url)); // Simple frontmatter extraction (avoid dependency on skills-core for bootstrap) const extractAndStripFrontmatter = (content) => { - const match = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/); + const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/); if (!match) return { frontmatter: {}, content }; const frontmatterStr = match[1]; @@ -33,6 +33,13 @@ const extractAndStripFrontmatter = (content) => { return { frontmatter, content: body }; }; +// Validate that a resolved path doesn't contain prompt-injection vectors +const sanitizePath = (p) => { + if (!p || typeof p !== 'string') return null; + if (/[`${}<>\n\r]/.test(p)) return null; + return p; +}; + // Normalize a path: trim whitespace, expand ~, resolve to absolute const normalizePath = (p, homeDir) => { if (!p || typeof p !== 'string') return null; @@ -49,7 +56,7 @@ const normalizePath = (p, homeDir) => { export const SuperpowersPlugin = async ({ client, directory }) => { const homeDir = os.homedir(); const superpowersSkillsDir = path.resolve(__dirname, '../../skills'); - const envConfigDir = normalizePath(process.env.OPENCODE_CONFIG_DIR, homeDir); + const envConfigDir = sanitizePath(normalizePath(process.env.OPENCODE_CONFIG_DIR, homeDir)); const configDir = envConfigDir || path.join(homeDir, '.config/opencode'); // Helper to generate bootstrap content @@ -58,7 +65,12 @@ export const SuperpowersPlugin = async ({ client, directory }) => { const skillPath = path.join(superpowersSkillsDir, 'using-superpowers', 'SKILL.md'); if (!fs.existsSync(skillPath)) return null; - const fullContent = 
fs.readFileSync(skillPath, 'utf8'); + let fullContent; + try { + fullContent = fs.readFileSync(skillPath, 'utf8'); + } catch { + return null; + } const { content } = extractAndStripFrontmatter(fullContent); const toolMapping = `**Tool Mapping for OpenCode:** diff --git a/hooks/agent-outcome-logger.py b/hooks/agent-outcome-logger.py index 4ca0c5b..9b89cb6 100755 --- a/hooks/agent-outcome-logger.py +++ b/hooks/agent-outcome-logger.py @@ -11,6 +11,18 @@ from datetime import datetime, timezone from pathlib import Path +MAX_FILE_BYTES = 10 * 1024 * 1024 # 10 MB + + +def _rotate_if_needed(out_file: Path) -> None: + """Rotate log file if it exceeds MAX_FILE_BYTES. Never raises.""" + try: + if out_file.exists() and out_file.stat().st_size > MAX_FILE_BYTES: + rotated = out_file.with_suffix(".jsonl.1") + out_file.replace(rotated) + except Exception as exc: + print(f"agent-outcome-logger: rotation failed: {exc}", file=sys.stderr) + def main(): try: @@ -59,9 +71,15 @@ def main(): out_dir.mkdir(parents=True, exist_ok=True) out_file = out_dir / "agent_outcomes.jsonl" + _rotate_if_needed(out_file) + with open(out_file, "a") as f: f.write(json.dumps(record, separators=(",", ":")) + "\n") if __name__ == "__main__": - main() + try: + main() + except Exception as exc: + print(f"agent-outcome-logger: unexpected error: {exc}", file=sys.stderr) + sys.exit(0) diff --git a/lib/comms.py b/lib/comms.py index 875b703..d07e788 100644 --- a/lib/comms.py +++ b/lib/comms.py @@ -1,23 +1,33 @@ import httpx -import json import subprocess import shutil import logging +import time logger = logging.getLogger(__name__) -def vm_generate(prompt: str, port: int = 8765, model: str = "default", max_tokens: int = 4096, timeout: float = 30.0, json_schema: dict = None) -> str | None: +def vm_generate(prompt: str, port: int = 8765, model: str = "default", max_tokens: int = 4096, timeout: float = 30.0, json_schema: dict | None = None) -> str | None: """Talk to any OpenAI-compatible local server. 
Returns str or None.""" body = {"model": model, "messages": [{"role": "user", "content": prompt}], "max_tokens": max_tokens} if json_schema: body["response_format"] = {"type": "json_schema", "json_schema": json_schema} - try: - r = httpx.post(f"http://127.0.0.1:{port}/v1/chat/completions", json=body, timeout=timeout) - r.raise_for_status() - return r.json()["choices"][0]["message"]["content"] - except Exception as e: - logger.debug(f"vm_generate failed on port {port}: {e}") - return None + last_exc: Exception | None = None + for attempt in range(2): + try: + r = httpx.post(f"http://127.0.0.1:{port}/v1/chat/completions", json=body, timeout=timeout) + r.raise_for_status() + return r.json()["choices"][0]["message"]["content"] + except (httpx.ConnectError, httpx.TimeoutException) as e: + last_exc = e + logger.debug(f"vm_generate [{type(e).__name__}] port {port}: {e}") + if attempt < 1: + time.sleep(1) + continue + except Exception as e: + logger.debug(f"vm_generate [{type(e).__name__}] port {port}: {e}") + return None + logger.debug(f"vm_generate [{type(last_exc).__name__}] port {port}: retries exhausted") + return None def gemini_generate(prompt: str, max_tokens: int = 4096) -> str | None: """Gemini CLI (free OAuth). Returns str or None.""" @@ -31,7 +41,7 @@ def gemini_generate(prompt: str, max_tokens: int = 4096) -> str | None: logger.debug(f"gemini_generate failed: {e}") return None -def generate(prompt: str, max_tokens: int = 4096, json_schema: dict = None) -> str: +def generate(prompt: str, max_tokens: int = 4096, json_schema: dict | None = None) -> str: """Cheapest-first: gemini CLI ($0) → GPU ($0) → cmax ($$$).""" # 1. Gemini CLI (free) - CLI doesn't support json_schema if not json_schema: @@ -45,7 +55,7 @@ def generate(prompt: str, max_tokens: int = 4096, json_schema: dict = None) -> s return result # 3. 
cmax (costs money) - result = vm_generate(prompt, port=8889, model="claude-sonnet-4-20250514", max_tokens=max_tokens, json_schema=json_schema) + result = vm_generate(prompt, port=8889, model="claude-sonnet-4-6-20250514", max_tokens=max_tokens, json_schema=json_schema) if result: return result @@ -60,6 +70,12 @@ def health(port: int = 8765) -> bool: return False def discover() -> dict[str, int]: - """Find all live local inference servers.""" + """Find all live local inference servers. + + Security note: probes localhost ports without authentication. + Intended for local dev environments only — do not expose the + returned port map to untrusted callers or use in production + without adding auth checks. + """ ports = {"gpu": 8765, "cmax": 8889, "ollama": 11434, "lmstudio": 1234, "vllm": 8000} return {name: port for name, port in ports.items() if health(port)} diff --git a/lib/skills-core.js b/lib/skills-core.js index 5e5bb70..40b453c 100644 --- a/lib/skills-core.js +++ b/lib/skills-core.js @@ -47,10 +47,25 @@ function extractFrontmatter(filePath) { return { name, description }; } catch (error) { + console.error(`[skills-core] Failed to extract frontmatter from ${filePath}:`, error.message); return { name: '', description: '' }; } } +/** + * Validate a skill name to prevent path traversal. + * Rejects names containing path separators, '..' sequences, or invalid characters. + * + * @param {string} name - Skill name (after stripping any 'superpowers:' prefix) + * @returns {string | null} - Validated name, or null if invalid + */ +function validateSkillName(name) { + if (!name || typeof name !== 'string') return null; + if (name.includes('/') || name.includes('\\') || name.includes('..')) return null; + if (!/^[a-zA-Z0-9][a-zA-Z0-9_-]*$/.test(name)) return null; + return name; +} + /** * Find all SKILL.md files in a directory recursively. 
* @@ -70,6 +85,11 @@ function findSkillsInDir(dir, sourceType, maxDepth = 3) { const entries = fs.readdirSync(currentDir, { withFileTypes: true }); for (const entry of entries) { + // Defense-in-depth: entry.name comes from readdirSync (OS-provided, + // already resolved), but validate anyway to guard against future + // refactors that might pass untrusted names through this path. + if (!validateSkillName(entry.name)) continue; + const fullPath = path.join(currentDir, entry.name); if (entry.isDirectory()) { @@ -110,6 +130,11 @@ function resolveSkillPath(skillName, superpowersDir, personalDir) { const forceSuperpowers = skillName.startsWith('superpowers:'); const actualSkillName = forceSuperpowers ? skillName.replace(/^superpowers:/, '') : skillName; + // Validate skill name to prevent path traversal + if (!validateSkillName(actualSkillName)) { + return null; + } + // Try personal skills first (unless explicitly superpowers:) if (!forceSuperpowers && personalDir) { const personalPath = path.join(personalDir, actualSkillName); @@ -204,5 +229,6 @@ export { findSkillsInDir, resolveSkillPath, checkForUpdates, - stripFrontmatter + stripFrontmatter, + validateSkillName }; diff --git a/mcp-servers/install.sh b/mcp-servers/install.sh index 0310b37..64ba9fe 100755 --- a/mcp-servers/install.sh +++ b/mcp-servers/install.sh @@ -7,16 +7,19 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" RUST_MIN_STACK=16777216 export RUST_MIN_STACK +FORCE_BUILD=0 + +# Parse flags +for arg in "$@"; do + case "$arg" in + --build) FORCE_BUILD=1 ;; + *) echo "Unknown flag: $arg"; exit 1 ;; + esac +done echo "=== kp-mcp-servers installer ===" echo "" -# Check prerequisites -if ! command -v cargo &>/dev/null; then - echo "ERROR: cargo not found. Install Rust: https://rustup.rs" - exit 1 -fi - if ! 
command -v claude &>/dev/null; then echo "WARNING: claude CLI not found — will build but skip registration" SKIP_REGISTER=1 @@ -24,31 +27,69 @@ else SKIP_REGISTER=0 fi -# Build kp-github-mcp -echo "[1/4] Building kp-github-mcp..." -cd "$SCRIPT_DIR/github" -cargo build --release 2>&1 | grep -E "Compiling kp-github|Finished|error" || true -GITHUB_BIN="$SCRIPT_DIR/github/target/release/kp-github-mcp" -if [ ! -f "$GITHUB_BIN" ]; then - echo "ERROR: kp-github-mcp build failed" - exit 1 +# Platform detection for pre-built binaries +OS="$(uname -s | tr '[:upper:]' '[:lower:]')" +ARCH="$(uname -m)" +case "$OS" in + linux) PLATFORM_DIR="linux-${ARCH}" ;; + darwin) PLATFORM_DIR="macos-${ARCH}" ;; + *) PLATFORM_DIR="" ;; +esac + +PREBUILT_DIR="$SCRIPT_DIR/bin/${PLATFORM_DIR}" + +# Try pre-built binaries first +if [ "$FORCE_BUILD" = "0" ] && [ -n "$PLATFORM_DIR" ] && [ -d "$PREBUILT_DIR" ]; then + PREBUILT_GITHUB="$PREBUILT_DIR/kp-github-mcp" + PREBUILT_SEQTHINK="$PREBUILT_DIR/kp-sequential-thinking" + + if [ -x "$PREBUILT_GITHUB" ] && [ -x "$PREBUILT_SEQTHINK" ]; then + echo "Found pre-built binaries for ${PLATFORM_DIR}" + GITHUB_BIN="$PREBUILT_GITHUB" + SEQTHINK_BIN="$PREBUILT_SEQTHINK" + echo " kp-github-mcp: $(du -h "$GITHUB_BIN" | cut -f1)" + echo " kp-sequential-thinking: $(du -h "$SEQTHINK_BIN" | cut -f1)" + echo " (use --build to force cargo build)" + echo "" + else + echo "Pre-built binaries incomplete for ${PLATFORM_DIR}, falling back to cargo build" + FORCE_BUILD=1 + fi +else + FORCE_BUILD=1 fi -echo " Built: $GITHUB_BIN ($(du -h "$GITHUB_BIN" | cut -f1))" - -# Build kp-sequential-thinking -echo "[2/4] Building kp-sequential-thinking..." -cd "$SCRIPT_DIR/sequential-thinking" -cargo build --release 2>&1 | grep -E "Compiling kp-sequential|Finished|error" || true -SEQTHINK_BIN="$SCRIPT_DIR/sequential-thinking/target/release/kp-sequential-thinking" -if [ ! 
-f "$SEQTHINK_BIN" ]; then - echo "ERROR: kp-sequential-thinking build failed" - exit 1 + +if [ "$FORCE_BUILD" = "1" ]; then + # Check prerequisites + if ! command -v cargo &>/dev/null; then + echo "ERROR: cargo not found. Install Rust: https://rustup.rs" + exit 1 + fi + + # Build kp-github-mcp (subshell preserves cwd) + echo "[1/4] Building kp-github-mcp..." + ( cd "$SCRIPT_DIR/github" && cargo build --release 2>&1 | grep -E "Compiling|Finished|error|warning" | tail -20 ) || true + GITHUB_BIN="$SCRIPT_DIR/github/target/release/kp-github-mcp" + if [ ! -f "$GITHUB_BIN" ]; then + echo "ERROR: kp-github-mcp build failed" + exit 1 + fi + echo " Built: $GITHUB_BIN ($(du -h "$GITHUB_BIN" | cut -f1))" + + # Build kp-sequential-thinking (subshell preserves cwd) + echo "[2/4] Building kp-sequential-thinking..." + ( cd "$SCRIPT_DIR/sequential-thinking" && cargo build --release 2>&1 | grep -E "Compiling|Finished|error|warning" | tail -20 ) || true + SEQTHINK_BIN="$SCRIPT_DIR/sequential-thinking/target/release/kp-sequential-thinking" + if [ ! -f "$SEQTHINK_BIN" ]; then + echo "ERROR: kp-sequential-thinking build failed" + exit 1 + fi + echo " Built: $SEQTHINK_BIN ($(du -h "$SEQTHINK_BIN" | cut -f1))" fi -echo " Built: $SEQTHINK_BIN ($(du -h "$SEQTHINK_BIN" | cut -f1))" if [ "$SKIP_REGISTER" = "1" ]; then echo "" - echo "Binaries built. Register manually with:" + echo "Binaries ready. 
Register manually with:" echo " claude mcp add kp-github --transport stdio -- $GITHUB_BIN" echo " claude mcp add kp-sequential-thinking --transport stdio -- $SEQTHINK_BIN" exit 0 diff --git a/scanner.py b/scanner.py index 1aedf09..4f63158 100644 --- a/scanner.py +++ b/scanner.py @@ -83,13 +83,13 @@ class Finding: # Tier 3: MUST/NEVER without escape (medium severity) # These need context - only flag if no escape clause nearby { - "pattern": r"\bMUST\b(?!.*\b(unless|except|if|when|consider)\b)", + "pattern": r"\bMUST\b(?!.{0,120}\b(unless|except|if|when|consider)\b)", "severity": "medium", "case_sensitive": True, "suggestion": "Add escape clause or replace with 'should strongly consider'", }, { - "pattern": r"\bNEVER\b(?!.*\b(unless|except|if|when|rarely)\b)", + "pattern": r"\bNEVER\b(?!.{0,120}\b(unless|except|if|when|rarely)\b)", "severity": "medium", "case_sensitive": True, "suggestion": "Add exception cases or replace with 'avoid' + consequences", @@ -158,10 +158,21 @@ def scan_file(path: Path) -> Iterator[Finding]: ) +_SKIP_DIRS = frozenset({"node_modules", "__pycache__"}) + + +def _should_skip(path: Path) -> bool: + """Return True if any path segment is hidden (dot-prefixed) or in the skip list.""" + for part in path.parts: + if part.startswith(".") or part in _SKIP_DIRS: + return True + return False + + def scan_directory(path: Path, extensions: tuple = (".md",)) -> Iterator[Finding]: """Recursively scan a directory for compulsion language.""" for file in path.rglob("*"): - if file.suffix in extensions: + if file.suffix in extensions and not _should_skip(file.relative_to(path)): yield from scan_file(file) diff --git a/setup.sh b/setup.sh index c285960..a8ee4ff 100755 --- a/setup.sh +++ b/setup.sh @@ -58,7 +58,11 @@ link_file() { # GSD workflows reference ~/.claude/get-shit-done at runtime echo "[1/3] GSD runtime" mkdir -p "${CLAUDE_DIR}" -link_dir "${PLUGIN_ROOT}/gsd" "${CLAUDE_DIR}/get-shit-done" +if [ -d "${PLUGIN_ROOT}/gsd" ]; then + link_dir 
"${PLUGIN_ROOT}/gsd" "${CLAUDE_DIR}/get-shit-done" +else + echo " WARN: ${PLUGIN_ROOT}/gsd not found — skipping GSD symlink" +fi # NOTE: GSD commands and agents are NOT symlinked here. # The plugin system registers them under the kinderpowers: namespace @@ -114,20 +118,44 @@ fi # Register in settings.json (idempotent) SETTINGS_FILE="${CLAUDE_DIR}/settings.json" -if [ -f "$SETTINGS_FILE" ]; then - # Check if hook is already registered - if grep -q "agent-outcome-logger" "$SETTINGS_FILE" 2>/dev/null; then - echo " OK: hook already registered in settings.json" +HOOK_ENTRY="{\"matcher\":\"Agent\",\"command\":\"python3 ${HOOK_DST}\"}" + +if grep -q "agent-outcome-logger" "$SETTINGS_FILE" 2>/dev/null; then + echo " OK: hook already registered in settings.json" +elif command -v jq >/dev/null 2>&1; then + # jq available — auto-register + if [ ! -f "$SETTINGS_FILE" ]; then + # Create settings.json with just the hooks section + jq -n --argjson entry "$HOOK_ENTRY" \ + '{"hooks":{"PostToolUse":[$entry]}}' > "$SETTINGS_FILE" + echo " OK: created ${SETTINGS_FILE} with PostToolUse hook" + elif ! jq -e '.hooks' "$SETTINGS_FILE" >/dev/null 2>&1; then + # File exists but no hooks key — add it + jq --argjson entry "$HOOK_ENTRY" \ + '.hooks = {"PostToolUse":[$entry]}' "$SETTINGS_FILE" > "${SETTINGS_FILE}.tmp" \ + && mv "${SETTINGS_FILE}.tmp" "$SETTINGS_FILE" + echo " OK: added hooks.PostToolUse to settings.json" + elif ! 
jq -e '.hooks.PostToolUse' "$SETTINGS_FILE" >/dev/null 2>&1; then + # hooks exists but no PostToolUse array — add it + jq --argjson entry "$HOOK_ENTRY" \ + '.hooks.PostToolUse = [$entry]' "$SETTINGS_FILE" > "${SETTINGS_FILE}.tmp" \ + && mv "${SETTINGS_FILE}.tmp" "$SETTINGS_FILE" + echo " OK: added PostToolUse array to settings.json" else - echo " NOTE: Add this to your settings.json hooks.PostToolUse array:" - echo ' {' - echo ' "matcher": "Agent",' - echo " \"command\": \"python3 ${HOOK_DST}\"" - echo ' }' - echo " (Manual step — setup.sh does not modify settings.json directly)" + # PostToolUse array exists — append our entry + jq --argjson entry "$HOOK_ENTRY" \ + '.hooks.PostToolUse += [$entry]' "$SETTINGS_FILE" > "${SETTINGS_FILE}.tmp" \ + && mv "${SETTINGS_FILE}.tmp" "$SETTINGS_FILE" + echo " OK: appended agent-outcome-logger to PostToolUse hooks" fi else - echo " NOTE: ${SETTINGS_FILE} not found — create it and add the PostToolUse hook" + # No jq — fall back to manual instructions + echo " NOTE: jq not found — cannot auto-register hook." 
+ echo " Add this to your ${SETTINGS_FILE} hooks.PostToolUse array:" + echo ' {' + echo ' "matcher": "Agent",' + echo " \"command\": \"python3 ${HOOK_DST}\"" + echo ' }' fi echo "" diff --git a/test_scanner.py b/test_scanner.py index 73cc5b8..659930a 100644 --- a/test_scanner.py +++ b/test_scanner.py @@ -4,64 +4,56 @@ import tempfile from pathlib import Path -from scanner import Finding, scan_file, scan_directory +from scanner import scan_file, scan_directory -def _write_temp(content: str, suffix: str = ".md") -> Path: - """Write content to a temp file and return its path.""" - f = tempfile.NamedTemporaryFile(mode="w", suffix=suffix, delete=False) - f.write(content) - f.close() - return Path(f.name) +def _scan_content(content: str, suffix: str = ".md"): + """Write content to a temp file, scan it, and clean up automatically.""" + with tempfile.TemporaryDirectory() as d: + path = Path(d) / f"test{suffix}" + path.write_text(content) + return list(scan_file(path)) def test_detects_iron_law(): - path = _write_temp("## The Iron Law\nDo the thing.\n") - findings = list(scan_file(path)) + findings = _scan_content("## The Iron Law\nDo the thing.\n") assert any(f.severity == "high" and "Iron Law" in f.pattern for f in findings) def test_detects_not_negotiable(): - path = _write_temp("This is not negotiable.\n") - findings = list(scan_file(path)) + findings = _scan_content("This is not negotiable.\n") assert any(f.severity == "high" for f in findings) def test_detects_must_without_escape(): - path = _write_temp("You MUST do this.\n") - findings = list(scan_file(path)) + findings = _scan_content("You MUST do this.\n") assert any(f.severity == "medium" for f in findings) def test_allows_must_with_escape(): - path = _write_temp("You MUST do this unless there's a good reason.\n") - findings = list(scan_file(path)) + findings = _scan_content("You MUST do this unless there's a good reason.\n") must_findings = [f for f in findings if "MUST" in f.pattern] assert len(must_findings) == 
0 def test_detects_never_without_escape(): - path = _write_temp("NEVER do this.\n") - findings = list(scan_file(path)) + findings = _scan_content("NEVER do this.\n") assert any(f.severity == "medium" for f in findings) def test_allows_never_with_escape(): - path = _write_temp("NEVER do this unless you understand the consequences.\n") - findings = list(scan_file(path)) + findings = _scan_content("NEVER do this unless you understand the consequences.\n") never_findings = [f for f in findings if "NEVER" in f.pattern] assert len(never_findings) == 0 def test_clean_file(): - path = _write_temp("This is agency-preserving guidance.\nStrongly recommended.\n") - findings = list(scan_file(path)) + findings = _scan_content("This is agency-preserving guidance.\nStrongly recommended.\n") assert len(findings) == 0 def test_detects_delete_start_over(): - path = _write_temp("Delete it. Start over.\n") - findings = list(scan_file(path)) + findings = _scan_content("Delete it. Start over.\n") assert any(f.severity == "medium" for f in findings) @@ -74,6 +66,18 @@ def test_directory_scan(): assert any(f.severity == "high" for f in findings) +def test_directory_scan_nested(): + with tempfile.TemporaryDirectory() as d: + subdir = Path(d) / "level1" / "level2" + subdir.mkdir(parents=True) + (subdir / "deep.md").write_text("## The Iron Law\nObey.\n") + (Path(d) / "top.md").write_text("Good guidance.\n") + findings = list(scan_directory(Path(d))) + assert any(f.severity == "high" and "Iron Law" in f.pattern for f in findings) + deep_findings = [f for f in findings if "deep.md" in str(f.file)] + assert len(deep_findings) >= 1, "scan_directory must find files in nested subdirectories" + + if __name__ == "__main__": tests = [v for k, v in globals().items() if k.startswith("test_")] for test in tests: diff --git a/tests/claude-code/analyze-token-usage.py b/tests/claude-code/analyze-token-usage.py index 44d473d..2c4caf4 100755 --- a/tests/claude-code/analyze-token-usage.py +++ 
b/tests/claude-code/analyze-token-usage.py @@ -4,11 +4,19 @@ Breaks down usage by main session and individual subagents. """ +import argparse import json import sys from pathlib import Path from collections import defaultdict +# Per-million-token pricing: (input, output) +MODEL_PRICING = { + "opus": (15.0, 75.0), # Opus 4.6 + "sonnet": (3.0, 15.0), # Sonnet 4.6 (default) + "haiku": (0.80, 4.0), # Haiku 4.5 +} + def analyze_main_session(filepath): """Analyze a session file and return token usage broken down by agent.""" main_usage = { @@ -20,14 +28,14 @@ def analyze_main_session(filepath): } # Track usage per subagent - subagent_usage = defaultdict(lambda: { + subagent_usage: dict[str, dict[str, int]] = defaultdict(lambda: { 'input_tokens': 0, 'output_tokens': 0, 'cache_creation': 0, 'cache_read': 0, 'messages': 0, - 'description': None }) + subagent_descriptions: dict[str, str] = {} with open(filepath, 'r') as f: for line in f: @@ -51,51 +59,53 @@ def analyze_main_session(filepath): usage = result['usage'] # Get description from prompt if available - if subagent_usage[agent_id]['description'] is None: + if agent_id not in subagent_descriptions: prompt = result.get('prompt', '') - # Extract first line as description first_line = prompt.split('\n')[0] if prompt else f"agent-{agent_id}" if first_line.startswith('You are '): - first_line = first_line[8:] # Remove "You are " - subagent_usage[agent_id]['description'] = first_line[:60] + first_line = first_line[8:] + subagent_descriptions[agent_id] = first_line[:60] subagent_usage[agent_id]['messages'] += 1 subagent_usage[agent_id]['input_tokens'] += usage.get('input_tokens', 0) subagent_usage[agent_id]['output_tokens'] += usage.get('output_tokens', 0) subagent_usage[agent_id]['cache_creation'] += usage.get('cache_creation_input_tokens', 0) subagent_usage[agent_id]['cache_read'] += usage.get('cache_read_input_tokens', 0) - except: + except (json.JSONDecodeError, KeyError, TypeError): pass - return main_usage, 
dict(subagent_usage) + return main_usage, dict(subagent_usage), subagent_descriptions def format_tokens(n): """Format token count with thousands separators.""" return f"{n:,}" -def calculate_cost(usage, input_cost_per_m=3.0, output_cost_per_m=15.0): - """Calculate estimated cost in dollars.""" - total_input = usage['input_tokens'] + usage['cache_creation'] + usage['cache_read'] - input_cost = total_input * input_cost_per_m / 1_000_000 - output_cost = usage['output_tokens'] * output_cost_per_m / 1_000_000 - return input_cost + output_cost +def calculate_cost(usage, input_rate=3.0, output_rate=15.0): + """Calculate estimated cost in dollars with cache discounts.""" + input_cost = usage['input_tokens'] * input_rate / 1_000_000 + cache_write_cost = usage['cache_creation'] * input_rate * 1.25 / 1_000_000 + cache_read_cost = usage['cache_read'] * input_rate * 0.10 / 1_000_000 + output_cost = usage['output_tokens'] * output_rate / 1_000_000 + return input_cost + cache_write_cost + cache_read_cost + output_cost def main(): - if len(sys.argv) < 2: - print("Usage: analyze-token-usage.py ") + parser = argparse.ArgumentParser(description="Analyze token usage from Claude Code session transcripts.") + parser.add_argument("session_file", help="Path to session JSONL file") + parser.add_argument("--model", choices=MODEL_PRICING.keys(), default="sonnet", + help="Pricing tier (default: sonnet)") + args = parser.parse_args() + + if not Path(args.session_file).exists(): + print(f"Error: Session file not found: {args.session_file}") sys.exit(1) - main_session_file = sys.argv[1] - - if not Path(main_session_file).exists(): - print(f"Error: Session file not found: {main_session_file}") - sys.exit(1) + input_rate, output_rate = MODEL_PRICING[args.model] # Analyze the session - main_usage, subagent_usage = analyze_main_session(main_session_file) + main_usage, subagent_usage, subagent_descriptions = analyze_main_session(args.session_file) print("=" * 100) - print("TOKEN USAGE ANALYSIS") + 
print(f"TOKEN USAGE ANALYSIS (pricing: {args.model} — ${input_rate}/${output_rate} per M input/output)") print("=" * 100) print() @@ -106,7 +116,7 @@ def main(): print("-" * 100) # Main session - cost = calculate_cost(main_usage) + cost = calculate_cost(main_usage, input_rate, output_rate) print(f"{'main':<15} {'Main session (coordinator)':<35} " f"{main_usage['messages']:>5} " f"{format_tokens(main_usage['input_tokens']):>10} " @@ -117,8 +127,8 @@ def main(): # Subagents (sorted by agent ID) for agent_id in sorted(subagent_usage.keys()): usage = subagent_usage[agent_id] - cost = calculate_cost(usage) - desc = usage['description'] or f"agent-{agent_id}" + cost = calculate_cost(usage, input_rate, output_rate) + desc = subagent_descriptions.get(agent_id, f"agent-{agent_id}") print(f"{agent_id:<15} {desc:<35} " f"{usage['messages']:>5} " f"{format_tokens(usage['input_tokens']):>10} " @@ -126,8 +136,6 @@ def main(): f"{format_tokens(usage['cache_read']):>10} " f"${cost:>7.2f}") - print("-" * 100) - # Calculate totals total_usage = { 'input_tokens': main_usage['input_tokens'], @@ -144,9 +152,20 @@ def main(): total_usage['cache_read'] += usage['cache_read'] total_usage['messages'] += usage['messages'] + total_cost = calculate_cost(total_usage, input_rate, output_rate) + + # Total row in the table + print("-" * 100) + print(f"{'TOTAL':<15} {'':<35} " + f"{total_usage['messages']:>5} " + f"{format_tokens(total_usage['input_tokens']):>10} " + f"{format_tokens(total_usage['output_tokens']):>10} " + f"{format_tokens(total_usage['cache_read']):>10} " + f"${total_cost:>7.2f}") + print("=" * 100) + total_input = total_usage['input_tokens'] + total_usage['cache_creation'] + total_usage['cache_read'] total_tokens = total_input + total_usage['output_tokens'] - total_cost = calculate_cost(total_usage) print() print("TOTALS:") @@ -160,9 +179,8 @@ def main(): print(f" Total tokens: {format_tokens(total_tokens)}") print() print(f" Estimated cost: ${total_cost:.2f}") - print(" (at 
$3/$15 per M tokens for input/output)") + print(f" (pricing: {args.model} — ${input_rate}/${output_rate} per M, cache read 90% discount, cache write 25% surcharge)") print() - print("=" * 100) if __name__ == '__main__': main()