diff --git a/.gitignore b/.gitignore index 85cdf76c0..53390090a 100644 --- a/.gitignore +++ b/.gitignore @@ -63,6 +63,11 @@ ios-signing/ /apps/desktop/.ade /.ade/shipLane/ /.ade/logs/ + +# Ephemeral orchestrator/agent hand-off files created during setup — not meant for version control +/goal.md +/codexGoal.md +/*-next-agent-prompt.md /.playwright-mcp /.codex-derived-data package-lock.json diff --git a/apps/ade-cli/README.md b/apps/ade-cli/README.md index f36f6476f..00a251609 100644 --- a/apps/ade-cli/README.md +++ b/apps/ade-cli/README.md @@ -311,6 +311,8 @@ ade link branch owner/repo my-branch --pr 42 ade link pr owner/repo 42 --ade ade link linear-issue ADE-123 --branch arul/ade-123-fix ade linear install +ade skill list --text +ade skill show ade-browser --text ``` Use typed commands first. They validate common arguments and provide stable JSON fields or readable text summaries. Use `ade help ` for exact flags, `ade actions list --text` to discover the full service-backed action catalog, and `ade actions run ` only when there is no typed command for the workflow yet. diff --git a/apps/ade-cli/src/adeRpcServer.test.ts b/apps/ade-cli/src/adeRpcServer.test.ts index 104f5c5c8..466daa53d 100644 --- a/apps/ade-cli/src/adeRpcServer.test.ts +++ b/apps/ade-cli/src/adeRpcServer.test.ts @@ -1873,14 +1873,14 @@ describe("adeRpcServer", () => { // it ends with the user prompt and carries the inline guidance preamble. 
const createCall = (fixture.runtime.ptyService.create as ReturnType).mock.calls[0]?.[0] as { args: string[] }; const finalArg = createCall.args[createCall.args.length - 1]; - expect(finalArg).toContain("only normal reason to skip ADE CLI"); - expect(finalArg).toContain("ADE proof drawer"); - expect(finalArg).toContain("clean up old, stale, or finished processes"); + expect(finalArg).toContain("control plane for ADE state"); + expect(finalArg).toContain("proof & screenshots"); + expect(finalArg).toContain("clean up processes you start"); expect(finalArg.endsWith("Implement API wiring")).toBe(true); expect(response.structuredContent.startupCommand).toContain("claude"); expect(response.structuredContent.startupCommand).toContain("--model"); expect(response.structuredContent.startupCommand).toContain("--permission-mode"); - expect(response.structuredContent.startupCommand).toContain("only normal reason to skip ADE CLI"); + expect(response.structuredContent.startupCommand).toContain("control plane for ADE state"); expect(response.structuredContent.permissionMode).toBe("default"); expect(response.structuredContent.contextRef?.path).toBeNull(); }); @@ -2439,7 +2439,7 @@ describe("adeRpcServer", () => { expect(response.structuredContent.permissionMode).toBe("plan"); expect(response.structuredContent.startupCommand).toContain("--sandbox"); expect(response.structuredContent.startupCommand).toContain("read-only"); - expect(response.structuredContent.startupCommand).toContain("only normal reason to skip ADE CLI"); + expect(response.structuredContent.startupCommand).toContain("control plane for ADE state"); const contextPath = response.structuredContent.contextRef?.path as string | null; expect(contextPath).toBeTruthy(); expect(contextPath?.includes("/.ade/cache/orchestrator/agent-context/run-123/")).toBe(true); diff --git a/apps/ade-cli/src/bootstrap.ts b/apps/ade-cli/src/bootstrap.ts index 2f7540194..02cdf11d0 100644 --- a/apps/ade-cli/src/bootstrap.ts +++ 
b/apps/ade-cli/src/bootstrap.ts @@ -6,6 +6,7 @@ import * as nodePty from "node-pty"; import { createFileLogger, type Logger } from "../../desktop/src/main/services/logging/logger"; import { openKvDb, type AdeDb } from "../../desktop/src/main/services/state/kvDb"; import { detectDefaultBaseRef, toProjectInfo, upsertProjectRow } from "../../desktop/src/main/services/projects/projectService"; +import { reseedAdeSkills } from "../../desktop/src/main/services/skills/skillReseedService"; import { createAdeProjectService, initializeOrRepairAdeProject, @@ -378,7 +379,28 @@ function inferAgentSkillsRootForCliEntry(cliEntry: string | null): string | null return null; } +let adeSkillsReseededForCli = false; + +/** + * Materialize ADE's bundled `ade-*` skills into the home-level skill dirs every + * runtime natively discovers, so agents ADE spawns pick them up via the runtime's + * own progressive disclosure. Cheap no-op once on-disk copies are current; + * best-effort so an unwritable home dir never blocks the CLI. 
+ */ +export function reseedBundledAdeSkillsForCli(): void { + if (adeSkillsReseededForCli) return; + if (process.env.ADE_DISABLE_SKILL_RESEED === "1" || process.env.VITEST) return; + adeSkillsReseededForCli = true; + try { + const bundledRoot = inferAgentSkillsRootForCliEntry(resolveCurrentAdeCliEntry()); + if (bundledRoot) reseedAdeSkills({ bundledRoot }); + } catch { + /* best-effort: skill re-seeding must never break agent launch */ + } +} + function createHeadlessAdeCliAgentEnv(baseEnv: NodeJS.ProcessEnv = process.env): NodeJS.ProcessEnv { + reseedBundledAdeSkillsForCli(); const next: NodeJS.ProcessEnv = { ...baseEnv }; const nextPath = augmentProcessPathWithShellAndKnownCliDirs({ env: next, diff --git a/apps/ade-cli/src/cli.ts b/apps/ade-cli/src/cli.ts index a56396232..e8cc900fc 100644 --- a/apps/ade-cli/src/cli.ts +++ b/apps/ade-cli/src/cli.ts @@ -17,6 +17,10 @@ import { CliDeeplinkUsageError, runDeeplinkCommand, } from "./commands/deeplinks"; +import { + CliSkillUsageError, + runSkillCommand, +} from "./commands/skill"; import { buildDeeplink } from "../../desktop/src/shared/deeplinks"; import { AUTOMATIONS_COMING_SOON_MESSAGE, @@ -55,6 +59,7 @@ import { MACOS_VM_PHASES } from "../../desktop/src/shared/types/macosVm"; import type { AdeServiceCommand } from "./serviceManager/common"; import { normalizeAdeRuntimeRole, resolveAdeDefaultRole } from "./runtimeRoles"; import type { AdeRuntime } from "./bootstrap"; +import { reseedBundledAdeSkillsForCli } from "./bootstrap"; import { EncryptedFileCredentialStore } from "./services/credentials/credentialStore"; type JsonObject = Record; @@ -166,7 +171,8 @@ type CliPlan = | { kind: "pty-host-worker" } | { kind: "init"; targetPath: string | null } | { kind: "cursor-cloud"; rest: string[] } - | { kind: "deeplink"; rest: string[] }; + | { kind: "deeplink"; rest: string[] } + | { kind: "skill"; rest: string[] }; type CliConnection = { mode: "desktop-socket" | "runtime-socket" | "headless"; @@ -427,6 +433,7 @@ const 
TOP_LEVEL_HELP = `${ADE_BANNER} $ ade link lane | session | branch | pr | linear-issue Build a shareable deeplink (copies to clipboard) $ ade linear install Register ADE as Linear's "Open in coding tool" target + $ ade skill list | show Browse ADE's bundled agent skills (no daemon) $ ade runtime start | stop | status Manage the machine runtime daemon $ ade serve Run the ADE runtime daemon in foreground $ ade rpc --stdio Speak ADE JSON-RPC over stdin/stdout @@ -958,6 +965,23 @@ const HELP_BY_COMMAND: Record = { Flags: --ade Emit the custom "ade://" form. Defaults to the https mirror. --no-clipboard Print the URL but do not copy it to the system clipboard. +`, + skill: `${ADE_BANNER} + ADE Skills + + Browse ADE's bundled, version-locked agent skills directly from the bundled + resources. This is a local command that does NOT require the runtime daemon — + it is the tamper-proof backstop for agents that can't natively discover + ADE's skills. + + $ ade skill list List bundled skills (JSON: name, description, path) + $ ade skill list --text One "name — description" line per skill + $ ade skill show Print a skill's SKILL.md (JSON: name, description, content, path) + $ ade skill show --text Print just the skill's markdown body + + Flags: + --text Human-readable output. + --json Structured JSON output (default). `, runtime: `${ADE_BANNER} ADE Runtime @@ -10411,6 +10435,7 @@ function buildCliPlan(command: string[]): CliPlan { project: "projects", quota: "usage", quotas: "usage", + skills: "skill", }; const primaryHelpKey = aliases[primary] ?? primary; if (hasHelpFlag(args)) { @@ -10463,6 +10488,10 @@ function buildCliPlan(command: string[]): CliPlan { // dispatcher can branch on it; reconstruct rest accordingly. return { kind: "deeplink", rest: [primary, ...args] }; } + if (primary === "skill" || primary === "skills") { + // Local (non-RPC) bundled-agent-skill browser; no daemon required. 
+ return { kind: "skill", rest: args }; + } if (primary === "linear") { // `ade linear install` is the deeplink installer; every other `ade linear` // subcommand (workflows, sync, quick-view, route, picker-data, ...) belongs @@ -15369,6 +15398,20 @@ async function runCli( output: plan.text.endsWith("\n") ? plan.text : `${plan.text}\n`, exitCode: 0, }; + // Ensure ADE's bundled skills are seeded into the home-level dirs every runtime + // discovers, but only on the paths that actually launch an agent/runtime/skill — + // cheap commands like `ade help` and `ade --version` must not pay the scan/hash + // cost (cheap no-op when already current). + if ( + plan.kind === "skill" || + plan.kind === "ade-code" || + plan.kind === "runtime" || + plan.kind === "serve" || + (plan.kind === "execute" && + /^(agent spawn|chat create|shell start cli)\b/.test(plan.label)) + ) { + reseedBundledAdeSkillsForCli(); + } const originalConsole = { log: console.log, info: console.info, @@ -15432,6 +15475,20 @@ async function runCli( throw error; } } + if (plan.kind === "skill") { + try { + // The global parser folds --text/--json into parsed.options.text; + // forward that choice to the local skill command (default = JSON). + const rest = [...plan.rest, parsed.options.text ? 
"--text" : "--json"]; + const result = runSkillCommand(rest); + return { output: result.output, exitCode: result.exitCode }; + } catch (error) { + if (error instanceof CliSkillUsageError) { + throw new CliUsageError(error.message); + } + throw error; + } + } if (plan.kind === "runtime") { const result = await runRuntimeCommand(plan.rest, parsed.options); return { diff --git a/apps/ade-cli/src/commands/skill.test.ts b/apps/ade-cli/src/commands/skill.test.ts new file mode 100644 index 000000000..4bcfeb06b --- /dev/null +++ b/apps/ade-cli/src/commands/skill.test.ts @@ -0,0 +1,70 @@ +import { describe, expect, it } from "vitest"; + +import { CliSkillUsageError, runSkillCommand, runSkillList, runSkillShow } from "./skill"; + +describe("ade skill (bundled agent skills)", () => { + it("list --json returns entries for known bundled skills", () => { + const result = runSkillList(["--json"]); + expect(result.exitCode).toBe(0); + const parsed = JSON.parse(result.output) as Array<{ + name: string; + description: string; + path: string; + }>; + const names = parsed.map((entry) => entry.name); + expect(names).toContain("ade-browser"); + expect(names).toContain("ade-cli-control-plane"); + // Sorted + each entry carries a path to a SKILL.md. 
+ expect([...names]).toEqual([...names].sort((a, b) => a.localeCompare(b))); + for (const entry of parsed) { + expect(entry.path).toMatch(/SKILL\.md$/); + } + }); + + it("list --text emits one name — description line per skill", () => { + const result = runSkillList(["--text"]); + expect(result.exitCode).toBe(0); + expect(result.output).toMatch(/ade-browser —/); + }); + + it("show --json returns full content including frontmatter name", () => { + const result = runSkillShow(["ade-browser", "--json"]); + expect(result.exitCode).toBe(0); + const parsed = JSON.parse(result.output) as { + name: string; + description: string; + content: string; + path: string; + }; + expect(parsed.name).toBe("ade-browser"); + expect(parsed.content).toContain("name: ade-browser"); + expect(parsed.content).toContain("---"); + }); + + it("show --text prints the markdown body without frontmatter delimiters at the top", () => { + const result = runSkillShow(["ade-browser", "--text"]); + expect(result.exitCode).toBe(0); + expect(result.output.trimStart().startsWith("---")).toBe(false); + expect(result.output.length).toBeGreaterThan(0); + }); + + it("show errors clearly for an unknown skill and lists available names", () => { + expect(() => runSkillShow(["does-not-exist"])).toThrowError(/Unknown skill/); + try { + runSkillShow(["does-not-exist"]); + } catch (error) { + expect((error as Error).message).toContain("ade-browser"); + } + }); + + it("top-level dispatch prints help with no args", () => { + const result = runSkillCommand([]); + expect(result.exitCode).toBe(0); + expect(result.output).toContain("ade skill list"); + }); + + it("top-level dispatch rejects an unknown subcommand with a usage error", () => { + expect(() => runSkillCommand(["frobnicate"])).toThrowError(CliSkillUsageError); + expect(() => runSkillCommand(["frobnicate"])).toThrowError(/Unknown skill subcommand/); + }); +}); diff --git a/apps/ade-cli/src/commands/skill.ts b/apps/ade-cli/src/commands/skill.ts new file mode 
100644 index 000000000..06650c0d4 --- /dev/null +++ b/apps/ade-cli/src/commands/skill.ts @@ -0,0 +1,224 @@ +// --------------------------------------------------------------------------- +// Local (non-RPC) CLI command for ADE's bundled agent skills. +// +// ade skill list [--text|--json] +// ade skill show [--text|--json] +// +// This is the tamper-proof, version-locked backstop for agents that can't +// natively discover ADE's bundled skills. It reads the ADE-versioned +// `agent-skills//SKILL.md` files directly from the bundled resources +// root (resolved via agentSkillRoots), so it works headless without the +// runtime daemon — exactly like the `ade open`/`ade link` deeplink commands. +// --------------------------------------------------------------------------- + +import fs from "node:fs"; +import path from "node:path"; + +import { parse as parseYaml } from "yaml"; + +import { getAdeAgentSkillRootCandidates } from "../../../desktop/src/shared/agentSkillRoots"; + +export class CliSkillUsageError extends Error {} + +export type SkillCliResult = { + output: string; + exitCode: number; +}; + +const HELP_SKILL = [ + "Usage:", + " ade skill list [--text|--json] List ADE's bundled agent skills", + " ade skill show [--text|--json] Show a bundled skill's SKILL.md", + "", + " Serves ADE's version-locked bundled agent skills directly from the", + " bundled resources, without a runtime daemon. 
JSON output is the default;", + " pass --text for human-readable output.", +].join("\n"); + +type SkillEntry = { + name: string; + description: string; + content: string; + path: string; +}; + +// --------------------------------------------------------------------------- +// Frontmatter parsing +// --------------------------------------------------------------------------- + +type ParsedSkillFile = { + name: string | null; + description: string; + body: string; +}; + +function parseSkillFile(content: string, fallbackName: string): ParsedSkillFile { + // Frontmatter is delimited by a leading `---` line and a closing `---` line. + const match = /^---\r?\n([\s\S]*?)\r?\n---\r?\n?([\s\S]*)$/.exec(content); + if (!match) { + return { name: fallbackName, description: "", body: content }; + } + const [, frontmatter, body] = match; + let name: string | null = null; + let description = ""; + try { + const data = parseYaml(frontmatter) as Record | null; + if (data && typeof data === "object") { + if (typeof data.name === "string") name = data.name.trim(); + if (typeof data.description === "string") description = data.description.trim(); + } + } catch { + // Fall through to a minimal line-based parse below. + } + if (name == null && !description) { + for (const line of frontmatter.split(/\r?\n/)) { + const kv = /^(\w[\w-]*):\s*(.*)$/.exec(line); + if (!kv) continue; + const key = kv[1].toLowerCase(); + const value = kv[2].trim().replace(/^["']|["']$/g, ""); + if (key === "name" && !name) name = value; + if (key === "description" && !description) description = value; + } + } + return { name: name ?? fallbackName, description, body: body.replace(/^\r?\n/, "") }; +} + +// --------------------------------------------------------------------------- +// Discovery +// --------------------------------------------------------------------------- + +function bundledSkillRoots(): string[] { + // Prefer roots that actually exist and contain at least one /SKILL.md. 
+ const candidates = getAdeAgentSkillRootCandidates({ includeDeepSourceFallbacks: true }); + const existing: string[] = []; + for (const root of candidates) { + try { + if (!fs.statSync(root).isDirectory()) continue; + } catch { + continue; + } + existing.push(root); + } + return existing; +} + +function readSkillEntries(): SkillEntry[] { + const roots = bundledSkillRoots(); + const byName = new Map(); + for (const root of roots) { + let dirents: fs.Dirent[]; + try { + dirents = fs.readdirSync(root, { withFileTypes: true }); + } catch { + continue; + } + for (const dirent of dirents) { + if (!dirent.isDirectory()) continue; + const skillPath = path.join(root, dirent.name, "SKILL.md"); + let content: string; + try { + content = fs.readFileSync(skillPath, "utf8"); + } catch { + continue; + } + const parsed = parseSkillFile(content, dirent.name); + const name = parsed.name ?? dirent.name; + // De-dupe by name: first root wins (candidate order = bundled preference). + if (byName.has(name)) continue; + byName.set(name, { + name, + description: parsed.description, + content, + path: skillPath, + }); + } + } + return [...byName.values()].sort((a, b) => a.name.localeCompare(b.name)); +} + +// --------------------------------------------------------------------------- +// Command dispatch +// --------------------------------------------------------------------------- + +type OutputFormat = "json" | "text"; + +function extractFormat(args: string[]): { format: OutputFormat; rest: string[] } { + let format: OutputFormat = "json"; + const rest: string[] = []; + for (const arg of args) { + if (arg === "--text") { + format = "text"; + continue; + } + if (arg === "--json") { + format = "json"; + continue; + } + rest.push(arg); + } + return { format, rest }; +} + +export function runSkillCommand(rest: string[]): SkillCliResult { + if (rest.length === 0 || rest[0] === "--help" || rest[0] === "-h") { + return { output: `${HELP_SKILL}\n`, exitCode: 0 }; + } + const [verb, ...verbArgs] 
= rest; + switch (verb) { + case "list": + return runSkillList(verbArgs); + case "show": + return runSkillShow(verbArgs); + default: + throw new CliSkillUsageError( + `Unknown skill subcommand: ${verb}. Try 'ade skill list' or 'ade skill show '.`, + ); + } +} + +export function runSkillList(args: string[]): SkillCliResult { + const { format } = extractFormat(args); + const entries = readSkillEntries(); + if (format === "text") { + if (entries.length === 0) { + return { output: "No bundled agent skills found.\n", exitCode: 0 }; + } + const lines = entries.map((entry) => + entry.description ? `${entry.name} — ${entry.description}` : entry.name, + ); + return { output: `${lines.join("\n")}\n`, exitCode: 0 }; + } + const json = entries.map((entry) => ({ + name: entry.name, + description: entry.description, + path: entry.path, + })); + return { output: `${JSON.stringify(json, null, 2)}\n`, exitCode: 0 }; +} + +export function runSkillShow(args: string[]): SkillCliResult { + const { format, rest } = extractFormat(args); + const name = rest[0]; + if (!name) { + throw new CliSkillUsageError("ade skill show [--text|--json]"); + } + const entries = readSkillEntries(); + const entry = entries.find((candidate) => candidate.name === name); + if (!entry) { + const available = entries.map((candidate) => candidate.name).join(", ") || "(none found)"; + throw new CliSkillUsageError( + `Unknown skill: ${name}. 
Available skills: ${available}.`, + ); + } + if (format === "text") { + const parsed = parseSkillFile(entry.content, entry.name); + return { output: `${parsed.body.replace(/\r?\n$/, "")}\n`, exitCode: 0 }; + } + const json = { + name: entry.name, + description: entry.description, + content: entry.content, + path: entry.path, + }; + return { output: `${JSON.stringify(json, null, 2)}\n`, exitCode: 0 }; +} diff --git a/apps/ade-cli/src/tuiClient/__tests__/adeApi.test.ts b/apps/ade-cli/src/tuiClient/__tests__/adeApi.test.ts index fd035f035..5d0040ed0 100644 --- a/apps/ade-cli/src/tuiClient/__tests__/adeApi.test.ts +++ b/apps/ade-cli/src/tuiClient/__tests__/adeApi.test.ts @@ -623,7 +623,7 @@ describe("sendChatMessage", () => { ], }, ]); - expect(JSON.stringify(calls)).not.toContain("only normal reason to skip ADE CLI"); + expect(JSON.stringify(calls)).not.toContain("control plane for ADE state"); expect(JSON.stringify(calls)).not.toContain("ade actions list --text"); }); }); diff --git a/apps/desktop/src/main/services/ai/tools/systemPrompt.test.ts b/apps/desktop/src/main/services/ai/tools/systemPrompt.test.ts index 1f07eb5a4..a1d6a9330 100644 --- a/apps/desktop/src/main/services/ai/tools/systemPrompt.test.ts +++ b/apps/desktop/src/main/services/ai/tools/systemPrompt.test.ts @@ -160,7 +160,7 @@ describe("buildCodingAgentSystemPrompt", () => { expect(result).toContain("orchestration **LEAD**"); expect(result).toContain("/repo/apps/desktop/resources/agent-skills"); - expect(result).toContain("ADE_AGENT_SKILLS_DIRS"); + expect(result).toContain("read the matching `ade-*` skill"); expect(result).toContain("ade-orchestrator/SKILL.md"); expect(result).toContain("Lead planning quality contract"); expect(result).toContain("out-of-scope / non-goals"); @@ -288,9 +288,9 @@ describe("buildCodingAgentSystemPrompt", () => { it("always includes operating loop, editing rules, and verification rules", () => { const result = buildCodingAgentSystemPrompt({ cwd: "/x" }); 
expect(result).toContain("## Operating Loop"); - expect(result).toContain("## ADE CLI"); - expect(result).toContain("only normal reason to skip ADE CLI"); - expect(result).toContain("ADE exposes Agent Skills from project, user, runtime, and bundled ADE skill roots"); + expect(result).toContain("## ADE"); + expect(result).toContain("read the matching `ade-*` skill"); + expect(result).toContain("Your ADE capabilities ship as Agent Skills"); expect(result).toContain("ade-ios-simulator"); expect(result).toContain("ade-cli-control-plane"); expect(result).toContain("ade-orchestrator"); diff --git a/apps/desktop/src/main/services/chat/agentChatService.test.ts b/apps/desktop/src/main/services/chat/agentChatService.test.ts index c8c4c8bd4..07de47eda 100644 --- a/apps/desktop/src/main/services/chat/agentChatService.test.ts +++ b/apps/desktop/src/main/services/chat/agentChatService.test.ts @@ -1957,11 +1957,10 @@ describe("createAgentChatService", () => { }); const opts = vi.mocked(claudeSdkCreateSessionCompat).mock.calls[0]?.[0] as { systemPrompt?: { append?: string } } | undefined; - expect(opts?.systemPrompt?.append).toContain("default control plane"); - expect(opts?.systemPrompt?.append).toContain("only normal reason to skip ADE CLI"); - expect(opts?.systemPrompt?.append).toContain("ade lanes list"); - expect(opts?.systemPrompt?.append).toContain("ADE proof drawer"); - expect(opts?.systemPrompt?.append).toContain("clean up old, stale, or finished processes"); + expect(opts?.systemPrompt?.append).toContain("control plane for ADE state"); + expect(opts?.systemPrompt?.append).toContain("read the matching `ade-*` skill"); + expect(opts?.systemPrompt?.append).toContain("ade help "); + expect(opts?.systemPrompt?.append).toContain("clean up processes you start"); }); it("rebuilds the Claude query with the per-turn reasoning effort, not the stale warm-query effort (FIX 3)", async () => { @@ -2119,10 +2118,10 @@ describe("createAgentChatService", () => { .find((payload) => 
payload.includes("Inspect the repo and report the chat wiring.")); expect(userTurnPayload).toContain("[ADE launch directive]"); - expect(userTurnPayload).not.toContain("only normal reason to skip ADE CLI"); + expect(userTurnPayload).not.toContain("control plane for ADE state"); expect(userTurnPayload).not.toContain("ade actions list --text"); const opts = vi.mocked(claudeSdkCreateSessionCompat).mock.calls[0]?.[0] as { systemPrompt?: { append?: string } } | undefined; - expect(opts?.systemPrompt?.append).toContain("only normal reason to skip ADE CLI"); + expect(opts?.systemPrompt?.append).toContain("control plane for ADE state"); }); it("keeps Claude SDK setting sources and skills enabled without output-style plugins", async () => { @@ -3277,10 +3276,10 @@ describe("createAgentChatService", () => { expect(firstUserContent).toContain(tmpRoot); expect(firstUserContent).toContain("Read-only inspection outside that worktree is allowed"); expect(firstUserContent).toContain("mutating commands only inside that worktree"); - expect(firstUserContent).toContain("only normal reason to skip ADE CLI"); + expect(firstUserContent).toContain("control plane for ADE state"); expect(firstUserContent).toContain("ade actions list --text"); expect(secondUserContent).not.toContain("[ADE launch directive]"); - expect(secondUserContent).toContain("only normal reason to skip ADE CLI"); + expect(secondUserContent).toContain("control plane for ADE state"); }); it("starts Codex sessions without ADE-owned tool server injection", async () => { @@ -3316,7 +3315,7 @@ describe("createAgentChatService", () => { } | undefined; const textInput = turnParams?.input?.map((entry) => String(entry.text ?? "")).join("\n") ?? 
""; expect(turnParams?.collaborationMode?.settings?.developer_instructions).toBe("system prompt"); - expect(textInput).not.toContain("only normal reason to skip ADE CLI"); + expect(textInput).not.toContain("control plane for ADE state"); expect(textInput).not.toContain("ade actions list --text"); expect(textInput).toContain("Inspect the repo and fix the lane launch bug."); }); diff --git a/apps/desktop/src/main/services/cli/adeCliService.ts b/apps/desktop/src/main/services/cli/adeCliService.ts index de5b06305..328e950f3 100644 --- a/apps/desktop/src/main/services/cli/adeCliService.ts +++ b/apps/desktop/src/main/services/cli/adeCliService.ts @@ -3,6 +3,7 @@ import os from "node:os"; import path from "node:path"; import type { AdeCliInstallResult, AdeCliStatus } from "../../../shared/types/adeCli"; import { ADE_AGENT_SKILLS_DIRS_ENV, joinAdeAgentSkillRoots, splitAdeAgentSkillRoots } from "../../../shared/agentSkillRoots"; +import { reseedAdeSkills } from "../skills/skillReseedService"; import type { Logger } from "../logging/logger"; import { spawnAsync } from "../shared/utils"; import { @@ -540,6 +541,27 @@ export function createAdeCliService(args: CreateAdeCliServiceArgs) { const commandName = resolveCommandName(args); const resolved = resolveCliPaths(args, commandName); const bundledAgentSkillsRoot = resolveBundledAgentSkillsRoot(args); + // Seed ADE's bundled skills into the home-level dirs every runtime discovers, so + // desktop-launched agents pick them up via the runtime's own progressive disclosure. + if ( + bundledAgentSkillsRoot + && process.env.ADE_DISABLE_SKILL_RESEED !== "1" + && !process.env.VITEST + ) { + try { + reseedAdeSkills({ + bundledRoot: bundledAgentSkillsRoot, + version: process.env.npm_package_version, + }); + } catch (error) { + // best-effort: skill re-seeding must never block desktop startup, but + // surface the failure so it can be debugged. 
+ args.logger.warn("ade_cli.skill_reseed_failed", { + bundledRoot: bundledAgentSkillsRoot, + error: error instanceof Error ? error.message : String(error), + }); + } + } const envSnapshot = args.env ?? process.env; const hostPathSnapshot = getPathEnvValue(envSnapshot); diff --git a/apps/desktop/src/main/services/cto/ctoStateService.ts b/apps/desktop/src/main/services/cto/ctoStateService.ts index 9c47b0614..ae9206df0 100644 --- a/apps/desktop/src/main/services/cto/ctoStateService.ts +++ b/apps/desktop/src/main/services/cto/ctoStateService.ts @@ -179,7 +179,7 @@ function buildCtoEnvironmentKnowledge(): string { " - Example: 'Launch a chat with opus' → spawnChat({ modelId: 'anthropic/claude-opus-4-7', ... }). 'Open a terminal' → createTerminal. 'Run npm test' → createTerminal({ startupCommand: 'npm test' }).", "", "Tool calling convention:", - ADE_CLI_INLINE_GUIDANCE, + " - Use the `ade` CLI per the ADE CLI operating guidance in your doctrine.", " - If a tool from the manifest below is not in your immediate tool list, use the closest ADE CLI command or report the missing capability clearly.", "", "## PR Lifecycle in ADE", diff --git a/apps/desktop/src/main/services/cto/ctoWorkerLifecycle.test.ts b/apps/desktop/src/main/services/cto/ctoWorkerLifecycle.test.ts index c031afa35..bedb7034c 100644 --- a/apps/desktop/src/main/services/cto/ctoWorkerLifecycle.test.ts +++ b/apps/desktop/src/main/services/cto/ctoWorkerLifecycle.test.ts @@ -1069,8 +1069,8 @@ describe("workerAdapterRuntimeService (file group)", () => { timeoutMs: 300000, }); const firstCall = runSessionTurn.mock.calls[0] as unknown as [{ text: string }] | undefined; - expect(firstCall?.[0]?.text).toContain("## ADE CLI"); - expect(firstCall?.[0]?.text).toContain("only normal reason to skip ADE CLI"); + expect(firstCall?.[0]?.text).toContain("## ADE"); + expect(firstCall?.[0]?.text).toContain("control plane for ADE state"); expect(result.effectiveSurface).toBe("unified_chat"); 
expect(result.continuation).toMatchObject({ surface: "unified_chat", diff --git a/apps/desktop/src/main/services/skills/skillReseedService.test.ts b/apps/desktop/src/main/services/skills/skillReseedService.test.ts new file mode 100644 index 000000000..0cb7334ab --- /dev/null +++ b/apps/desktop/src/main/services/skills/skillReseedService.test.ts @@ -0,0 +1,110 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { reseedAdeSkills } from "./skillReseedService"; + +function writeSkill(root: string, name: string, body: string): void { + const dir = path.join(root, name); + fs.mkdirSync(dir, { recursive: true }); + fs.writeFileSync(path.join(dir, "SKILL.md"), body); +} + +describe("reseedAdeSkills", () => { + let tmp: string; + let bundled: string; + let target: string; + + beforeEach(() => { + tmp = fs.mkdtempSync(path.join(os.tmpdir(), "ade-reseed-")); + bundled = path.join(tmp, "bundled"); + target = path.join(tmp, "home", ".claude", "skills"); + fs.mkdirSync(bundled, { recursive: true }); + }); + + afterEach(() => { + fs.rmSync(tmp, { recursive: true, force: true }); + }); + + it("copies only ade-* bundled skills into the target and writes a manifest", () => { + writeSkill(bundled, "ade-browser", "# browser"); + writeSkill(bundled, "ade-proof-artifacts", "# proof"); + writeSkill(bundled, "not-an-ade-skill", "# ignored"); // missing ade- prefix → skipped + + const result = reseedAdeSkills({ bundledRoot: bundled, targetDirs: [target], version: "1" }); + + expect(result.skillNames).toEqual(["ade-browser", "ade-proof-artifacts"]); + expect(result.targetsWritten).toEqual([target]); + expect(fs.existsSync(path.join(target, "ade-browser", "SKILL.md"))).toBe(true); + expect(fs.existsSync(path.join(target, "ade-proof-artifacts", "SKILL.md"))).toBe(true); + expect(fs.existsSync(path.join(target, "not-an-ade-skill"))).toBe(false); + 
expect(fs.existsSync(path.join(target, ".ade-skills.json"))).toBe(true); + }); + + it("is a no-op on the second run when nothing changed (self-healing, cheap)", () => { + writeSkill(bundled, "ade-browser", "# browser"); + reseedAdeSkills({ bundledRoot: bundled, targetDirs: [target], version: "1" }); + + const second = reseedAdeSkills({ bundledRoot: bundled, targetDirs: [target], version: "1" }); + expect(second.targetsWritten).toEqual([]); + expect(second.targetsUpToDate).toEqual([target]); + }); + + it("never clobbers or prunes a user's own (non-managed) skills", () => { + writeSkill(bundled, "ade-browser", "# browser"); + reseedAdeSkills({ bundledRoot: bundled, targetDirs: [target], version: "1" }); + writeSkill(target, "my-own-skill", "# mine"); // user-authored, not ADE-managed + + // bundle changes → re-seed runs again + writeSkill(bundled, "ade-linear", "# linear"); + reseedAdeSkills({ bundledRoot: bundled, targetDirs: [target], version: "1" }); + + expect(fs.existsSync(path.join(target, "my-own-skill", "SKILL.md"))).toBe(true); + expect(fs.existsSync(path.join(target, "ade-linear", "SKILL.md"))).toBe(true); + }); + + it("materializes real files when the bundle ships a skill as a symlink (dereference)", () => { + // Some bundle layouts (e.g. a plugin) ship skill dirs as symlinks. The seeded + // copy must be real files in the user's home, never a link back into the bundle. 
+ const realSkill = path.join(tmp, "real", "ade-linked"); + fs.mkdirSync(realSkill, { recursive: true }); + fs.writeFileSync(path.join(realSkill, "SKILL.md"), "# linked"); + fs.symlinkSync(realSkill, path.join(bundled, "ade-linked"), "dir"); + + reseedAdeSkills({ bundledRoot: bundled, targetDirs: [target], version: "1" }); + + const dest = path.join(target, "ade-linked"); + expect(fs.lstatSync(dest).isSymbolicLink()).toBe(false); + expect(fs.lstatSync(path.join(dest, "SKILL.md")).isSymbolicLink()).toBe(false); + expect(fs.readFileSync(path.join(dest, "SKILL.md"), "utf8")).toBe("# linked"); + }); + + it("seeds every target dir independently when given multiple targets", () => { + writeSkill(bundled, "ade-browser", "# browser"); + const target1 = path.join(tmp, "home", ".claude", "skills"); + const target2 = path.join(tmp, "home", ".agents", "skills"); + + const result = reseedAdeSkills({ + bundledRoot: bundled, + targetDirs: [target1, target2], + version: "1", + }); + + expect(result.targetsWritten).toEqual([target1, target2]); + expect(fs.existsSync(path.join(target1, "ade-browser", "SKILL.md"))).toBe(true); + expect(fs.existsSync(path.join(target2, "ade-browser", "SKILL.md"))).toBe(true); + }); + + it("prunes ADE-managed skills that are no longer bundled, on a content change", () => { + writeSkill(bundled, "ade-old", "# old"); + reseedAdeSkills({ bundledRoot: bundled, targetDirs: [target], version: "1" }); + expect(fs.existsSync(path.join(target, "ade-old"))).toBe(true); + + fs.rmSync(path.join(bundled, "ade-old"), { recursive: true, force: true }); + writeSkill(bundled, "ade-new", "# new"); + reseedAdeSkills({ bundledRoot: bundled, targetDirs: [target], version: "1" }); + + expect(fs.existsSync(path.join(target, "ade-old"))).toBe(false); + expect(fs.existsSync(path.join(target, "ade-new", "SKILL.md"))).toBe(true); + }); +}); diff --git a/apps/desktop/src/main/services/skills/skillReseedService.ts b/apps/desktop/src/main/services/skills/skillReseedService.ts new 
file mode 100644 index 000000000..68b383f05 Binary files /dev/null and b/apps/desktop/src/main/services/skills/skillReseedService.ts differ diff --git a/apps/desktop/src/renderer/components/terminals/cliLaunch.test.ts b/apps/desktop/src/renderer/components/terminals/cliLaunch.test.ts index 3069f1fab..833bf6e91 100644 --- a/apps/desktop/src/renderer/components/terminals/cliLaunch.test.ts +++ b/apps/desktop/src/renderer/components/terminals/cliLaunch.test.ts @@ -230,14 +230,14 @@ describe("buildTrackedCliStartupCommand", () => { it("adds the dangerous bypass flag for full-auto", () => { const command = buildTrackedCliStartupCommand({ provider: "codex", permissionMode: "full-auto" }); expect(command).toContain("codex --no-alt-screen --dangerously-bypass-approvals-and-sandbox"); - expect(command).not.toContain("only normal reason to skip ADE CLI"); + expect(command).not.toContain("control plane for ADE state"); }); it("adds supported workspace-write defaults for default", () => { const command = buildTrackedCliStartupCommand({ provider: "codex", permissionMode: "default" }); expect(command).toContain("codex --no-alt-screen --sandbox workspace-write --ask-for-approval on-request"); expect(command).not.toContain("mcp_servers.linear"); - expect(command).not.toContain("only normal reason to skip ADE CLI"); + expect(command).not.toContain("control plane for ADE state"); }); it("does not synthesize Codex MCP server config for any permission preset", () => { @@ -254,19 +254,19 @@ describe("buildTrackedCliStartupCommand", () => { expect(command).toContain("codex --no-alt-screen"); expect(command).not.toContain("--full-auto"); expect(command).not.toContain("mcp_servers.linear"); - expect(command).not.toContain("only normal reason to skip ADE CLI"); + expect(command).not.toContain("control plane for ADE state"); }); it("adds untrusted approval and workspace-write sandbox for edit", () => { const command = buildTrackedCliStartupCommand({ provider: "codex", permissionMode: "edit" 
}); expect(command).toContain("codex --no-alt-screen --sandbox workspace-write --ask-for-approval untrusted"); - expect(command).not.toContain("only normal reason to skip ADE CLI"); + expect(command).not.toContain("control plane for ADE state"); }); it("adds on-request approval and read-only sandbox for plan", () => { const command = buildTrackedCliStartupCommand({ provider: "codex", permissionMode: "plan" }); expect(command).toContain("codex --no-alt-screen --sandbox read-only --ask-for-approval on-request"); - expect(command).not.toContain("only normal reason to skip ADE CLI"); + expect(command).not.toContain("control plane for ADE state"); }); it("uses the selected lane worktree to seed skill roots", () => { diff --git a/apps/desktop/src/shared/adeCliGuidance.test.ts b/apps/desktop/src/shared/adeCliGuidance.test.ts index 2ca1f8b24..d62a1d56b 100644 --- a/apps/desktop/src/shared/adeCliGuidance.test.ts +++ b/apps/desktop/src/shared/adeCliGuidance.test.ts @@ -1,21 +1,48 @@ import { describe, expect, it } from "vitest"; -import { adeBundledAgentSkills, buildAdeCliAgentGuidance } from "./adeCliGuidance"; +import { adeBundledAgentSkills, buildAdeBootstrapGuidance, buildAdeCliAgentGuidance } from "./adeCliGuidance"; describe("ADE CLI guidance", () => { - it("preinjects bundled skill discovery guidance for every ADE runtime surface", () => { - const guidance = buildAdeCliAgentGuidance(["/Applications/ADE.app/Contents/Resources/agent-skills"]); + it("now aliases the minimal bootstrap (the verbose always-on blob was removed)", () => { + const roots = ["/Applications/ADE.app/Contents/Resources/agent-skills"]; + const guidance = buildAdeCliAgentGuidance(roots); - expect(guidance).toContain("### Skills"); - expect(guidance).toContain("project, user, runtime, and bundled ADE skill roots"); - expect(guidance).toContain("ADE-hosted Work chats"); - expect(guidance).toContain("ADE Code/TUI sessions"); - expect(guidance).toContain("CTO prompts"); - 
expect(guidance).toContain("mobile-started work"); - expect(guidance).toContain("ADE_AGENT_SKILLS_DIRS"); - expect(guidance).toContain("/SKILL.md"); - expect(guidance).toContain("references/"); + // The canonical builder is a thin alias over the bootstrap. + expect(guidance).toBe(buildAdeBootstrapGuidance(roots)); + // It still advertises the bundled skill index so the model knows what exists. for (const skillName of adeBundledAgentSkills) { expect(guidance).toContain(`\`${skillName}\``); } + // The old always-on rulebook is gone — those rules now live in the skills. + expect(guidance).not.toContain("### Minimum operating rules"); + }); +}); + +describe("ADE bootstrap guidance", () => { + const roots = ["/Applications/ADE.app/Contents/Resources/agent-skills"]; + + it("teaches the skill-on-demand habit and the ground-truth CLI fallback", () => { + const bootstrap = buildAdeBootstrapGuidance(roots); + + expect(bootstrap).toContain("## ADE"); + // The habit: reach for the matching skill on demand, not an inlined rulebook. + expect(bootstrap).toContain("read the matching `ade-*` skill"); + // The fallback: CLI help is ground truth (agents are not trained on `ade`). + expect(bootstrap).toContain("ade help "); + expect(bootstrap).toContain("ade actions list --text"); + // The skill index is still advertised so the model knows what exists. + for (const skillName of adeBundledAgentSkills) { + expect(bootstrap).toContain(`\`${skillName}\``); + } + }); + + it("stays within a small budget — the always-on socket/browser/proof tax is gone", () => { + const bootstrap = buildAdeBootstrapGuidance(roots); + + // The redesign dropped the ~4,300-char blob; the bootstrap must stay tiny + // (~260 tokens). Guard against silent re-growth back into a rulebook. + expect(bootstrap.length).toBeLessThan(1600); + // The per-domain operating rules now live in their skills, not always-on. 
+ expect(bootstrap).not.toContain("### Minimum operating rules"); + expect(bootstrap).not.toContain("--socket"); }); }); diff --git a/apps/desktop/src/shared/adeCliGuidance.ts b/apps/desktop/src/shared/adeCliGuidance.ts index 39599ce6f..c86d34baa 100644 --- a/apps/desktop/src/shared/adeCliGuidance.ts +++ b/apps/desktop/src/shared/adeCliGuidance.ts @@ -13,32 +13,14 @@ export const adeBundledAgentSkills = [ "ade-deeplinks", ] as const; +/** + * @deprecated Superseded by {@link buildAdeBootstrapGuidance}. Kept as a thin alias so + * existing call sites stay wired to the (now minimal) bootstrap. The previous ~1,000-token + * blob is gone: ADE's capabilities are delivered as Agent Skills that each runtime discovers + * natively (progressive disclosure), seeded by `skillReseedService`. Do not re-grow this. + */ export function buildAdeCliAgentGuidance(skillRoots: readonly string[] = getAdeAgentSkillRootsForPrompt()): string { - return [ - "## ADE CLI", - "ADE is a local-first desktop development environment for lanes, chats, terminal sessions, PR workflows, proof/artifacts, App Control, iOS Simulator/Preview Lab state, the VM tab, config, and managed processes.", - "`ade` is the default control plane for ADE-managed sessions. Use normal shell commands for immediate repo inspection/edit/test work; use ADE CLI when you need ADE state, drawer/session state, proof registration, PR metadata, or managed app/simulator/browser/VM control.", - "", - "### Skills", - "- ADE exposes Agent Skills from project, user, runtime, and bundled ADE skill roots. 
Use the relevant skill instead of relying on long prompt guidance.", - `- Bundled ADE skills include: ${adeBundledAgentSkills.map((name) => `\`${name}\``).join(", ")}.`, - "- ADE injects this guidance into ADE-hosted Work chats, Work tab CLI launches, ADE Code/TUI sessions, CTO prompts, and mobile-started work that executes through ADE's desktop or project runtime.", - "- Skills use the Agent Skills package shape: `/SKILL.md` plus optional `references/`, `scripts/`, and `assets/` files. When a skill applies, read its `SKILL.md` before acting, then load referenced files only when needed.", - "- If skills are not auto-listed by your runtime, look for them in project/user `.agents/skills`, `.ade/skills`, `.claude/skills`, or ADE's bundled `agent-skills` resources, then read that skill's `SKILL.md` on demand.", - `- ${formatAdeAgentSkillRootsForPrompt(skillRoots)}`, - "- ADE also sets `ADE_AGENT_SKILLS_DIRS` for ADE-launched CLI sessions when skill roots are known so CLI runtimes can discover the same skills.", - "- When a bundled skill applies *differently* in this project (a missing flag, a port conflict, a required setup step, a workaround for a local quirk), propose appending a one-line note to `/CLAUDE.md` or `/AGENTS.md` — whichever the project already uses — so the next agent picks it up automatically. Propose the edit; do not silently write to user-curated docs. If neither file exists, ask the user which they prefer before creating one.", - "", - "### Minimum operating rules", - "- Start with `ade doctor --text` when the ADE environment is unclear. Use `ade help ` for exact flags and `ade actions list --text` as the escape hatch for service actions without a typed command.", - "- If `command -v ade` fails, try `${ADE_CLI_PATH:-}` when set, then `${ADE_CLI_BIN_DIR:-}/ade`, and in an ADE source checkout fall back to `node apps/ade-cli/dist/cli.cjs ...` after confirming it exists. 
The only normal reason to skip ADE CLI for an ADE action is that it is truly unreachable.", - "- Use typed commands with `--text` for readable output. Common starts: `ade lanes list --text`, `ade chat list --text`, `ade proof status --text`, and `ade actions list --text`.", - "- Automations, Linear webhook ingress, and macOS VM are internal/coming-soon in production builds. Do not use `ade automations`, `ade linear ingress`, or `ade macos-vm` unless the user explicitly asks and the matching internal override env var is set.", - "- Use `--socket` when the ADE desktop drawer and the CLI must share live state such as App Control, iOS Simulator, Preview Lab, browser tabs, terminal logs, selection/context capture, proof drawer updates, or macOS VM state.", - "- If any task needs a browser, web page, localhost preview, login-backed site, screenshot, DOM inspection, form fill, click, or navigation, use ADE's built-in browser through `ade --socket browser ...` and read the `ade-browser` skill before trying an external browser/tool. Start with `ade --socket browser tabs --text`; reuse this chat's owned tab/session, or run plain `ade --socket browser open --text` to reuse/create one in the background. 
Use `--new-tab` only when the task truly needs another tab and `--panel` only when the user should see it.", - "- When the user asks you to capture, send, attach, or provide proof, use the relevant capture tool first, then register it with ADE via `ade proof ...` so it appears in the ADE proof drawer for the active chat or lane.", - "- When you run processes of any kind, track what you started and clean up old, stale, or finished processes before leaving the task.", - ].join("\n"); + return buildAdeBootstrapGuidance(skillRoots); } export const ADE_CLI_AGENT_GUIDANCE = buildAdeCliAgentGuidance(); @@ -48,3 +30,25 @@ export function buildAdeCliInlineGuidance(skillRoots: readonly string[] = getAde } export const ADE_CLI_INLINE_GUIDANCE = buildAdeCliInlineGuidance(); + +/** + * Minimal always-on bootstrap that replaces the heavier {@link buildAdeCliAgentGuidance} + * blob. It teaches the habit (reach for the matching `ade-*` skill on demand) and the + * ground-truth fallback (`ade help` / `ade actions list`) instead of inlining every + * socket/browser/proof/VM rule on every turn — those now live in their skills, which each + * runtime discovers natively (progressive disclosure). Keep this short; do not re-grow it. + */ +export function buildAdeBootstrapGuidance( + skillRoots: readonly string[] = getAdeAgentSkillRootsForPrompt(), +): string { + return [ + "## ADE", + "You're working inside ADE, a local-first dev environment (lanes, chats, terminals, PRs, proof/artifacts, app & iOS-simulator & browser control). The `ade` CLI is your control plane for ADE state — it is not in your training data, so consult `ade help ` rather than guessing command syntax.", + "Your ADE capabilities ship as Agent Skills. 
When a task touches an ADE area (lanes/git, PRs, proof & screenshots, the built-in browser, iOS simulator, app control, Linear, deeplinks), read the matching `ade-*` skill before acting; otherwise ignore them.", + `Skills: ${adeBundledAgentSkills.map((name) => `\`${name}\``).join(", ")}.`, + formatAdeAgentSkillRootsForPrompt(skillRoots), + "Ground truth for any `ade` invocation is `ade help ` and `ade actions list --text`; prefer typed commands with `--text`. Track and clean up processes you start.", + ].join("\n"); +} + +export const ADE_BOOTSTRAP_GUIDANCE = buildAdeBootstrapGuidance(); diff --git a/codexGoal.md b/codexGoal.md deleted file mode 100644 index 2f11618a2..000000000 --- a/codexGoal.md +++ /dev/null @@ -1,313 +0,0 @@ -# ADE `ade code` TUI — finish Phase 3, build Phase 4 + 5 - -## Kickoff prompt (paste this to the agent) - -> You're continuing the ADE TUI parity pass on branch `ade/tui-parity-pass` in -> this worktree. The TUI is the Ink/React terminal client at -> `apps/ade-cli/src/tuiClient/` (`ade code`). Implement, in order: the **remaining -> Phase 3** items, then **all of Phase 4**, then **all of Phase 5**, exactly as -> specified in this file (`codexGoal.md`). Work in small, committed, tested -> increments — after each task run `npx tsc -p tsconfig.json --noEmit` and the -> relevant `npx vitest run ` from `apps/ade-cli/`, and add a focused unit -> test for any new pure helper. Do NOT regress the 842 passing tests. `app.tsx` -> is a ~10k-line monolith touched by most tasks: edit it sequentially (no -> parallel agents writing it concurrently); use read-only agents only for -> investigation. Follow the existing patterns described below. Build with -> `npm run build` in `apps/ade-cli` before declaring a task done. The user runs -> the live TUI for validation — keep each increment shippable and eyeball-able. 
- ---- - -## Context & ground rules - -**Where:** `apps/ade-cli/src/tuiClient/` — `app.tsx` (the monolith: state, input -handling, command dispatch, render), plus `components/` (ChatView, RightPane, -Drawer, MultiChatGrid, ModelPicker, FooterControls, ApprovalPrompt, Header, -SlashPalette, MentionPalette), and helpers (`adeApi.ts`, `connection.ts`, -`jsonRpcClient.ts`, `hitTestRegistry.ts`, `theme.ts`, `format.ts`, -`aggregate.ts`, `spinTick.tsx`, `commands.ts`, `types.ts`). It speaks ADE -JSON-RPC to the runtime daemon and shares types/logic with the desktop under -`apps/desktop/src/shared/`. The desktop renderer (`apps/desktop/src/renderer/`) -is the design reference — match its semantics, not its layout (this is a -width-constrained terminal: Ink `Box`/`Text` only, colors from `theme.ts`). - -**Build / test (from `apps/ade-cli/`):** -- Typecheck: `npx tsc -p tsconfig.json --noEmit` -- Scoped tests: `npx vitest run src/tuiClient/__tests__/` -- Full TUI-ish suite: `npx vitest run` -- Bundle (must pass before "done"): `npm run build` - -**Live test (the user does this; you can smoke it):** rebuild, then -`ADE_DEFAULT_ROLE=cto ADE_HOME=/Users/admin/.ade-tui-parity node apps/ade-cli/dist/cli.cjs runtime start` -and `ADE_HOME=/Users/admin/.ade-tui-parity node apps/ade-cli/dist/cli.cjs --socket code` -(a dedicated isolated daemon; rebuilding changes the build hash so restart the -daemon after each build). Shut down with `runtime stop`. - -**Conventions / patterns already in place — reuse, don't reinvent:** -- **Theme:** all colors via `theme.ts` tokens (`theme.color.*`, `theme.provider(family)`, - `theme.lane(lane)`, `theme.rail`). Brand violet `#A78BFA` is the accent; - selected/focused = violet, neutral borders = `theme.color.border`. No raw hex, - no green for "healthy/idle" chrome (green reads as a glitch — reserve it for - the running spinner only). 
-- **Hit-test / mouse:** `hitTestRegistry.ts` — `useHitTestTarget({id, rect, onClick, zIndex})` - returns an `isHovered` boolean; the move handler in `app.tsx` (`hoverTest`, - grep `hoverTest`) sets `hoveredHitId` which flows via `HitTestProvider`. Mouse - parsing: `parseTerminalMouseInput` (SGR/rxvt/X10), `decodeMouseButton`. Many - app-level targets are registered in the render pass in `app.tsx` (grep - `addFooterInlineTarget`, `appHitTargetIdsRef`, `registry.register`). -- **Streaming:** chat events are coalesced (`flushPendingChatEvents` / - `scheduleChatFlush` / `CHAT_EVENT_FLUSH_MS`); a single shared `displayBlocks` - (`aggregateChatBlocks`) is threaded into ChatView + the `render*`/`compute*` - helpers. Don't add per-token work or new full-transcript walks. -- **Grid:** `multiView` ("grid exists") is decoupled from `gridViewActive` - ("grid shown") via `setGridView(active)` + `gridViewActiveRef`. Submit/scroll/ - selection routing and the grid sync effect already gate on `gridViewActive`. -- **Footer inline cells:** the cell order is a single source of truth — - `inlineRowCellOrder({providerLocked, fastSupported, reasoningSupported, subagentsVisible})` - (exported from `app.tsx`). Keyboard nav, mouse down-cycle, and hit-tests all - derive from it. Add new cells there. -- **Prompt input:** `applyCoalescedPromptInput` segments coalesced chunks - (Ink merges fast keystrokes). Reuse the prompt helpers (`insertPromptText`, - `deletePromptBackward`, etc.). -- **Right pane:** `RightPaneContent` is a discriminated union in `types.ts`; - `RightPane.tsx` renders each `kind`. Forms use the `{kind:"form", command, fields}` - shape; submit handled in `app.tsx` (grep `form.command ===`). -- Line numbers in this file are approximate (the monolith shifts) — **grep for - the named symbol** to find the current site. - -**Out of scope / non-goals (do not build):** 2D React-Flow graph canvas, -multi-project tabs, Monaco-grade editing, full structured automation-rule editor. 
- ---- - -# PHASE 3 — remaining runtime UX + model picker - -Already done (do not redo): model-picker glyph/color unification + `⌕` search + -shortId/alias search; Codex `custom` preset cycle fix; footer fast/reasoning -reachability via `inlineRowCellOrder`; shimmer working indicator. - -## 3.1 — Codex approval × sandbox readout (S) -**Goal:** When provider is `codex`, show the resolved approval policy × sandbox -pair in the footer so it's legible even when the preset word is `custom`/`config-toml`. -**Files:** `app.tsx` (`resolveCodexPreset`, `permissionSummary`, `permissionOptionsDetail`), -`components/FooterControls.tsx`. -**Approach:** Add a pure helper `codexApprovalSandboxLabel(modelState)` near -`resolveCodexPreset` returning e.g. `"on-request · workspace-write"` from -`modelState.codexApprovalPolicy` / `codexSandbox`. Pass a `permissionDetail?: string|null` -prop to `FooterControls` and render it dim immediately after the permission cell -(only when provider === codex). Keep the headline preset word as-is. -**Acceptance:** Codex footer shows the approval/sandbox pair; switching presets -updates it; non-codex providers unaffected; unit-test the label helper. - -## 3.2 — Cursor modes from the runtime snapshot (M) -**Goal:** Cursor permission cycling should use the session's actual available -modes, not the static `CURSOR_AVAILABLE_MODE_IDS`. -**Files:** `types.ts` (`AdeCodeModelState`), `app.tsx` (model-state normalize/ -restore sites — grep `cursorModeId`, `cursorModeSnapshot`; `cyclePermission` -cursor branch; `permissionOptionsDetail` cursor branch), shared type -`AgentChatCursorModeSnapshot` in `apps/desktop/src/shared/types/chat.ts`. -**Approach:** Add `cursorAvailableModeIds: string[]` to `AdeCodeModelState` -(default `[]`); populate it from `configSession.cursorModeSnapshot?.availableModeIds` -everywhere `cursorModeId` is set from a snapshot. 
Add a resolver -`cursorModeIdsForState(modelState)` = snapshot ids when non-empty else the static -fallback (mirror desktop `AgentChatComposer` behavior). Use it in the -`cyclePermission` cursor branch and `permissionOptionsDetail`. `cursorModeLabel` -already handles unknown ids. -**Acceptance:** With a Cursor session whose snapshot lists a subset of modes, -cycling only visits those modes; with no snapshot, the static list is used. - -## 3.3 — Plan-approval card (M) -**Goal:** Render plan-mode / approval requests as a one-key approve/reject card -instead of forcing the typed high-stakes path. -**Files:** `pendingInput.ts` (the request → `PendingApproval` mapping), -`components/ApprovalPrompt.tsx`, `app.tsx` (the pending-approval render + the -approval resolution path — grep `pendingApproval`, `resolvePendingApproval`). -Also handle an orchestration `model_selection` request kind if present. -**Approach:** Detect plan-approval / model-selection request kinds in -`pendingInput.ts` and surface them as a structured `ApprovalPrompt` with labeled -choices; wire keys (e.g. `y`/`n` or numbered) + clickable footer buttons (reuse -the existing approval footer items pattern). Don't break the existing high-stakes -modal path. -**Acceptance:** A plan/approval request shows a readable card with one-key -accept/reject; resolving sends the right response. - -## 3.4 — Structural model-picker unification (L) -**Goal:** One picker for both `/model` and the new-chat flow. Retire the duplicate -inline `model-setup` / `new-chat-setup` rows as the *model* surface; fold -Permissions / Fast / Output-style into a slim settings strip inside -`ModelPickerPane`; add auth dots + sign-in hints + per-row reasoning chips. 
-**Files:** `components/ModelPicker/ModelPickerPane.tsx` (presentation), -`components/ModelPicker/modelPickerLayout.ts` (+ `types.ts` in that dir), -`tuiClient/types.ts` (`ModelPickerRightPaneContent`, maybe retire `model-setup`), -`components/RightPane.tsx` (the `model-setup`/`new-chat-setup` block + `modelPickerInputs`), -`app.tsx` (`openModelRow`, `modelSetupRows`/`modelPickerRows`, `openNewChatSetup`, -`commitModelPickerSelection`, the setup-row keyboard branch, the `aiStatus` -threading). Desktop reference: `apps/desktop/src/renderer/components/.../ModelPicker/` -(`ModelListRow.tsx`, `ModelPickerRail.tsx`) and `useProviderAuthStatus.ts` -(`familiesFromStatus`). -**Approach (sequence — ship pieces independently):** -1. **Reasoning chip** on the focused/active row (port `REASONING_LABELS`); cycle - via the existing `modelPicker:increaseEffort`/`decreaseEffort` actions. -2. **Auth dots + sign-in hint:** port pure `familiesFromStatus` into - `modelPickerLayout.ts`; thread `aiStatus` through `modelPickerInputs`; render a - 1-cell red/amber dot after each rail glyph and a `Sign in: /login ` - hint when the active rail provider is unauthed. -3. **"Show all models" toggle** (desktop `authOnly`): add `showAll` to the picker - state + an `authOnly` filter in `buildModelPickerLayout`; bind a key + hit-test. -4. **Settings strip + retire duplicate rows (the big one):** render Permissions/ - Fast/Output-style as a compact focusable strip at the bottom of `ModelPickerPane` - driven by the existing `buildSetupRows` (`SetupPaneRow`); extend the picker - state with `footerFocus?: SetupPaneRowKind`; Tab/arrows cycle into the strip and - reuse the existing `handleSetupRow`. Repoint `/effort` and `openNewChatSetup` - to open the unified picker; delete `openModelRow`, the `model-setup` kind, and - the inline `model-setup`/`new-chat-setup` rendering block once their rows feed - the strip. 
New-chat-only affordances (lane label, "prompt now"/background - dispatch, Apply) survive as picker header/footer actions. -**Cautions:** the picker re-renders on every keystroke (keep it pure/cheap; no -per-row IPC — precompute auth status and pass it in). Width-degrade all chips/dots -via the existing `endTruncate`/`innerWidth` budget. -**Acceptance:** `/model` and new-chat show the same picker; reasoning chip + auth -dots + show-all work; permissions/fast/output-style are editable inside the picker; -the old inline setup rows are gone; tests for the layout function extended. - ---- - -# PHASE 4 — full mouse control + global navigation - -The hover pipeline is wired (`hoverTest` fires on move, `hoveredHitId` flows via -`HitTestProvider`, `useHitTestTarget` returns is-hovered) but **only `MultiChatGrid` -consumes it**. Make every interactive surface mouse-driven, with hover affordances. - -## 4.1 — Universal hover (L) -**Goal:** Hovering any clickable row tints it. Consume `hoveredId` in `Drawer` -(lane/chat rows), `FooterControls` (cells/buttons), `ModelPicker` (rail + rows), -`RightPane` (list/diff/file rows, form fields), `ApprovalPrompt`. -**Files:** the component files above + `app.tsx` (where their hit-test targets are -registered — grep `registry.register`, `appHitTargetIdsRef`; many rows are -registered centrally in the render pass). -**Approach:** For each clickable region that already registers a hit-test target, -pass the hovered state down (or have the row call `useHitTestTarget` with its id + -rect) and tint on match (e.g. `theme.color.borderActive` background or violet -text). The move handler already re-renders on hover change, so this is mostly -plumbing. Keep the registration the single source (don't double-register). -**Acceptance:** moving the mouse over drawer lanes/chats, footer cells, model rows, -right-pane rows highlights the row under the cursor; clicking still works. 
- -## 4.2 — Wheel routed to the pane under the cursor (M) -**Goal:** The wheel scrolls whatever pane the pointer is over, not only the center -transcript. (Grid tiles already scroll-under-cursor — keep that.) -**Files:** `app.tsx` (wheel handler — grep `mouse.kind === "wheel"`), `RightPane.tsx`. -**Approach:** Add scroll-offset state for the right pane (copy ChatView's -`sliceRows`/`maxScrollOffsetForRows`/`scrollOffsetRows` machinery) so `/diff` and -detail/list panes become scrollable instead of truncating. In the wheel handler, -dispatch by pointer region: drawer → drawer scroll; right pane → right-pane offset; -center → existing transcript/tile logic. -**Acceptance:** wheel over the drawer, right pane, and center each scroll the -correct region; long `/diff` and detail panes scroll. - -## 4.3 — Clickable chat links (M) -**Goal:** URLs in chat are openable (OSC-8 + click). -**Files:** `format.ts` (link runs — grep `link`, `LINK_COLOR`; the href is -currently dropped, see the comment "doesn't render hyperlinks distinctly today"), -`components/ChatView.tsx` (`InlineSpans` link branch), `app.tsx` -(`openExternal`/external-open path — grep the PR-url open). -**Approach:** Carry the href on the link `InlineRun`; emit an OSC-8 hyperlink -escape around the visible text; register a hit target over the link rect that -calls the existing external-open helper. Verify the OSC-8 sequence is width-0 (no -layout shift). -**Acceptance:** a URL in an assistant message is underlined, OSC-8 clickable in -supporting terminals, and a mouse click opens it. - -## 4.4 — Ctrl+K command / lane / chat palette (L) -**Goal:** A global fuzzy palette to jump to lanes, chats, and commands (like the -desktop `CommandPalette` / Claude Code's `/`-less quick switch). -**Files:** new overlay component (model it on `components/SlashPalette.tsx`), -`keybindings/index.ts` (add `app:openCommandPalette`), `app.tsx` (state + render + -key handling), `commands.ts` (reuse `paletteCommands`). 
-**Approach:** Ctrl+K opens an overlay listing: built-in + user slash commands, -lanes (jump/switch), and chats (jump/switch). Fuzzy filter as you type (reuse the -slash/mention palette filtering); ↑↓ + mouse hover to select; Enter runs/jumps; -Esc closes. Selecting a lane/chat routes through `applyDrawerChatSelection` -(so grid re-entry works); selecting a command dispatches via the existing -command runner. -**Acceptance:** Ctrl+K opens; typing filters across commands/lanes/chats; Enter -jumps or runs; mouse hover + click work; Esc closes; no conflict with Ctrl+R -(history) or other bindings. - -## 4.5 — `[` / `]` lane cycling + `/switch` restores last chat + reverse pane cycle (S) -**Files:** `app.tsx` (grep `cycleScope`/`[`/`]` currently bound only in the model -picker; `/switch` handler ~grep `"/switch"`; `tabs:previous`). -**Approach:** Bind `[`/`]` (when not in a text field/palette) to cycle the active -lane prev/next. Make `/switch ` restore that lane's last-active chat -(`lastChatByLaneRef`). Fix `tabs:previous` aliasing forward (make it reverse). -**Acceptance:** `[`/`]` move between lanes; `/switch` lands on the last chat; -reverse pane-cycle goes backward. - ---- - -# PHASE 5 — chat management completeness - -## 5.1 — Delete / archive / unarchive chat (L) -**Goal:** Manage chat sessions from the TUI (the runtime supports it; the TUI has -no wrappers and never filters archived chats). -**Files:** `adeApi.ts` (add `deleteSession`/`archiveSession`/`unarchiveSession` -wrappers — confirm the exact action names via the runtime action registry, -`apps/desktop/src/main/services/adeActions/registry.ts` chat/session domain), -`app.tsx` (session list — **filter out `session.archivedAt`**; add drawer chat-row -actions + `/chat …` commands + a confirm gate), `commands.ts` (add the commands), -`components/Drawer.tsx` (a click-× / hotkey on chat rows), `types.ts` if a form is -needed. 
-**Approach:** Mirror the lane-management pattern already in place -(`/lane archive|unarchive|delete` + drawer hotkeys r/a/x + delete-risk preflight): -add `/chat rename|archive|unarchive|delete` (or reuse `/rename` for chat title), -drawer hotkeys on the selected chat row, and a confirm for delete. Filter -`!session.archivedAt` from the displayed session list (grep where sessions are -listed/filtered) so externally-archived chats stop polluting the drawer/grid; -add an "archived chats" listing. -**Acceptance:** can delete/archive/unarchive a chat from the drawer + slash -commands; archived chats are hidden from the normal list and listable on demand; -delete is confirmed. - -## 5.2 — Browse / search chats (M) -**Goal:** `/chats` is filterable; `/switch` resolves chats (not just lanes); -Ctrl+R recalls. -**Files:** `app.tsx` (`/chats`, `/switch`, Ctrl+R history-search — grep -`"/chats"`, `"/switch"`, `historySearch`, `cycleScope` (remove dead code)). -**Approach:** Make `/chats` list the active lane's chats with a filter; make -`/switch` accept a chat reference and resolve it via `applyDrawerChatSelection` -(grid re-entry aware); make Ctrl+R recall prompt history (fix the path that -currently can't recall) and remove the dead `cycleScope`. -**Acceptance:** `/chats` filters; `/switch ` switches chats; Ctrl+R recalls -prior prompts. - -## 5.3 — Session legibility: tag + completion + status glyphs (M) -**Goal:** Surface session tag, completion, and a colored wait/running glyph in the -drawer/grid so state reads at a glance. -**Files:** `format.ts` (tag rendering — grep `tag`, currently invisible), -`components/Drawer.tsx`, `chatInfo.ts`. -**Approach:** Render the session tag where chats are listed; add per-chat status -glyphs (running spinner / amber awaiting / dim ended) consistent with the grid -tile glyphs already added (`ChatView` tile header). Bucket by status/time if -useful. -**Acceptance:** tagged chats show their tag; chat rows show a clear status glyph. 
- -## 5.4 — `/context` visual breakdown + relax the Claude gate (M) -**Goal:** `/context` shows a visual token/context breakdown and works wherever the -runtime supports it (not Claude-only, text-only). -**Files:** `app.tsx` (`/context` handler — grep `"/context"`, `getContextUsage`), -`components/RightPane.tsx`, reuse the `TokenBar` from `FooterControls.tsx`. -**Approach:** Render context usage as a visual breakdown (a `TokenBar`-style bar + -per-bucket lines) in the right pane; relax the `provider === "claude"` gate where -the runtime returns usage for other providers. -**Acceptance:** `/context` shows a visual breakdown; works for supported non-Claude -providers; degrades gracefully when unavailable. - ---- - -## Definition of done (each task) -1. Typecheck clean (`tsc --noEmit`). -2. Relevant scoped vitest green + a new unit test for any pure helper added. -3. Full `npx vitest run` green (currently 842 tests — don't regress). -4. `npm run build` succeeds (verifies the bundled CLI). -5. TUI-appropriate (Ink Box/Text, theme tokens, width-degrades) and consistent - with the patterns above. Commit per task with a clear message. diff --git a/goal.md b/goal.md deleted file mode 100644 index ab88a5479..000000000 --- a/goal.md +++ /dev/null @@ -1,1068 +0,0 @@ -# ADE Work-tab Chat Orchestrator — Implementation Spec (goal.md) - -> **You are reading the hand-off spec.** It is self-contained: you do not need the prior planning conversation. Supplements `/Users/arul/ADE/plans/orch.md` (locked product shape) with concrete data models, IPC, tool sets, UI, workflow, build order, and testing. -> -> **Single bundled PR.** The ADE mission system is being uprooted in the *next* PR — this work must not depend on `apps/desktop/src/main/services/orchestrator/*`, `apps/desktop/src/renderer/components/missions/*`, `missionService.ts`, `chatMessageService.ts`, `coordinatorTools.ts`, or `OrchestratorChatThread` types. Borrow concepts, not code. 
-> -> **Final gate.** After implementation, run `/Users/arul/ADE/.claude/commands/audit.md` against the diff and resolve P0/P1 findings. - ---- - -## Table of contents - -1. [Context](#1-context) -2. [Locked decisions](#2-locked-decisions) -3. [Architecture](#3-architecture) -4. [Data model](#4-data-model) -5. [IPC surface](#5-ipc-surface) -6. [Tool sets per role](#6-tool-sets-per-role) -7. [System prompt + skill (`.agents/skills/ade-orchestrator/SKILL.md`)](#7-system-prompt--skill) -8. [Inter-agent ping system (per-runtime capability matrix)](#8-inter-agent-ping-system) -9. [Cancellation with smart revert](#9-cancellation-flow) -10. [UI components (full UI spec)](#10-ui-components) -11. [Live-editable plan](#11-live-editable-plan-v1) -12. [Permission profiles per provider](#12-permission-profiles-per-provider) -13. [Model routing](#13-model-routing) -14. [Validation as universal concerns](#14-validation-as-universal-concerns) -15. [User authority overrides defaults](#15-user-authority-overrides-defaults) -16. [Hardening (must-fix before ship)](#16-hardening-must-fix-before-ship) -17. [Build order](#17-build-order) -18. [Testing strategy](#18-testing-strategy) -19. [Critical files](#19-critical-files) -20. [Open items, deferred to v2, risks](#20-open-items-deferred-to-v2-risks) -21. [Final audit gate](#21-final-audit-gate) - ---- - -## 1. Context - -The orchestrator is a Work-tab-native multi-agent coordinator that lives entirely on top of ADE's existing chat surface. One chat becomes the **lead** planner/dispatcher; it spawns ordinary ADE chats as **workers** and **validators** in the same lane; they coordinate through a filesystem-resident **ground-truth bundle** (`manifest.json` + `plan.md` + `artifacts/`). The user can see and message any chat at any time. - -The design borrows ideas (not code) from three reference systems: - -- **Claude Code agent teams** — shared task list, claim discipline, lead synthesis. 
-- **Factory.ai missions** — planning-first, validators as a distinct role, milestone+feature decomposition, Mission Control as the truth artifact. -- **Cursor /orchestrate + Cursor plan mode** — planner/worker/verifier separation, structured handoffs, interactive plan-mode UX. - -ADE extends them by: -- treating the **manifest** as live mutable state every agent reads/writes; -- wiring **HTML design specs + the existing ADE built-in browser + the existing inspect-to-chat pipeline** into the planning loop; -- supporting **live plan edits** that propagate to in-flight workers; -- letting the user **pick a model per (role × tag) pair** at Planning time via ADE's in-house model picker (NOT a flat option list); -- **treating user instructions as authoritative** — defaults are advisory, the user can waive or alter any rule at any time, including mid-run. - -The orchestrator is **provider-agnostic from day one**: Claude Agent SDK, Codex App-Server JSON-RPC, Cursor (local SDK + cloud), Droid, OpenCode. Lead and workers/validators may run on different providers. - ---- - -## 2. Locked decisions - -Every entry is canon. They came out of a long planning deliberation; do not relitigate without the user. - -| Area | Decision | -|---|---| -| **This document** | `/Users/arul/ADE/.ade/worktrees/orchestrator-2e3a194b/goal.md`. Spec for the implementing agent only. The orchestrator does not read or write `goal.md` at runtime. | -| **Providers** | All five from day one. | -| **Macro phases** | `Planning → Developing → Validating` (+ optional `Wrap-up`). Planner-owned sub-stages. | -| **Roles** | `lead`, `developer`, `validator`. Lead never edits files. | -| **Q&A cadence** | Adaptive — model picks are one batched askUser per wave; scope, tags, validation are one-at-a-time. | -| **Tag taxonomy** | Project-specific, lead-derived. Lead inspects repo silently, proposes, asks user to confirm/edit. 
| -| **Model picker per (role, tag)** | New `PendingInputKind: "model_selection"` surfaces ADE's existing `ModelPicker` UI; user picks model + fast-mode + reasoning. Permission tier is locked to the provider's highest. NEVER a flat option list. | -| **Plan-approval gate** | Explicit user approval before the lead may spawn any worker. | -| **Live plan edits** | Mid-run. v1: right panel is **read-only** render; user edits propagate by talking to the lead in chat (lead patches manifest). Direct in-panel editing is v2. | -| **Lead chat chrome** | Animated **conic-gradient border ring** around the chat surface (1–2 px). `prefers-reduced-motion` → static rainbow border. Only on lead chats. | -| **Right panel** | Single **unified view** (no multi-tab dock). The Browser is the existing Work-tab sidebar surface and **NOT** duplicated. | -| **Phases UI** | Collapsible accordion; active phase auto-expanded; per-phase progress chip. | -| **Task cards** | Expanded with full metadata: title, tag chip, status pill, description preview, file anchors, owner, elapsed time, validation badges (click-for-evidence). | -| **Markdown engine** | Dual — keep `ChatMarkdown` for chat surface; add `react-markdown` + `remark-gfm` + `remark-mermaid` + `rehype-slug` + `rehype-raw` for the plan view only. | -| **Empty state** | Live "Planning in progress" with the Q&A history. | -| **Asset previews** | Inline where possible. HTML specs render as live sandboxed iframe thumbnails (~240×180); "Open in ADE browser" link beside. Mermaid renders inline. Screenshots inline at natural size with max-width clamp. | -| **Annotations** | **Pure ephemeral.** Select text/image/diagram/HTML → comment → injects into lead chat as a user message → vanishes from the plan view. NO persistence to manifest in v1. | -| **Sidebar grouping** | Flat list. Role badges (`LEAD` purple, `WORKER` blue + tag, `VALIDATOR` green + concern) on `SessionCard.tsx`. | -| **Worker session title** | Goal-summary first (e.g. 
"Build login form"); role/tag chip beside. | -| **Heartbeat cadence** | Every tool call (orchestration tool wrapper bumps `agents[me].lastHeartbeatAt` free). | -| **Cancellation** | Lead's `messageAgent(kind:"interrupt-replace", intent:"cancellation", cancellation:{revert: true \| false \| "review", reason})`. Worker reads, halts, then full revert via git checkout / leave / askUser. | -| **Validator findings** | Spawn fix-task that `supersedes: T-original`. Lead notices, messages original worker with directive. Loop until validator passes. | -| **Manual spawn (user → worker)** | No, lead-only in v1. | -| **Wake-up** | Worker pings lead on every action affecting another agent — done/failed, manifest patch on shared state, asset registration, error. Inter-worker pings **always go through the lead**. | -| **Spawn brief** | Lead composes free-form; skill mandates required sections: `## TASK / ## FILES / ## DEPENDENCIES / ## GATES / ## PEERS / ## SUCCESS`. Server-side `spawnAgent` validates section presence. | -| **Peer context** | Full peer roster in every spawn brief (who, role, tag, current task, status). | -| **Ping visibility** | Regular user-role messages with `metadata.orchestrationOrigin = { runId, fromSessionId, kind, intent, taskId? }`. UI renders a small "from " purple chip in the message header. Receiving agent reads metadata to know the source. | -| **Idle heartbeat** | Lazy. Service does NOT auto-inject. On any incoming lead message (worker ping, user msg), if >5 min since lead's last turn, the system prompt is enriched with `## Since you last replied (Xm): summary`. | -| **Ping primitives** | Three: `queue` / `interrupt-replace` / `wake`. Caller picks explicitly; skill includes the per-runtime capability table (§8) so the lead knows what each target supports. | -| **Spoofing hardening** | None in v1. Skill discipline only. 
| -| **Schema reservations** | All in (attempts, budget, spawnFingerprint, labels, priority, supersedes, checklist.runs, schemaCompatibility, leadState, history). Cheap now, expensive later. | -| **Validation principles** | Universal concerns (NOT hardcoded `audit_like`/`automate_like`/`finalize_like` kinds with baked-in ADE behavior). Planner inspects codebase → detects applicable concerns → asks user where uncertain → writes codebase-specific validation steps into manifest. | -| **No PR handoff, no doc-structure assumptions** | Orchestrator's scope ends at `Validating` complete. It does not push, open PRs, or assume any particular doc/test/CI structure. | -| **User authority** | Authoritative. Any default in the skill can be waived by direct user instruction. Logged to `manifest.userOverrides`. No re-prompting. | - ---- - -## 3. Architecture - -Lane-local. Single new main-process service (`orchestrationService`) owns the bundle and IPC. Renderer mounts a right-side panel when the active session is part of a run. Tool sets compose from `createUniversalToolSet`'s existing branch site. - -``` - ┌──────────────────────────────────────┐ - │ User (composer + plan-panel + chat) │ - └───────────────┬──────────────────────┘ - │ ade:agent-chat:* events - ▼ - ┌──────────────────────────────────────────────────────┐ - │ Lead chat (interactionMode = orchestrator-lead) │ - │ • ade-orchestrator skill + orchestrator system prompt│ - │ • tools: spawnAgent, messageAgent, getAgentTranscript│ - │ manifestPatch, planAppend, askUserForModelSelection│ - │ askUser, registerAsset + read-only base. │ - │ • NO editFile / writeFile / bash / exitPlanMode. 
│ - └────────────┬───────────────────────────────────┬─────┘ - │ IPC │ subscribe - ▼ ▼ - ┌────────────────────────────────┐ ┌─────────────────────┐ - │ orchestrationService (NEW) │◀───────▶│ Right plan panel │ - │ • bundle CRUD, etag, mutex │ events │ (NEW, unified view) │ - │ • spawn/inject/transcript-read │ └─────────────────────┘ - │ • chokidar watcher (debounced) │ - └─────────┬──────────────────┬───┘ - │ fs writes │ agentChatService.spawn / inject / steer - ▼ ▼ - ┌────────────────────────────┐ ┌──────────────────────────────────────┐ - │ .ade/orchestration/ │ │ Worker/Validator chats (lane-local) │ - │ manifest.json (etag) │◀──│ • orchestrationRole + runId │ - │ plan.md (append-only) │ │ • tightened tool set + skill rules │ - │ artifacts/, artifacts/ui │ │ • patch manifest, append plan │ - └────────────────────────────┘ └──────────────────────────────────────┘ -``` - ---- - -## 4. Data model - -### 4.1 Session schema additions - -Extend `AgentChatSession` and `AgentChatSessionSummary` in `apps/desktop/src/shared/types/chat.ts:731+`: - -```ts -export type OrchestrationRole = "lead" | "worker" | "validator"; - -// Existing AgentChatInteractionMode = "default" | "plan" → extend: -export type AgentChatInteractionMode = - | "default" | "plan" - | "orchestrator-lead" | "orchestrator-worker" | "orchestrator-validator"; - -// Added on session + summary: -orchestrationRunId?: string; -orchestrationRole?: OrchestrationRole; -orchestrationParentSessionId?: string; -orchestrationTag?: string; -orchestrationStepId?: string; -orchestrationBundlePath?: string; -``` - -Persist through the existing `persistChatState(managed)` path alongside `identityKey` / `surface` / `automationId`. All fields optional for migration tolerance. 
- -### 4.2 Manifest schema - -New file `apps/desktop/src/shared/types/orchestration.ts`: - -```ts -export const ORCHESTRATION_MANIFEST_VERSION = 1; - -export type OrchestrationPhaseId = "planning" | "developing" | "validating" | "wrapup"; - -export type OrchestrationManifest = { - version: 1; - schemaCompatibility?: { minReader: 1; maxKnown: 1 }; - runId: string; - laneId: string; - bundlePath: string; - etag: string; // monotonic; bumped on every patch - serverGeneration: number; // monotonic across git-checkouts; persisted at .gen - createdAt: string; updatedAt: string; - title: string; - goalSummary: string; - currentPhase: OrchestrationPhaseId; - phases: OrchestrationPhase[]; - agents: OrchestrationAgent[]; - tasks: OrchestrationTask[]; - validationStrategy: ValidationStrategy; - modelRouting: ModelRouting; - assets: OrchestrationAsset[]; - decisions: DecisionLogEntry[]; - userOverrides: UserOverrideEntry[]; - leadState: { lastSnapshotEtag?: string; lastSnapshotSeenAt?: string }; - history: Array<{ etag: string; at: string; summary: string; patchKindSummary?: string }>; // ring buffer (last 50) - defaultBudget?: AgentBudget; - - // v2 reservations (present in v1 schema; unused): - coordinatorSessionId?: string; - peerRunIds?: string[]; - parentRunId?: string; - forkedAtEtag?: string; - forkReason?: string; -}; - -export type OrchestrationPhase = { - id: OrchestrationPhaseId; - title: string; - status: "pending" | "active" | "done" | "skipped"; - startedAt?: string; completedAt?: string; -}; - -export type OrchestrationAgent = { - sessionId: string; - role: OrchestrationRole; - tag?: string; - displayName?: string; - goalSummary: string; - status: "pending" | "running" | "blocked" | "completed" | "failed"; - currentStepId?: string; - cancellationRequested?: boolean; // §9 - lastHeartbeatAt?: string; - spawnedAt: string; - spawnFingerprint?: SpawnFingerprint; - budget?: AgentBudget; - usage?: { tokensIn?: number; tokensOut?: number; costUsd?: number; turns?: 
number; elapsedMs?: number }; -}; - -export type SpawnFingerprint = { - provider: AgentChatProvider; - modelId: string; - reasoningEffort?: string | null; - codexFastMode?: boolean; - resolvedAt: string; - routingKey: "byRoleTag" | "byTag" | "byRole" | "default" | "fallback" | "override"; -}; - -export type AgentBudget = { - maxTokens?: number; maxCostUsd?: number; - maxWallClockMs?: number; maxTurns?: number; - onExceeded?: "pause" | "interrupt" | "warn"; -}; - -export type OrchestrationTask = { - id: string; // stable - phaseId: OrchestrationPhaseId; - title: string; description: string; - status: "pending" | "claimed" | "in_progress" | "review" | "done" | "failed"; - blockedBy?: string[]; blocks?: string[]; - supersedes?: string[]; supersededBy?: string[]; // for validator fix-tasks - relatedTaskIds?: string[]; // v2 reservation - filesHint?: string[]; - tag?: string; - labels?: string[]; - priority?: "low" | "normal" | "high" | "critical"; - estimatedComplexity?: "trivial" | "small" | "medium" | "large" | "spike"; - assigneeSessionId?: string; - claimedAt?: string; claimLeaseUntil?: string; - attempts?: OrchestrationTaskAttempt[]; // append-only history - currentAttemptId?: string; - evidence?: EvidenceRef[]; - validationGate: { required: boolean; stepIds: string[] }; - humanOverride?: { byUserId?: string; at: string; fromStatus: OrchestrationTask["status"]; toStatus: OrchestrationTask["status"]; reason?: string }; -}; - -export type OrchestrationTaskAttempt = { - id: string; - sessionId: string; - startedAt: string; endedAt?: string; - outcome: "succeeded" | "failed" | "interrupted" | "cancelled" | "superseded"; - evidence?: EvidenceRef[]; - failureReason?: string; -}; - -// Validation — universal concerns (NOT hardcoded ADE-specific kinds) -export type ValidationConcern = - | "reverify_changes" // audit principle (recommended default for every worker) - | "test_suite_truthfulness" // automate principle (only when codebase has tests) - | "surface_parity" // 
automate principle (only when ancillary surfaces exist) - | "pre_completion_gate" // finalize principle minus PR-handoff (when codebase has CI rubric) - | "deep_maintainability" // thermal principle (opt-in for high-risk diffs) - | "custom"; // planner-defined - -export type ValidationStrategy = { - steps: ValidationStep[]; - checklist: ValidationChecklistItem[]; -}; - -export type ValidationStep = { - id: string; - concern: ValidationConcern; - scope: "per_worker" | "per_step" | "mission_exit"; - required: boolean; - prompt: string; // PLANNER-DERIVED, codebase-specific. See §14. - evidenceRequired: ("plan_md_section" | "manifest_checklist" | "diff_summary" | "screenshot" | "test_log")[]; - appliesToTaskIds?: string[]; // empty = all tasks in scope -}; - -export type ValidationChecklistItem = { - id: string; - stepId: string; - taskId?: string; // null = mission-level - runs: ValidationChecklistRun[]; // append-only - latestRunId: string; -}; - -export type ValidationChecklistRun = { - id: string; - runBySessionId: string; - status: "running" | "passed" | "failed"; - attachedEvidence?: EvidenceRef[]; - notes?: string; - startedAt: string; endedAt?: string; - supersedes?: string; // prior run id (re-runs preserve history) -}; - -export type EvidenceRef = - | { kind: "plan_md_section" | "artifact" | "screenshot" | "test_log"; path: string; sha256?: string; range?: { startLine: number; endLine: number } } - | { kind: "transcript_excerpt"; sessionId: string; turnId: string; range?: { startCharOffset: number; endCharOffset: number } } - | { kind: "external_url"; url: string }; // v2-shaped; harmless in v1 - -export type ModelRouting = { - default?: ModelSelection; - byRole?: Partial<Record<OrchestrationRole, ModelSelection>>; - byTag?: Record<string, ModelSelection>; - byRoleTag?: Record<string, ModelSelection>; // key = `${role}:${tag}` -}; - -export type ModelSelection = { - provider: AgentChatProvider; - modelId: string; - reasoningEffort?: string | null; - codexFastMode?: boolean; -}; - -export type OrchestrationAsset = { - id: string; path: string; //
relative to bundle root - kind: "html_spec" | "screenshot" | "test_log" | "doc"; - version: number; - approval?: "pending" | "approved" | "rejected"; - notes?: string; -}; - -export type DecisionLogEntry = { - id: string; at: string; - source: "user" | "lead" | "worker" | "validator"; - summary: string; - refs?: { taskId?: string; stepId?: string; assetId?: string }; -}; - -export type UserOverrideEntry = { - id: string; at: string; - scope: "session" | "phase" | "task" | "step"; - appliedToId?: string; - instruction: string; // user's literal words - affectedDefault?: string; // skill rule that was overridden -}; -``` - -### 4.3 Bundle layout - -``` -<laneWorktree>/.ade/orchestration/<runId>/ - manifest.json - plan.md - .gen # monotonic serverGeneration counter (outside manifest, see §16.7) - artifacts/ - ui/<name>.html - evidence/<*.png|*.log|*.md> -``` - -`<laneWorktree>` resolved via `managed.laneWorktreePath` (`apps/desktop/src/main/services/chat/agentChatService.ts:14193`). - -### 4.4 Shared-types additions - -`PendingInputKind` (`apps/desktop/src/shared/types/chat.ts:692`): - -```ts -"approval" | "question" | "structured_question" | "permissions" | "plan_approval" | "model_selection" -``` - -For `model_selection`, `providerMetadata` carries `{ role; tag; availableModels: ModelCatalogSnapshot; suggested?: ModelSelection }`. Resolved via `IPC.agentChatRespondToInput` with `{ selection: ModelSelection }`. - ---- - -## 5. IPC surface - -All channels in `apps/desktop/src/shared/ipc.ts`; handlers in `apps/desktop/src/main/services/ipc/registerIpc.ts` next to the existing `agentChat*` cluster (line 6463+). Service file: `apps/desktop/src/main/services/orchestration/orchestrationService.ts` (NEW; deliberately under `services/orchestration/` singular, NOT `services/orchestrator/` which is being deleted). - -| Channel | Args | Return | Purpose | -|---|---|---|---| -| `orchestrationRunCreate` | `{ laneId; leadSessionId; title?; goalSummary?
}` | `{ runId; manifest; etag }` | Bootstrap bundle with `phases:[planning]` and `agents:[lead]`. | -| `orchestrationBundleRead` | `{ runId }` | `{ manifest; planMd; etag }` | Atomic full read. | -| `orchestrationManifestReadSection` | `{ runId; section: "tasks" \| "agents" \| "validationStrategy" \| "decisions" \| "assets" }` | `{ section; data; etag }` | Sectioned read; saves bandwidth. | -| `orchestrationManifestPatch` | `{ runId; patches; ifMatchEtag }` | `{ manifest; etag }` \| `{ error:"etag_conflict"; manifest; etag }` | RFC-6902 subset (`add`/`replace`/`remove`); arrays addressed by `{id:X}` predicate, **NEVER by index**. Per-runId AsyncMutex. Validates schema + per-role patch-path whitelist (§6). Atomic write (`.tmp` + fsync + rename). Bumps etag. Broadcasts `ade.orchestration.event` with diff payload `{ patch }`. | -| `orchestrationPlanAppend` | `{ runId; section; body; pinId? }` | `{ planMd; etag }` | Append-only writer. Section headings stable for renderer anchors. | -| `orchestrationPlanWrite` | `{ runId; nextPlanMd; ifMatchEtag }` | `{ planMd; etag }` | User-only (dock edits). Etag-guarded. | -| `orchestrationSpawnAgent` | `{ runId; role:"worker"\|"validator"; tag; goalSummary; stepId?; initialMessage; modelOverride? }` | `{ sessionId; manifest; etag }` | Resolves `(role,tag) → ModelSelection` (§13). Validates `initialMessage` contains required sections (TASK/FILES/DEPS/GATES/PEERS/SUCCESS). Calls `agentChatService.createSession` with the locked permission profile (§12) and the right `interactionMode`. Writes `agents[]`. Sets initial claim if `stepId`. | -| `orchestrationAgentInject` | `{ targetSessionId; payload }` | `void` | Replacement for the deleted `sendAgentMessage`. Validates source/target are in the same run. Routes through `agentChatService.send`/`steer`/`interrupt` based on `payload.kind` (queue / interrupt-replace / wake — see §8). Delivered as a regular user-role message with `metadata.orchestrationOrigin` field on the chat row. 
| -| `agentChatReadTranscript` | `{ sessionId; limit?; since? }` | `AgentChatTranscriptEntry[]` | New IPC; wraps the private `readTranscriptEntries(managed)` at `agentChatService.ts:5335`. | -| `orchestrationAssetRegister` | `{ runId; relPath; kind; version?; approval? }` | `{ asset; etag }` | Records artifact metadata. | -| `orchestrationClaimTask` | `{ runId; taskId; sessionId; leaseMs }` | `{ ok; reason?; manifest; etag }` | Atomic claim under per-runId mutex. | -| `orchestrationReleaseTask` | `{ runId; taskId; sessionId; status }` | `{ manifest; etag }` | Release/transition. | -| `orchestrationRunList` | `{ laneId? }` | `OrchestrationManifest[]` (summary) | Listing. | - -**Event channel.** `ade.orchestration.event` payload `{ runId; kind: "manifest" | "plan" | "asset"; etag; patch?: ManifestPatch[]; manifest?; planMd?; planPatch?: { from: string; to: string } }`. Emitted on every successful write AND by chokidar watcher (debounced 50 ms, scoped to ``, with self-write suppression — see §16.1). - -Preload bridge: `window.ade.orchestration.*`. - ---- - -## 6. Tool sets per role - -New factory in `apps/desktop/src/main/services/ai/tools/orchestrationTools.ts` (NEW). Composes from `createUniversalToolSet` (`apps/desktop/src/main/services/ai/tools/universalTools.ts:2617`) by taking the read-only subset and adding orchestration-specific tools. Invoked at the same site as `createUniversalToolSet`, gated on `interactionMode === "orchestrator-lead" | "orchestrator-worker" | "orchestrator-validator"`. - -Patch-path whitelist lives in a sibling `apps/desktop/src/main/services/orchestration/patchPolicy.ts` (NEW; single source of truth — IPC handler, tool descriptions, and tests all consume it). Uses id-predicate paths (e.g. `/tasks/{id:T-003}/status`), never `/tasks/2`. - -### 6.1 Lead (`orchestrator-lead`) - -Read-only base: `readFile`, `grep`, `glob`, `listDir`, `gitStatus`, `gitDiff`, `gitLog`, `webFetch`, `webSearch`, `TodoWrite`, `TodoRead`, `askUser`. 
- -Adds: -- `spawnAgent(role, tag, goalSummary, stepId?, initialMessage, modelOverride?)` -- `messageAgent({ targetSessionId, kind: "queue"|"interrupt-replace"|"wake", intent: "directive"|"status"|"diff_notice"|"cancellation"|"question", text, taskId?, cancellation?: { revert: boolean | "review"; reason: string } })` -- `getAgentTranscript(sessionId, limit?, since?)` -- `manifestPatch(patches[], ifMatchEtag)` — lead may patch all paths except `agents[].sessionId` and worker-owned fields -- `planAppend(section, body)` -- `planWrite(nextPlanMd, ifMatchEtag)` — for re-plans -- `askUserForModelSelection(role, tag, suggestedProvider?, suggestedModel?)` -- `registerAsset(relPath, kind, version)` - -Denied: `editFile`, `writeFile`, `bash`, `exitPlanMode`. Rationale: the lead is the planner/dispatcher; all code changes flow through workers so audit trails are clean and per-worker validation gates run. - -### 6.2 Worker (`orchestrator-worker`) - -Full edit-capable set (`editFile`, `writeFile`, `bash`) **with the bundle bash blocklist** (§16.5): bash refuses writes to `<bundle>/manifest.json` and `<bundle>/plan.md`. `<bundle>/artifacts/*` is freely writable. - -Adds: `claimTask`, `releaseTask`, `manifestPatch` (whitelisted to `agents[me].{status,currentStepId,lastHeartbeatAt}` + `tasks[claimedByMe].{status,evidence,attempts}`), `planAppend`, `messageAgent({ kind, intent: "status" | "question" only })`, `getAgentTranscript`, `registerAsset`. - -Denied: `spawnAgent`, `askUserForModelSelection`, `planWrite`, lead-only patch paths. Workers **cannot** patch their own `validationGate` or `validationStrategy.checklist` (server-enforced). - -### 6.3 Validator (`orchestrator-validator`) - -Same execution capability as worker (validators run tests, take screenshots): `editFile`, `writeFile`, `bash` (with bundle blocklist).
- -Adds: `manifestPatch` (whitelisted to `validationStrategy.checklist[*].{runs,latestRunId}` and `agents[me].status`), `planAppend`, `getAgentTranscript`, `registerAsset`, `messageAgent({ kind, intent: "status" | "question" only })`. - -Denied: `spawnAgent`, `askUserForModelSelection`, `claimTask` on non-validation tasks. - ---- - -## 7. System prompt + skill - -### 7.1 Prompt injection sites - -**Claude.** Extend `opts.systemPrompt.append` builder at `agentChatService.ts:14183-14199`. New `buildOrchestratorRoleDirective(managed.session)` parallel to `buildClaudeInteractionModeDirective` (line 3141); returns directive when `interactionMode` starts with `orchestrator-`. Contents: role, runId, bundle path, "manifest is ground truth", "follow `.agents/skills/ade-orchestrator/SKILL.md`", per-role rules. - -**Non-Claude (Codex / Cursor / Droid / OpenCode).** Extend `buildCodingAgentSystemPrompt` in `apps/desktop/src/main/services/ai/tools/systemPrompt.ts:79` with `orchestrationRole?`, `orchestrationRunId?`, `orchestrationBundlePath?`. Emit the same directive block early. Call sites pull from `managed.session` like `permissionMode`. - -**Per-turn re-pinning.** Add `shouldInjectOrchestratorDirective` keyed off `managed.lastOrchestrationDirectiveKey !== <runId>:<role>`, parallel to `shouldInjectLaneDirective` (line ~14474). - -### 7.2 `.agents/skills/ade-orchestrator/SKILL.md` - -Path: `/Users/arul/ADE/.agents/skills/ade-orchestrator/SKILL.md`. Auto-discovered via existing skill-walk (`agentChatService.ts:14166`). - -Outline (write all sections — this is the protocol): - -**Frontmatter** — `name: ade-orchestrator`; `description: Orchestrator-mode protocol for ADE Work-tab lead, worker, and validator chats. Use whenever the system prompt declares orchestrator-lead, orchestrator-worker, or orchestrator-validator mode.` - -**§1 — User authority overrides defaults.** Every rule below is a default.
If the user directly instructs a deviation ("skip validation for this run", "no audit gate", "no asking, use Opus for everything", "only plan, I'll spawn workers myself"), comply, log a `UserOverrideEntry` to `manifest.userOverrides` with the literal instruction, surface the risk once in chat if material, and **do not re-prompt the default later** in the same scope. - -**§2 — Bundle as truth.** Read manifest before reasoning. Write through `manifestPatch` / `planAppend` only. Never invent state. Never fork canonical state into chat-only prose. - -**§3 — Planning protocol (lead only).** -1. Read `goal.md` if present; otherwise `askUser` for a one-line goal. -2. **Codebase intake — inspect-first, ask-on-uncertainty.** Read `CLAUDE.md`, `README.md`, package manifests (`package.json` / `pyproject.toml` / `Cargo.toml` / `go.mod` / etc.), CI config (`.github/workflows/` / `.circleci/` / `.gitlab-ci.yml`), top-level dir listing, recent `git log --oneline -50`. Infer: project shape, test stack, ancillary surfaces, available CI gates, doc structure (if any). -3. Propose a tag taxonomy (3–6 tags) and confirm via `askUser`. Examples seed by shape — fullstack web → `web-ui` / `backend` / `docs`; graphics → `render-pipeline` / `shaders`; mobile → `swiftui` / `storekit`. **Tags are project-specific.** -4. Propose tasks per phase. For Developing tasks, include `filesHint` derived from inspection where possible. -5. **Validation step derivation (see §6 below).** Detect which `ValidationConcern`s apply; ask user where uncertain; write codebase-specific `prompt` text into each `validationStrategy.steps[]` entry. -6. **Model picks.** For every `(role, tag)` pair, call `askUserForModelSelection`. Batched as one wave (per locked cadence). The picker UI is the ADE in-house `ModelPicker` — never present a flat option list. -7. Append a `DecisionLogEntry` per lock-in. -8. **Plan-approval gate.** Once Planning is complete, present `[ ✅ Approve Plan ]` via `askUser`. 
Until the user approves, do not call `spawnAgent`. - -**§4 — Developing protocol (worker only).** Claim before touch (`claimTask(taskId, 30min lease)`). Heartbeat is free (tool wrapper bumps `lastHeartbeatAt` on every call). After substantive edits, satisfy every `validationGate.stepIds[]` that has `scope: "per_worker"` and `required: true`. Default gate (when present): `reverify_changes` — execute its `prompt` from the manifest. Write evidence via `planAppend`; tick the `validationStrategy.checklist`. Only then patch `tasks[mine].status = "done"`. Server rejects `status: "done"` patches when required checklist items are not `passed`. - -**§5 — Validating protocol (validator only).** For each assigned step, read its `prompt` from `manifest.validationStrategy.steps[]` and execute it. The prompt is codebase-specific — do not assume vitest/jest/pytest or specific doc paths. Attach evidence; flip checklist `passed`/`failed`. On failure: **spawn fix-task** by reporting up to the lead (validator pings lead with status; lead patches a new task with `supersedes: T-original` and re-tasks the original worker). **Validators do not spawn agents themselves.** - -**§6 — Validation as universal concerns.** When the planner writes a `validationStrategy.steps[]` entry, it picks a `ValidationConcern` and authors the codebase-specific `prompt`. The concern names are classifiers; the **prompt is what the validator follows**. - -- **`reverify_changes`** (audit principle, *recommended default for every Developing task*). - - Principle: after substantive edits, re-read the *final* state of every touched file (not just remembered diffs). Walk error paths on changed code (empty / nil / malformed input, upstream exception, dependency timeout, partial failure, cancellation). Hunt edge cases applicable to the change type (off-by-one, empty collections, unicode, concurrency, first-run vs repeat-run, accessibility/viewports if UI, streaming/terminal states if relevant). 
Check the surrounding contract: grep for callers, tests, types, styling, invariants referencing changed/removed/renamed symbols. Fix what you find directly. Call out genuine ambiguities. Report what was checked, fixed, and deliberately left alone. - - Planner derivation: write the prompt naming the file types the worker is touching and the relevant edge-case categories for *this* codebase. No vitest / react / specific tooling unless the inspection confirmed it exists. - -- **`test_suite_truthfulness`** (automate principle, *only when codebase has tests*). - - Principle: "leave the suite more truthful and smaller, not just larger." Three passes in order: **PRUNE** (orphaned tests, `skip`/`only`/`todo`, anti-pattern tests like `expect(true)` or zero-assertion bodies, over-mocked fixtures, render-only UI tests) → **CONSOLIDATE** (merge fragmented files about one feature, respect a per-folder file budget) → **ADD** (only for new public contracts; hard caps the planner picks — e.g. "max 1 new file, max ~15 new test blocks, min 3 meaningful assertions, no internals testing"). - - Planner step: inspect for test files (common patterns + framework hints from package manifests). If none are found, **skip this concern entirely**. If tests exist, ask the user `"we have tests in <paths>. Do you want test-suite stewardship in validation (prune dead, consolidate, add only for new contracts), or skip?"`; if the user opts in, author the prompt with the codebase's test framework, paths, and anti-bloat caps. - -- **`surface_parity`** (automate principle, *only when ancillary surfaces exist*). - - Principle: when a feature lands, cross-cutting surfaces that shadow the change must stay in lockstep. Ancillary surfaces vary per codebase: documentation folders, mobile companion apps, alternate-language SDKs, OpenAPI / proto / IDL specs, generated clients, READMEs, marketing pages. 
- - Planner step: inspect for plausible surfaces (look for `docs/`, `README.md` density, `apps/mobile`/`apps/ios`/`apps/android`, `sdks/`, `openapi.yaml`, `proto/`, `.proto`, `clients/`, `examples/`, `website/`). For each surface detected, ask the user `"I see <surface> in this repo. Should validation include keeping it in lockstep with the change? (e.g. update docs to reflect new behavior / update SDK types / regenerate clients)"`. For each yes, author a validation step naming that specific surface and what "in lockstep" means for it. - -- **`pre_completion_gate`** (finalize principle, *minus PR/push handoff*). - - Principle: before declaring the run complete, run the codebase's standard pre-completion checks. These vary: typecheck, lint, test suite, build, doc validators, lock-file consistency, asset compilation. **Orchestrator does not push, open PRs, or handle remote review** — that's a separate user-driven step. - - Planner step: inspect `package.json` scripts, `Makefile`, CI workflow yaml, common entry points (`npm run typecheck`/`lint`/`test`/`build`, `cargo check`/`clippy`/`test`/`build`, `pytest`, `go vet`/`go test`/`go build`, etc.). Propose a set; ask the user `"propose pre-completion gates: <commands>. Add/remove?"`. Author the prompt with the exact commands and the codebase's local rules. - -- **`deep_maintainability`** (thermal principle, *opt-in for high-risk diffs*). - - Principle: when the diff is large or touches load-bearing code, run a deep maintainability/structure audit (cohesion, coupling, abstraction-leak, dead-on-arrival code, surprise contracts). Optional v1. - - Planner step: if user marks the run `risk: high` or asks for it, propose; otherwise skip. - -- **`custom`** — anything else the planner needs. - -**§7 — Inter-agent ping discipline.** Every state mutation that affects another agent must trigger a ping. 
Examples: worker patches `tasks[mine].status = "done"` → ping lead; lead patches `tasks[T].assigneeSessionId` → ping new + old assignee; validator patches a checklist run to `passed`/`failed` → ping lead; worker registers an asset → ping lead. Inter-worker pings **always go through the lead**. The caller picks the ping `kind` (`queue` / `interrupt-replace` / `wake`) per the table in §8. - -**§8 — Per-runtime ping capabilities** (lookup table the lead consults; see §8 of this spec for the source data). - -**§9 — Cancellation with smart revert.** The lead sends `messageAgent({ kind: "interrupt-replace", intent: "cancellation", cancellation: { revert: true | false | "review", reason } })`. The worker reads it, halts, then: -- `revert: true` — `git checkout -- <files>` for tracked files; `rm` for untracked files the worker created. Idle. -- `revert: false` — leave changes; status → `completed` with note "lead requested keep, no revert". -- `revert: "review"` — `askUser` ("Lead requested cancel; should I keep, revert, or partial?"). Follow user's instruction. Log to `decisions`. - -**§10 — Live plan-edit reaction (lead only).** When manifest etag bumps and the diff affects `tasks[*]` / `phases[*]` / `validationStrategy`: re-read manifest; compare against persisted `manifest.leadState.lastSnapshotEtag`; iterate `manifest.history.slice(after: lastSnapshotEtag)` to know what changed; for each in-flight assignee respond per §9; for newly added tasks lacking assignee, spawn or hold per dependency. After reconciling, patch `manifest.leadState = { lastSnapshotEtag: currentEtag, lastSnapshotSeenAt: now }`. - -**§11 — Spawn brief.** Free-form, **but** must contain headings: `## TASK`, `## FILES`, `## DEPENDENCIES`, `## GATES`, `## PEERS`, `## SUCCESS`. Server-side `spawnAgent` validates section presence. `## PEERS` lists every other in-flight agent with role, tag, current task, status. 
`## GATES` lists which `validationStrategy.steps[]` entries apply (with their codebase-specific prompts inlined or referenced by id). - -**§12 — Forbidden actions.** Forking canonical state into chat-only prose. Spawning agents not registered in the manifest. Using `bash` to edit `<bundle>/{manifest.json, plan.md}` (sandbox enforces server-side too). Validators spawning agents. Workers patching their own `validationGate`. Workers patching checklist items. - ---- - -## 8. Inter-agent ping system - -ADE already has unified IPC: `agentChatSend` / `agentChatSteer` / `agentChatInterrupt` (`apps/desktop/src/shared/ipc.ts:187-193`; handler at `apps/desktop/src/main/services/ipc/registerIpc.ts:6507`). All five providers route through one handler in `agentChatService.sendMessage` / `steer` / `interrupt`. The orchestration layer's `orchestrationAgentInject` translates `(kind, intent, target) → the right unified call`. The pattern has precedent in `workerDeliveryService.ts:1054-1128`, which already implements a steer→send fallback ladder. - -### 8.1 Unified primitives - -```ts -type Ping = - | { kind: "queue"; text: string } // → agentChatService.steer - | { kind: "interrupt-replace"; text: string } // → interrupt then sendMessage - | { kind: "wake"; text: string }; // → agentChatService.sendMessage (dormant-only) -``` - -Notes: -- `send` is **dormant-only**; mid-turn it throws. Use `steer` or `interrupt`. -- Claude can also "inline" via `dispatchSteer({ mode: "inline" })` (`agentChatService.ts:18921-19020`) for immediate fold-in without spawning a separate assistant turn. 
- -### 8.2 Per-runtime capability matrix - -| Provider | Native steer (mid-turn, model-aware) | Native cancel-and-replace | Wake-from-dormant | -|---|---|---|---| -| Claude Agent SDK | yes (`dispatchSteer inline`, `shouldQuery:false`) | yes (`query.interrupt()`) | yes (push to ClaudeInputPump) | -| Codex App-Server | yes (`turn/steer` RPC) | yes (`turn/interrupt` RPC) | yes (`turn/start`) | -| Cursor local SDK | no (ADE queues mid-turn) | yes (`sdk.cancel()`) | yes (`sdk.sendPrompt`) | -| Cursor cloud | no (`cloud.followup` queues) | yes (`cloud.run.cancel`) | yes (`cloud.send.stream` / `cloud.followup`) | -| Droid | no (ADE queues) | yes (`sdk.cancel()`) | yes (`sdk.sendPrompt`) | -| OpenCode | no (ADE queues) | yes (`session.abort`) | yes (`session.promptAsync`) | - -All providers support all three primitives at the API surface. Only Claude and Codex have **native model-aware** steering. For non-native-steer providers, `queue` is ADE-buffered and flushed on the next turn. - -### 8.3 When pings fire - -The orchestration tool wrappers compute affected agents from the patch and emit pings automatically when a tool mutates shared state: -- `claimTask` succeeds → no ping (caller is the actor). -- `releaseTask` with `status: "done" | "failed"` → ping lead. -- `manifestPatch` to `tasks[*].status` → ping lead. -- `planAppend` from worker / validator → ping lead. -- `manifestPatch` to `agents[*].status` → ping lead. -- `registerAsset` from worker / validator → ping lead. -- Lead `manifestPatch` to `tasks[T].assigneeSessionId` (reassignment) → ping new and old assignee. -- Lead `manifestPatch` to `tasks[T].validationGate` → ping the assigned worker. -- Lead `manifestPatch` setting `agents[X].cancellationRequested = true` → triggers bash interrupt on X (§16.2). -- User dock edit → service auto-pings lead. - -Workers never ping each other directly. Lead is the routing hub. 
- -### 8.4 Visibility - -The injected message lands on the target chat row with `metadata.orchestrationOrigin = { runId; fromSessionId; kind; intent; taskId? }`. The UI renders a small purple "from <agent>" chip in the message header. The receiving agent's system prompt mentions: "messages with `orchestrationOrigin` metadata are sent by another orchestration agent, not the user; act accordingly". - ---- - -## 9. Cancellation flow - -`manifest.agents[].cancellationRequested` is the explicit signal. The worker bash factory subscribes to the manifest watcher; on a patch that sets it `true` for the worker, it sends `SIGTERM` to the in-flight child-process tree (existing `eventAbortController` pattern at `agentChatService.ts:8091`). After the child exits, the worker's normal loop picks up the cancellation envelope and follows SKILL §9 (`revert` / `keep` / `review`). - ---- - -## 10. UI components - -This section is the full UI spec. The user emphasized "make the new panel really nice." - -### 10.1 Composer "+" menu entry - -In `apps/desktop/src/renderer/components/chat/AgentChatComposer.tsx` add a menu item **"New orchestrator chat"** to the attachment/action menu near the composer toolbar. Also add to the sidebar "+ New Chat" picker in `apps/desktop/src/renderer/components/terminals/SessionListPane.tsx:596`. On click: new draft kind `"chat-orchestrator"`; flow ends in `agentChatCreate` with `interactionMode: "orchestrator-lead"`; immediately followed by `orchestrationRunCreate` to allocate the bundle. - -Both entry points coexist with regular "+ New Chat". The orchestrator entry carries an "Orchestrator" purple-accent label tone. - -### 10.2 Lead chrome (rainbow ring) - -New file: `apps/desktop/src/renderer/components/chat/OrchestratorLeadFrame.tsx`. Wrapper component that renders an animated conic-gradient ring around the chat surface. Applied from `AgentChatPane.tsx` only when `session.interactionMode === "orchestrator-lead"`. 
- -Two visual modes behind one component: -- **Default.** Slow CSS conic-gradient on a pseudo-border + subtle box-shadow pulse. Border itself slowly cycles red → orange → yellow → green → blue → violet. <40 ms additional render cost. -- **`@media (prefers-reduced-motion: reduce)`** — static rainbow border (no animation). - -The wrapper does **not** alter layout; it sits between `ChatSurfaceShell` and the inner pane. Worker/validator chats get no rainbow but get a small role chip in the header (driven by `session.orchestrationRole`). - -ASCII shape (default): - -``` -┌─🌈──────────────────────────────────────────────────┐ -│ Lead · web-app orchestrator │ -│ │ -│ I'm in Planning phase. Tell me the goal │ -│ in one sentence... │ -│ │ -│ ┌─────────────────────────────────────────────────┐ │ -│ │ type here... │ │ -│ └─────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────┘ -(border itself slowly cycles) -``` - -### 10.3 Right-side plan panel - -New file: `apps/desktop/src/renderer/components/orchestration/OrchestrationPanel.tsx`. Mounted in `AgentChatPane.tsx` whenever the active session has `orchestrationRunId`. Subscribes via `window.ade.orchestration.subscribe(runId, cb)` (preload wrapper around `ade.orchestration.event`). - -**Layout.** Always-visible vertical pane to the right of the chat, like Cursor's right panel. Collapse arrow at the top-right (collapsed → icon strip, expanded → full panel). Width: ~360 px default, resizable. - -**Single unified view — NO multi-tab dock.** The panel renders top-to-bottom: - -1. **Run header.** Run title, lane name, current phase pill, lead identity. A small `⊕ collapse` arrow on the right. -2. **Phases accordion** — Planning / Developing / Validating (+ optional Wrap-up). Each header carries status + progress chip. -3. **Task cards** under each phase (§10.4). -4. **plan.md narrative** rendered through the new markdown engine (§10.5). -5. 
**Inline asset previews** at the spot they're referenced in `plan.md` (§10.6). - -**Lead view = all of the above. Worker/Validator view = same panel but read-only (tasks not editable, no plan rewrite affordance).** - -**Empty state during Planning** (before any tasks exist): - -``` -✨ Planning in progress - -✓ Q1 What's the goal? - A Rebuild the login flow. - -✓ Q2 Project tags I'm proposing: - [web-ui] [backend] [docs] - A Confirm + add [tests] - -✓ Q3 Pick model for developer:web-ui - A Claude Sonnet 4.6, xhigh - -⏳ Q4 Pick validation steps... - (awaiting answer) - -Tasks will appear here once -planning completes. -``` - -Each Q&A row is built from the lead's `decisions[]` entries that match planning-question shapes. - -### 10.4 Task card - -Expanded card with full metadata. ASCII shape: - -``` -┌────────────────────────────────────────────┐ -│ T-01 • [web-ui] ◆ done 🔍 ⋯ │ -│ build login form │ -│ │ -│ Form with email + pw inputs; uses │ -│ /auth route; routes to /dash on success. │ -│ │ -│ 📄 src/login.tsx src/auth.ts │ -│ 👤 worker:claude · ⏱ 12m elapsed │ -│ ✓ reverify_changes ✓ test_suite │ -└────────────────────────────────────────────┘ -``` - -Elements: -- Top row: task id, tag chip, status pill (`pending` / `claimed` / `in_progress` / `review` / `done` / `failed`), 🔍 expand button, ⋯ context menu. -- Title row. -- Description (clamped to 3 lines; click to expand). -- File anchors row — `filesHint` rendered as clickable chips. Click → dispatches `ade:agent-chat:add-attachment` so the user can insert the file ref into the lead composer. -- Owner row — assignee linked to that worker's chat session; click switches the Work tab to that chat. Elapsed time computed from `claimedAt`. -- Validation badges — one per applicable `ValidationStep`. `✓` passed, `⏳` running, `✗` failed, `—` pending. Click any badge → evidence pop-over showing the latest `ValidationChecklistRun.attachedEvidence`. 
- -Context menu (⋯): `Open worker chat` / `Cancel task...` (revert / keep / review) / `Re-spawn` / `Mark done manually` (writes `humanOverride`). - -### 10.5 Markdown engine - -Dual. - -- Chat surface keeps `apps/desktop/src/renderer/components/chat/chatMarkdown.tsx` (`ChatMarkdown`) — unchanged. -- Plan view uses **new** `apps/desktop/src/renderer/components/orchestration/PlanMarkdown.tsx` built on `react-markdown` + `remark-gfm` + `remark-mermaid` + `rehype-slug` + `rehype-raw`. - -`PlanMarkdown` component overrides: -- ```` ```mermaid ```` fence → lazy-load mermaid; render diagram inline. -- `img` referencing a registered asset → embed inline (screenshot at natural size, max-width clamp). -- `a` referencing `artifacts/ui/*.html` → render as the spec preview card (§10.6). -- Headings get stable `data-section-id=""` via `rehype-slug` so annotation anchors (and any future persistence) survive content edits. - -### 10.6 Inline asset previews - -**HTML specs.** Render as a card containing: - -``` -┌─────────────────────────────────────┐ -│ artifacts/ui/login.html │ -│ ┌─────────────────────────────────┐ │ -│ │ [iframe sandbox="" │ │ -│ │ src="file:///..."> │ │ -│ │ 240×180 thumbnail │ │ -│ │ ] │ │ -│ └─────────────────────────────────┘ │ -│ [ 🔍 Open in ADE browser ] │ -└─────────────────────────────────────┘ -``` - -- `