@@ -401,28 +527,28 @@ export default function ContextCompact({ title }: { title?: string }) {
diff --git a/web/src/components/visualizations/shared/step-controls.tsx b/web/src/components/visualizations/shared/step-controls.tsx
index cd0beaa2c..bc33b7308 100644
--- a/web/src/components/visualizations/shared/step-controls.tsx
+++ b/web/src/components/visualizations/shared/step-controls.tsx
@@ -1,6 +1,7 @@
"use client";
import { Play, Pause, SkipBack, SkipForward, RotateCcw } from "lucide-react";
+import { useTranslations } from "@/lib/i18n";
import { cn } from "@/lib/utils";
interface StepControlsProps {
@@ -28,6 +29,8 @@ export function StepControls({
stepDescription,
className,
}: StepControlsProps) {
+ const t = useTranslations("sim");
+
return (
{/* Annotation */}
@@ -46,7 +49,8 @@ export function StepControls({
@@ -54,14 +58,16 @@ export function StepControls({
onClick={onPrev}
disabled={currentStep === 0}
className="rounded-md p-1.5 text-zinc-500 hover:bg-zinc-100 hover:text-zinc-700 disabled:opacity-30 dark:text-zinc-400 dark:hover:bg-zinc-800 dark:hover:text-zinc-200"
- title="Previous step"
+ title={t("previous_step")}
+ aria-label={t("previous_step")}
>
{isPlaying ? <Pause /> : <Play />}
@@ -69,7 +75,8 @@ export function StepControls({
onClick={onNext}
disabled={currentStep === totalSteps - 1}
className="rounded-md p-1.5 text-zinc-500 hover:bg-zinc-100 hover:text-zinc-700 disabled:opacity-30 dark:text-zinc-400 dark:hover:bg-zinc-800 dark:hover:text-zinc-200"
- title="Next step"
+ title={t("next_step")}
+ aria-label={t("next_step")}
>
diff --git a/web/src/data/annotations/s01.json b/web/src/data/annotations/s01.json
index 80902db18..fc2edb5c9 100644
--- a/web/src/data/annotations/s01.json
+++ b/web/src/data/annotations/s01.json
@@ -8,7 +8,8 @@
"alternatives": "We could have started with a richer toolset (file I/O, HTTP, database), but that would obscure the core insight: an LLM with a shell is already a general-purpose agent. Starting minimal also makes it obvious what each subsequent version actually adds.",
"zh": {
"title": "为什么仅靠 Bash 就够了",
- "description": "Bash 能读写文件、运行任意程序、在进程间传递数据、管理文件系统。任何额外的工具(read_file、write_file 等)都只是 bash 已有能力的子集。增加工具并不会解锁新能力,只会增加模型需要理解的接口。模型只需学习一个工具的 schema,实现代码不超过 100 行。这就是最小可行 agent:一个工具,一个循环。"
+ "description": "Bash 能读写文件、运行任意程序、在进程间传递数据、管理文件系统。任何额外的工具(read_file、write_file 等)都只是 bash 已有能力的子集。增加工具并不会解锁新能力,只会增加模型需要理解的接口。模型只需学习一个工具的 schema,实现代码不超过 100 行。这就是最小可行 agent:一个工具,一个循环。",
+ "alternatives": "也可以从一开始就给更丰富的工具集(文件 I/O、HTTP、数据库等),但会掩盖核心结论:带 shell(命令行)的 LLM 已经是通用代理。先从极简起步,才能看清后续每个版本到底新增了什么能力。"
},
"ja": {
"title": "Bash だけで十分な理由",
@@ -22,7 +23,8 @@
"alternatives": "A framework-level subagent system (like v3's Task tool) gives more control over what tools the subagent can access and how results are returned. But at v0, the point is to show that process spawning is the most primitive form of agent delegation -- no shared memory, no message passing, just stdin/stdout.",
"zh": {
"title": "用递归进程创建实现子代理机制",
- "description": "当 agent 执行 `python v0.py \"subtask\"` 时,它会创建一个全新的进程,拥有全新的 LLM 上下文。这个子进程实际上就是一个子代理:有自己的系统提示词、对话历史和任务焦点。子进程完成后,父进程通过 stdout 获取结果。这就是不依赖任何框架的子代理委派——纯粹的 Unix 进程语义。每个子进程天然隔离关注点,因为它根本看不到父进程的上下文。"
+ "description": "当 agent 执行 `python v0.py \"subtask\"` 时,它会创建一个全新的进程,拥有全新的 LLM 上下文。这个子进程实际上就是一个子代理:有自己的系统提示词、对话历史和任务焦点。子进程完成后,父进程通过 stdout 获取结果。这就是不依赖任何框架的子代理委派——纯粹的 Unix 进程语义。每个子进程天然隔离关注点,因为它根本看不到父进程的上下文。",
+ "alternatives": "框架级子代理(例如 v3 的 Task(任务委托)工具)能更精细地控制子代理可用工具与返回格式。但在 v0 阶段,重点是展示“进程生成”就是最原始的委派:无共享内存、无消息总线,仅依赖 stdin/stdout。"
},
"ja": {
"title": "再帰プロセス生成によるサブエージェント機構",
@@ -36,7 +38,8 @@
"alternatives": "Later versions (v2) add explicit planning via TodoWrite. But v0 proves that implicit planning through the model's reasoning is sufficient for many tasks. The planning framework only becomes necessary when you need external visibility into the agent's intentions.",
"zh": {
"title": "没有规划框架——由模型自行决策",
- "description": "没有规划器,没有任务队列,没有状态机。系统提示词告诉模型如何处理问题,模型根据对话历史决定下一步执行什么 bash 命令。这是有意为之的:在这个层级,添加规划层属于过早抽象。模型的思维链本身就是计划。agent 循环只是不断询问模型下一步做什么,直到模型不再请求工具为止。"
+ "description": "没有规划器,没有任务队列,没有状态机。系统提示词告诉模型如何处理问题,模型根据对话历史决定下一步执行什么 bash 命令。这是有意为之的:在这个层级,添加规划层属于过早抽象。模型的思维链本身就是计划。agent 循环只是不断询问模型下一步做什么,直到模型不再请求工具为止。",
+ "alternatives": "后续版本(v2)会通过 TodoWrite(待办写入)提供显式计划。但 v0 先证明:模型内部推理形成的隐式计划已能覆盖大量任务。只有当你需要把意图对用户和工具链显式暴露时,规划层才变成刚需。"
},
"ja": {
"title": "計画フレームワークなし——モデルが全てを決定",
diff --git a/web/src/data/annotations/s02.json b/web/src/data/annotations/s02.json
index 09e05a679..7a79e126e 100644
--- a/web/src/data/annotations/s02.json
+++ b/web/src/data/annotations/s02.json
@@ -8,7 +8,8 @@
"alternatives": "We could add specialized tools (list_directory, search_files, http_request), and later versions do. But at this stage, bash already covers those use cases. The split from v0's single tool to v1's four tools is specifically about giving the model structured I/O for file operations, where bash's quoting and escaping often trips up the model.",
"zh": {
"title": "为什么恰好四个工具",
- "description": "四个工具分别是 bash、read_file、write_file 和 edit_file,覆盖了大约 95% 的编程任务。Bash 处理执行和任意命令;read_file 提供带行号的精确文件读取;write_file 创建或覆盖文件;edit_file 做精确的字符串替换。工具越多,模型的认知负担越重——它必须在更多选项中做选择,选错的概率也随之增加。更少的工具也意味着更少的 schema 需要维护、更少的边界情况需要处理。"
+ "description": "四个工具分别是 bash、read_file、write_file 和 edit_file,覆盖了大约 95% 的编程任务。Bash 处理执行和任意命令;read_file 提供带行号的精确文件读取;write_file 创建或覆盖文件;edit_file 做精确的字符串替换。工具越多,模型的认知负担越重——它必须在更多选项中做选择,选错的概率也随之增加。更少的工具也意味着更少的 schema 需要维护、更少的边界情况需要处理。",
+ "alternatives": "当然可以继续增加 list_directory、search_files、http_request 等专用工具,后续版本也会这样做。但在这个阶段,bash 已覆盖这些需求。v0 到 v1 的关键增量是把文件操作从通用 shell 拆成结构化 I/O,从而降低 quoting/escaping 错误。"
},
"ja": {
"title": "なぜ正確に4つのツールなのか",
@@ -22,7 +23,8 @@
"alternatives": "Many agent frameworks add elaborate orchestration layers: ReAct loops with explicit Thought/Action/Observation parsing, LangChain-style chains, AutoGPT-style goal decomposition. These frameworks assume the model needs scaffolding to behave as an agent. Our approach assumes the model already knows how to be an agent -- it just needs tools to act on the world.",
"zh": {
"title": "模型本身就是代理",
- "description": "核心 agent 循环极其简单:不断调用 LLM,如果返回 tool_use 块就执行并回传结果,如果只返回文本就停止。没有路由器,没有决策树,没有工作流引擎。模型自己决定做什么、何时停止、如何从错误中恢复。代码只是连接模型和工具的管道。这是一种设计哲学:agent 行为从模型中涌现,而非由框架定义。"
+ "description": "核心 agent 循环极其简单:不断调用 LLM,如果返回 tool_use 块就执行并回传结果,如果只返回文本就停止。没有路由器,没有决策树,没有工作流引擎。模型自己决定做什么、何时停止、如何从错误中恢复。代码只是连接模型和工具的管道。这是一种设计哲学:agent 行为从模型中涌现,而非由框架定义。",
+ "alternatives": "很多框架会叠加复杂编排层,例如 ReAct 解析、链式工作流、自动目标分解。这类方案假设模型需要大量脚手架才像 agent。这里的立场相反:模型本身就是 agent,代码只负责把工具连接到可执行环境。"
},
"ja": {
"title": "モデルそのものがエージェント",
@@ -36,7 +38,8 @@
"alternatives": "Some agent systems let the model output free-form text that gets parsed with regex or heuristics (e.g., extracting code from markdown blocks). This is fragile -- the model might format output slightly differently and break the parser. JSON schemas trade flexibility for reliability.",
"zh": {
"title": "每个工具都有 JSON Schema",
- "description": "每个工具都为输入参数定义了严格的 JSON schema。例如,edit_file 要求 old_string 和 new_string 是精确的字符串,而非正则表达式。这消除了一整类错误:模型无法传递格式错误的输入,因为 API 会在执行前校验 schema。这也使模型的意图变得明确——当它用特定字符串调用 edit_file 时,不存在关于它想修改什么的解析歧义。"
+ "description": "每个工具都为输入参数定义了严格的 JSON schema。例如,edit_file 要求 old_string 和 new_string 是精确的字符串,而非正则表达式。这消除了一整类错误:模型无法传递格式错误的输入,因为 API 会在执行前校验 schema。这也使模型的意图变得明确——当它用特定字符串调用 edit_file 时,不存在关于它想修改什么的解析歧义。",
+ "alternatives": "另一种做法是让模型输出自由文本,再用正则或启发式去解析(例如从 Markdown 代码块提取指令)。这类方案很脆弱,格式稍变就会失效。JSON Schema(结构化输入约束)牺牲了一点灵活性,换来更高可靠性。"
},
"ja": {
"title": "全ツールに JSON Schema を定義",
diff --git a/web/src/data/annotations/s03.json b/web/src/data/annotations/s03.json
index b8e408ad3..b11331126 100644
--- a/web/src/data/annotations/s03.json
+++ b/web/src/data/annotations/s03.json
@@ -8,7 +8,8 @@
"alternatives": "The model could plan internally via chain-of-thought reasoning (as it does in v0/v1). Internal planning works but is invisible and ephemeral -- once the thinking scrolls out of context, the plan is lost. Claude's extended thinking is another option, but it's not inspectable by the user or by downstream tools.",
"zh": {
"title": "通过 TodoWrite 让计划可见",
- "description": "我们不让模型在思维链中默默规划,而是强制通过 TodoWrite 工具将计划外化。每个计划项都有可追踪的状态(pending、in_progress、completed)。这有三个好处:(1) 用户可以在执行前看到 agent 打算做什么;(2) 开发者可以通过检查计划状态来调试 agent 行为;(3) agent 自身可以在后续轮次中引用计划,即使早期上下文已经滚出窗口。"
+ "description": "我们不让模型在思维链中默默规划,而是强制通过 TodoWrite 工具将计划外化。每个计划项都有可追踪的状态(pending、in_progress、completed)。这有三个好处:(1) 用户可以在执行前看到 agent 打算做什么;(2) 开发者可以通过检查计划状态来调试 agent 行为;(3) agent 自身可以在后续轮次中引用计划,即使早期上下文已经滚出窗口。",
+ "alternatives": "模型也可以像 v0/v1 那样在内部链路中隐式规划,但这种计划不可见且易丢失;一旦上下文滚出窗口,就很难回看。extended thinking(扩展思考)也是类似问题:能力更强,但对用户和下游工具不透明。"
},
"ja": {
"title": "TodoWrite による計画の可視化",
@@ -22,7 +23,8 @@
"alternatives": "Allowing multiple in-progress items would let the agent context-switch between tasks, which seems more flexible. In practice, LLMs handle context-switching poorly -- they lose track of which task they were working on and mix up details between tasks. The single-focus constraint is a guardrail that improves output quality.",
"zh": {
"title": "同一时间只允许一个任务进行中",
- "description": "TodoWrite 工具强制要求任何时候最多只能有一个任务处于 in_progress 状态。如果模型想开始第二个任务,必须先完成或放弃当前任务。这个约束防止了一种隐蔽的失败模式:试图通过交替处理多个项目来'多任务'的模型,往往会丢失状态并产出半成品。顺序执行的专注度远高于并行切换。"
+ "description": "TodoWrite 工具强制要求任何时候最多只能有一个任务处于 in_progress 状态。如果模型想开始第二个任务,必须先完成或放弃当前任务。这个约束防止了一种隐蔽的失败模式:试图通过交替处理多个项目来'多任务'的模型,往往会丢失状态并产出半成品。顺序执行的专注度远高于并行切换。",
+ "alternatives": "允许多个 in_progress(进行中)看似更灵活,agent(代理)可以频繁切任务;但在实践里 LLM 对上下文切换并不稳定,容易混淆当前目标并产出半成品。单焦点约束是提升结果质量的有效护栏。"
},
"ja": {
"title": "同時に進行中にできるタスクは1つだけ",
@@ -36,7 +38,8 @@
"alternatives": "No cap would give the model full flexibility, but in practice leads to absurdly detailed plans. A dynamic cap (proportional to task complexity) would be smarter but adds complexity. The fixed cap of 20 is a simple heuristic that works well empirically -- most real coding tasks can be expressed in 5-15 meaningful steps.",
"zh": {
"title": "计划项上限为 20 条",
- "description": "TodoWrite 将计划项限制在 20 条以内。这是对过度规划的刻意约束。不加限制时,模型倾向于将任务分解成越来越细粒度的步骤,产出 50 条的计划,每一步都微不足道。冗长的计划很脆弱:如果第 15 步失败,剩下的 35 步可能全部作废。20 条以内的短计划保持在正确的抽象层级,更容易在现实偏离计划时做出调整。"
+ "description": "TodoWrite 将计划项限制在 20 条以内。这是对过度规划的刻意约束。不加限制时,模型倾向于将任务分解成越来越细粒度的步骤,产出 50 条的计划,每一步都微不足道。冗长的计划很脆弱:如果第 15 步失败,剩下的 35 步可能全部作废。20 条以内的短计划保持在正确的抽象层级,更容易在现实偏离计划时做出调整。",
+ "alternatives": "不设上限会给模型最大自由,但常导致过度细化。也可以做“按任务复杂度动态上限”,但实现成本更高。固定 20 条是经验上稳定的折中:多数真实编码任务可在 5-15 条有效步骤内表达清楚。"
},
"ja": {
"title": "計画項目の上限は20個",
diff --git a/web/src/data/annotations/s04.json b/web/src/data/annotations/s04.json
index 08ad115d0..4f7020e0f 100644
--- a/web/src/data/annotations/s04.json
+++ b/web/src/data/annotations/s04.json
@@ -8,7 +8,8 @@
"alternatives": "Sharing the parent's full context would give the subagent more information, but it would also flood the subagent with irrelevant details. Context window is finite -- filling it with parent history leaves less room for the subagent's own work. Fork-based approaches (copy the parent context) are a middle ground but still waste tokens on irrelevant history.",
"zh": {
"title": "子代理获得全新上下文,而非共享历史",
- "description": "当父代理通过 Task 工具创建子代理时,子代理从全新的消息历史开始,只包含系统提示词和委派的任务描述,不继承父代理的对话。这就是上下文隔离:子代理可以完全专注于特定子任务,不会被父代理长达数百条消息的对话干扰。结果作为单条 tool_result 返回给父代理,将子代理可能数十轮的交互压缩为一个简洁的回答。"
+ "description": "当父代理通过 Task 工具创建子代理时,子代理从全新的消息历史开始,只包含系统提示词和委派的任务描述,不继承父代理的对话。这就是上下文隔离:子代理可以完全专注于特定子任务,不会被父代理长达数百条消息的对话干扰。结果作为单条 tool_result 返回给父代理,将子代理可能数十轮的交互压缩为一个简洁的回答。",
+ "alternatives": "让子代理继承父代理完整上下文,信息确实更全,但也会被大量无关历史淹没。上下文窗口有限,把空间浪费在父流程历史上会挤压子任务本身。fork(上下文复制)是折中方案,但依然会额外消耗 token。"
},
"ja": {
"title": "サブエージェントは共有履歴ではなく新しいコンテキストを取得",
@@ -22,7 +23,8 @@
"alternatives": "Giving all subagents full tool access is simpler to implement but violates least privilege. A permission-request system (subagent asks parent for write access) adds complexity and latency. Static tool filtering by role is the pragmatic middle ground -- simple to implement, effective at preventing accidents.",
"zh": {
"title": "Explore 代理不能写入文件",
- "description": "创建 Explore 类型的子代理时,它只获得只读工具:bash(有限制)、read_file 和搜索工具,不能调用 write_file 或 edit_file。这实现了最小权限原则:一个被委派'查找函数 X 所有使用位置'的代理不需要写权限。移除写工具消除了探索过程中误修改文件的风险,同时缩小了工具空间,让模型在更少的选项中做出更好的决策。"
+ "description": "创建 Explore 类型的子代理时,它只获得只读工具:bash(有限制)、read_file 和搜索工具,不能调用 write_file 或 edit_file。这实现了最小权限原则:一个被委派'查找函数 X 所有使用位置'的代理不需要写权限。移除写工具消除了探索过程中误修改文件的风险,同时缩小了工具空间,让模型在更少的选项中做出更好的决策。",
+ "alternatives": "给所有子代理完整工具权限实现最简单,但违背最小权限原则。也可以做“子代理申请写权限、父代理审批”的机制,但会增加系统复杂度和交互延迟。按角色静态过滤工具是更务实的工程折中。"
},
"ja": {
"title": "Explore エージェントはファイルを書き込めない",
@@ -36,7 +38,8 @@
"alternatives": "Allowing recursive delegation (bounded by depth) would handle deeply nested tasks but adds complexity and the risk of runaway token consumption. In practice, single-level delegation covers most real-world coding tasks. Multi-level delegation is addressed in later versions (v6+) through persistent team structures instead of recursive spawning.",
"zh": {
"title": "子代理不能再创建子代理",
- "description": "Task 工具不包含在子代理的工具集中。子代理必须直接完成工作,不能继续委派。这防止了无限委派循环:没有这个约束,一个代理可能创建子代理,子代理又创建子代理,每一层都用略微不同的措辞重新委派同一任务,消耗 token 却毫无进展。一层委派足以处理绝大多数场景。如果任务对单个子代理来说太复杂,应该由父代理重新分解。"
+ "description": "Task 工具不包含在子代理的工具集中。子代理必须直接完成工作,不能继续委派。这防止了无限委派循环:没有这个约束,一个代理可能创建子代理,子代理又创建子代理,每一层都用略微不同的措辞重新委派同一任务,消耗 token 却毫无进展。一层委派足以处理绝大多数场景。如果任务对单个子代理来说太复杂,应该由父代理重新分解。",
+ "alternatives": "允许递归委派(并限制深度)可以覆盖更深层任务拆解,但会显著增加复杂度,并带来 token 失控风险。实践里单层委派已覆盖多数编码任务;多层协作更适合在后续版本用持久团队机制实现。"
},
"ja": {
"title": "サブエージェントは自身のサブエージェントを生成できない",
diff --git a/web/src/data/annotations/s05.json b/web/src/data/annotations/s05.json
index 10aa15c7c..342311367 100644
--- a/web/src/data/annotations/s05.json
+++ b/web/src/data/annotations/s05.json
@@ -8,7 +8,8 @@
"alternatives": "Injecting skills into the system prompt is simpler and gives skills higher priority in the model's attention. But it breaks prompt caching (every skill load creates a new system prompt variant) and bloats the system prompt over time as skills accumulate. The tool_result approach keeps things cache-friendly at the cost of slightly lower attention priority.",
"zh": {
"title": "Skill 通过 tool_result 注入,而非系统提示词",
- "description": "当 agent 调用 Skill 工具时,Skill 内容(SKILL.md 文件)作为 tool_result 在用户消息中返回,而非注入系统提示词。这是一个刻意的缓存优化:系统提示词在各轮次间保持静态,API 提供商可以缓存它(Anthropic 的 prompt caching、OpenAI 的 system message caching)。如果 Skill 内容在系统提示词中,每次加载新 Skill 都会使缓存失效。将动态内容放在 tool_result 中,既保持了昂贵的系统提示词可缓存,又让 Skill 知识进入了上下文。"
+ "description": "当 agent 调用 Skill 工具时,Skill 内容(SKILL.md 文件)作为 tool_result 在用户消息中返回,而非注入系统提示词。这是一个刻意的缓存优化:系统提示词在各轮次间保持静态,API 提供商可以缓存它(Anthropic 的 prompt caching、OpenAI 的 system message 缓存)。如果 Skill 内容在系统提示词中,每次加载新 Skill 都会使缓存失效。将动态内容放在 tool_result 中,既保持了昂贵的系统提示词可缓存,又让 Skill 知识进入了上下文。",
+ "alternatives": "把技能直接注入系统提示词实现更直观,也可能获得更高注意力权重;但会破坏提示词缓存(每次加载技能都生成新变体),并让系统提示词持续膨胀。tool_result(工具结果)路径更缓存友好,代价是注意力优先级略低。"
},
"ja": {
"title": "スキルはシステムプロンプトではなく tool_result で注入",
@@ -22,7 +23,8 @@
"alternatives": "Loading all skills upfront guarantees the model always has all knowledge available, but wastes tokens on irrelevant skills and may hit context limits. A recommendation system (model suggests skills, human approves) adds latency. Lazy loading lets the model self-serve the knowledge it needs, when it needs it.",
"zh": {
"title": "按需加载 Skill 而非预加载",
- "description": "Skill 不会在启动时加载。Agent 初始只拥有 Skill 名称和描述(来自 frontmatter)。当 agent 判断需要特定 Skill 时,调用 Skill 工具将完整的 SKILL.md 内容加载到上下文中。这保持了初始提示词的精简。一个正在修复 Python bug 的 agent 不需要加载 Kubernetes 部署 Skill——那会浪费上下文窗口空间,还可能用无关指令干扰模型。"
+ "description": "Skill 不会在启动时加载。Agent 初始只拥有 Skill 名称和描述(来自 frontmatter)。当 agent 判断需要特定 Skill 时,调用 Skill 工具将完整的 SKILL.md 内容加载到上下文中。这保持了初始提示词的精简。一个正在修复 Python bug 的 agent 不需要加载 Kubernetes 部署 Skill——那会浪费上下文窗口空间,还可能用无关指令干扰模型。",
+ "alternatives": "启动时加载全部技能可以保证知识随时可用,但会占据大量上下文并更容易触达窗口上限。加入“模型推荐+人工审批”的流程更稳,但会引入额外时延。按需加载让模型在需要时自助获取知识。"
},
"ja": {
"title": "起動時ではなくオンデマンドでスキルを読み込み",
@@ -36,7 +38,8 @@
"alternatives": "A separate metadata file (skill.yaml + skill.md) would work but doubles the number of files. Embedding metadata in the markdown (as headings or comments) requires parsing the full file to extract metadata. Frontmatter is a well-established convention (Jekyll, Hugo, Astro) that keeps metadata and content co-located but separately parseable.",
"zh": {
"title": "SKILL.md 采用 YAML Frontmatter + Markdown 正文",
- "description": "每个 SKILL.md 文件有两部分:YAML frontmatter(名称、描述、globs)和 markdown 正文(实际指令)。Frontmatter 作为 Skill 注册表的元数据——当 agent 问'有哪些可用 Skill'时,展示的就是这些信息。正文是按需加载的有效负载。这种分离意味着可以列出 100 个 Skill(每个只读几字节的 frontmatter)而不必加载 100 套完整指令集(每套可能数千 token)。"
+ "description": "每个 SKILL.md 文件有两部分:YAML frontmatter(名称、描述、globs)和 markdown 正文(实际指令)。Frontmatter 作为 Skill 注册表的元数据——当 agent 问'有哪些可用 Skill'时,展示的就是这些信息。正文是按需加载的有效负载。这种分离意味着可以列出 100 个 Skill(每个只读几字节的 frontmatter)而不必加载 100 套完整指令集(每套可能数千 token)。",
+ "alternatives": "也可以拆成 skill.yaml + skill.md 双文件,但会增加文件数量与维护成本。或把元数据埋在正文里再解析,但必须读取整文件。Frontmatter(前置元数据)是成熟约定,能在同一文件中实现“内容共存、解析分离”。"
},
"ja": {
"title": "SKILL.md で YAML フロントマター + Markdown 本文",
diff --git a/web/src/data/annotations/s06.json b/web/src/data/annotations/s06.json
index fae1f739c..85360e883 100644
--- a/web/src/data/annotations/s06.json
+++ b/web/src/data/annotations/s06.json
@@ -8,7 +8,8 @@
"alternatives": "A single compression strategy (e.g., always summarize at 80% capacity) would be simpler but wasteful -- most of the time, microcompact alone keeps things manageable. A sliding window (drop oldest N messages) is cheap but loses important context. The three-layer approach gives the best token efficiency: cheap cleanup constantly, expensive summarization rarely.",
"zh": {
"title": "三层压缩策略",
- "description": "上下文管理使用三个独立的层次,各有不同的成本收益比。(1) 微压缩每轮都运行,几乎零成本:它截断旧消息中的 tool_result 块,去除不再需要的冗长命令输出。(2) 自动压缩在 token 数超过阈值时触发:调用 LLM 生成对话摘要,代价高但能大幅缩减上下文。(3) 手动压缩由用户触发,用于明确的'重新开始'场景。分层意味着低成本操作持续运行(保持上下文整洁),而高成本操作很少触发(仅在真正需要时)。"
+ "description": "上下文管理使用三个独立的层次,各有不同的成本收益比。(1) 微压缩每轮都运行,几乎零成本:它截断旧消息中的 tool_result 块,去除不再需要的冗长命令输出。(2) 自动压缩在 token 数超过阈值时触发:调用 LLM 生成对话摘要,代价高但能大幅缩减上下文。(3) 手动压缩由用户触发,用于明确的'重新开始'场景。分层意味着低成本操作持续运行(保持上下文整洁),而高成本操作很少触发(仅在真正需要时)。",
+ "alternatives": "只用单一压缩策略(例如固定在 80% 时摘要)实现更简单,但通常浪费资源。多数轮次仅靠微压缩就足够;而滑动窗口(直接丢最旧消息)虽然便宜,却可能丢掉关键上下文。三层策略在成本与效果之间更均衡。"
},
"ja": {
"title": "3層圧縮戦略",
@@ -22,7 +23,8 @@
"alternatives": "A percentage-based threshold (compress when context is 80% full) adapts to different context window sizes but doesn't account for the fixed cost of generating a summary. A fixed threshold of 10K would compress more aggressively but often isn't worth it. The 20K value was chosen empirically: it's the point where compression savings consistently outweigh the quality loss from summarization.",
"zh": {
"title": "最小节省量 = 20,000 Token 才触发压缩",
- "description": "自动压缩仅在估算节省量(当前 token 数减去预估摘要大小)超过 20,000 token 时才触发。压缩不是免费的:摘要本身会消耗 token,还有生成摘要的 API 调用成本。如果对话只有 25,000 token,压缩可能节省 5,000 token,但需要一次 API 调用,且产出的摘要可能不如原文连贯。20K 的阈值确保只在节省量明显超过开销时才进行压缩。"
+ "description": "自动压缩仅在估算节省量(当前 token 数减去预估摘要大小)超过 20,000 token 时才触发。压缩不是免费的:摘要本身会消耗 token,还有生成摘要的 API 调用成本。如果对话只有 25,000 token,压缩可能节省 5,000 token,但需要一次 API 调用,且产出的摘要可能不如原文连贯。20K 的阈值确保只在节省量明显超过开销时才进行压缩。",
+ "alternatives": "按百分比阈值(如 80%)能适配不同窗口大小,但忽略了“生成摘要”本身的固定成本。把阈值降到 10K 会更激进,却常得不偿失。20K 是经验上更稳的点:节省量通常明显高于压缩开销。"
},
"ja": {
"title": "圧縮前に MIN_SAVINGS = 20,000 トークンが必要",
@@ -36,7 +38,8 @@
"alternatives": "Keeping the last 5-10 messages alongside the summary preserves recent detail and gives the model more to work with. But it creates the overlap problem described above, and makes the total context size less predictable. Some systems use a 'sliding window + summary' approach which works but requires careful tuning of the overlap region.",
"zh": {
"title": "摘要替换全部消息,而非保留部分历史",
- "description": "自动压缩触发时,生成摘要并替换全部消息历史,不会在摘要旁保留最近的 N 条消息。这避免了一个微妙的连贯性问题:如果同时保留近期消息和旧消息的摘要,模型会看到重叠内容的两种表示。摘要可能说'我们决定使用方案 X',而近期消息仍在展示讨论过程,产生矛盾信号。干净的摘要是一个连贯的单一叙述。"
+ "description": "自动压缩触发时,生成摘要并替换全部消息历史,不会在摘要旁保留最近的 N 条消息。这避免了一个微妙的连贯性问题:如果同时保留近期消息和旧消息的摘要,模型会看到重叠内容的两种表示。摘要可能说'我们决定使用方案 X',而近期消息仍在展示讨论过程,产生矛盾信号。干净的摘要是一个连贯的单一叙述。",
+ "alternatives": "保留最近 5-10 条消息再配合摘要看似更稳,但会产生语义重叠:同一事实在“原消息 + 摘要”双重出现,模型更容易收到冲突信号,同时上下文规模也更难预测。"
},
"ja": {
"title": "要約が部分的な履歴ではなく全メッセージを置換",
@@ -50,7 +53,8 @@
"alternatives": "Not archiving saves disk space but makes debugging hard -- when the agent makes a mistake, you can't see what it was 'thinking' 200 messages ago because that context was compressed away. Database storage (SQLite) would provide queryability but adds a dependency. JSONL is the simplest format that supports append-only writes and line-by-line processing.",
"zh": {
"title": "完整对话以 JSONL 格式归档到磁盘",
- "description": "尽管上下文在内存中被压缩,完整的未压缩对话仍会追加到磁盘上的 JSONL 文件中。每条消息、每次工具调用、每个结果都不会丢失。压缩对内存上下文是有损操作,但对永久记录是无损的。事后分析(调试 agent 行为、计算 token 用量、提取训练数据)始终可以基于完整记录进行。JSONL 格式仅追加写入,对并发写入安全,易于流式处理。"
+ "description": "尽管上下文在内存中被压缩,完整的未压缩对话仍会追加到磁盘上的 JSONL 文件中。每条消息、每次工具调用、每个结果都不会丢失。压缩对内存上下文是有损操作,但对永久记录是无损的。事后分析(调试 agent 行为、计算 token 用量、提取训练数据)始终可以基于完整记录进行。JSONL 格式仅追加写入,对并发写入安全,易于流式处理。",
+ "alternatives": "不归档可以省磁盘,但调试会很痛苦:当 agent 出错时,你无法回看 200 条消息前发生了什么。改用数据库(如 SQLite)能提升查询能力,但会增加依赖与维护成本。JSONL 是最小实现成本下的可靠方案。"
},
"ja": {
"title": "完全な会話を JSONL としてディスクに保存",
diff --git a/web/src/data/architecture-blueprints.ts b/web/src/data/architecture-blueprints.ts
new file mode 100644
index 000000000..f7467fe15
--- /dev/null
+++ b/web/src/data/architecture-blueprints.ts
@@ -0,0 +1,535 @@
+import type { VersionId } from "@/lib/constants";
+
+type LocalizedText = {
+ zh: string;
+ en: string;
+ ja?: string;
+};
+
+export type ArchitectureSliceId =
+ | "mainline"
+ | "control"
+ | "state"
+ | "lanes";
+
+export interface ArchitectureItem {
+ name: LocalizedText;
+ detail: LocalizedText;
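+  // Set when the element first appears in this chapter's blueprint (carried-over elements omit it).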
+ fresh?: boolean;
+}
+
+export interface ArchitectureBlueprint {
+ summary: LocalizedText;
+  slices: Partial<Record<ArchitectureSliceId, ArchitectureItem[]>>;
+ records: ArchitectureItem[];
+ handoff: LocalizedText[];
+}
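+
+// Shorthand for the zh/en LocalizedText pairs used throughout the data below;
+// the optional `ja` field is simply left unset here.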
+
+const l = (zh: string, en: string): LocalizedText => ({ zh, en });
+
+export const ARCHITECTURE_BLUEPRINTS: Record<VersionId, ArchitectureBlueprint> = {
+ s01: {
+ summary: l(
+ "第一章先建立最小闭环:用户输入进入 messages[],模型决定要不要调工具,结果再回写到同一条循环里。",
+ "The first chapter establishes the smallest closed loop: user input enters messages[], the model decides whether to call a tool, and the result flows back into the same loop."
+ ),
+ slices: {
+ mainline: [
+ { name: l("Agent Loop", "Agent Loop"), detail: l("每轮都走一次调用模型 -> 处理输出 -> 再决定是否继续。", "Each turn calls the model, handles the output, then decides whether to continue."), fresh: true },
+ ],
+ state: [
+ { name: l("messages[]", "messages[]"), detail: l("所有用户、助手和工具结果都累积在这里。", "User, assistant, and tool result history accumulates here."), fresh: true },
+ { name: l("tool_result 回流", "tool_result write-back"), detail: l("真正让 agent 能行动的是工具结果会回到下一轮推理。", "The agent becomes real when tool results return into the next reasoning step."), fresh: true },
+ ],
+ },
+ records: [
+ { name: l("LoopState", "LoopState"), detail: l("最小可运行会话状态。", "The smallest runnable session state."), fresh: true },
+ { name: l("Assistant Content", "Assistant Content"), detail: l("模型本轮输出。", "The model output for the current turn."), fresh: true },
+ ],
+ handoff: [
+ l("用户消息进入 messages[]", "User message enters messages[]"),
+ l("模型产出 tool_use 或文本", "Model emits tool_use or text"),
+ l("工具结果回写到下一轮", "Tool result writes back into the next turn"),
+ ],
+ },
+ s02: {
+ summary: l(
+ "这一章把“会调一个工具”升级成“能稳定路由很多工具”,主循环不变,工具层长出来。",
+ "This chapter upgrades one tool call into a stable multi-tool routing layer while keeping the main loop unchanged."
+ ),
+ slices: {
+ mainline: [
+ { name: l("稳定主循环", "Stable Main Loop"), detail: l("主循环继续只管模型调用与结果回写。", "The main loop still only owns model calls and write-back."), },
+ ],
+ control: [
+ { name: l("ToolSpec 目录", "ToolSpec Catalog"), detail: l("把工具能力描述给模型看。", "Describes tool capabilities to the model."), fresh: true },
+ { name: l("Dispatch Map", "Dispatch Map"), detail: l("按工具名把调用路由到对应 handler。", "Routes a tool call to the correct handler by name."), fresh: true },
+ ],
+ state: [
+ { name: l("tool_input", "tool_input"), detail: l("模型传入的结构化工具参数。", "Structured tool arguments emitted by the model."), fresh: true },
+ ],
+ },
+ records: [
+ { name: l("ToolSpec", "ToolSpec"), detail: l("schema + 描述。", "Schema plus description."), fresh: true },
+ { name: l("Dispatch Entry", "Dispatch Entry"), detail: l("工具名到函数的映射。", "Mapping from tool name to function."), fresh: true },
+ ],
+ handoff: [
+ l("模型说要调哪个工具", "The model selects a tool"),
+ l("dispatch map 找到 handler", "The dispatch map resolves the handler"),
+ l("handler 输出 tool_result", "The handler returns a tool_result"),
+ ],
+ },
+ s03: {
+ summary: l(
+ "第三章把会话内的工作拆解显式化,agent 开始有一块自己的 session planning 状态。",
+ "The third chapter makes session planning explicit so the agent gains a dedicated session-planning state."
+ ),
+ slices: {
+ mainline: [
+ { name: l("计划先行", "Plan Before Execution"), detail: l("先把大目标拆成当前轮可追踪步骤,再去行动。", "Break the larger goal into trackable steps before acting."), fresh: true },
+ ],
+ control: [
+ { name: l("提醒回路", "Reminder Loop"), detail: l("每轮重新看到当前 todo,避免中途漂移。", "Each turn revisits the current todo list to avoid drift."), fresh: true },
+ ],
+ state: [
+ { name: l("TodoItem", "TodoItem"), detail: l("当前会话里的最小计划单位。", "The smallest planning unit inside one session."), fresh: true },
+ { name: l("PlanState", "PlanState"), detail: l("记录有哪些步骤、做到了哪一步。", "Tracks what steps exist and which one is active."), fresh: true },
+ ],
+ },
+ records: [
+ { name: l("Todo List", "Todo List"), detail: l("会话级,不持久。", "Session-scoped, not durable."), fresh: true },
+ ],
+ handoff: [
+ l("目标先变成步骤", "The goal becomes steps first"),
+ l("当前步骤指导工具选择", "The current step guides tool choice"),
+ l("进展再回写计划状态", "Progress writes back into planning state"),
+ ],
+ },
+ s04: {
+ summary: l(
+ "这里开始把子任务从父上下文中隔离出来,系统第一次有了显式的多循环结构。",
+ "This chapter isolates subtasks from the parent context and introduces the first explicit multi-loop structure."
+ ),
+ slices: {
+ mainline: [
+ { name: l("父循环", "Parent Loop"), detail: l("保持主线目标和最终整合责任。", "Keeps the main goal and the integration responsibility."), },
+ { name: l("子循环", "Child Loop"), detail: l("为子任务提供一份干净上下文。", "Provides a clean context for the subtask."), fresh: true },
+ ],
+ control: [
+ { name: l("委派边界", "Delegation Boundary"), detail: l("什么时候把工作交给子 agent,什么时候留在父循环。", "Defines when work is delegated versus kept in the parent loop."), fresh: true },
+ ],
+ state: [
+ { name: l("Parent messages", "Parent messages"), detail: l("父 agent 的长期上下文。", "The parent agent's long-lived context."), },
+ { name: l("Child messages", "Child messages"), detail: l("子任务一次性的独立上下文。", "An isolated one-shot context for the delegated subtask."), fresh: true },
+ ],
+ lanes: [
+ { name: l("一次性 Subagent", "One-shot Subagent"), detail: l("做完摘要后就退出,不承担长期身份。", "Exits after returning a summary and does not keep long-lived identity."), fresh: true },
+ ],
+ },
+ records: [
+ { name: l("Subtask Request", "Subtask Request"), detail: l("父循环交给子循环的边界对象。", "The boundary object handed from parent to child."), fresh: true },
+ ],
+ handoff: [
+ l("父循环定义子任务", "The parent loop defines a subtask"),
+ l("子循环在独立 messages 里执行", "The child loop runs in isolated messages"),
+ l("摘要回到父循环继续主线", "A summary returns to the parent loop"),
+ ],
+ },
+ s05: {
+ summary: l(
+ "技能系统把知识获取拆成发现层和按需加载层,避免把所有说明一开始全塞进 prompt。",
+ "The skill system splits knowledge into a discovery layer and an on-demand loading layer so the prompt does not start bloated."
+ ),
+ slices: {
+ control: [
+ { name: l("Skill Discovery", "Skill Discovery"), detail: l("先用便宜方式知道有哪些技能可用。", "Learns which skills exist through a cheap discovery pass."), fresh: true },
+ { name: l("Skill Load", "Skill Load"), detail: l("真正需要时再把深说明注入。", "Loads deep instructions only when they are actually needed."), fresh: true },
+ ],
+ state: [
+ { name: l("Skill Registry", "Skill Registry"), detail: l("保存技能名字、简介和路径。", "Stores skill names, summaries, and paths."), fresh: true },
+ ],
+ mainline: [
+ { name: l("主循环保持轻量", "Keep the Loop Lightweight"), detail: l("技能不是固定写进系统 prompt,而是按需补进当前轮。", "Skills are injected on demand instead of being permanently fused into the system prompt."), },
+ ],
+ },
+ records: [
+ { name: l("SKILL.md", "SKILL.md"), detail: l("技能的深说明载体。", "The deep instruction source for a skill."), fresh: true },
+ ],
+ handoff: [
+ l("先发现技能入口", "Discover the skill entry first"),
+ l("需要时读取 SKILL.md", "Read SKILL.md when needed"),
+ l("再把结果回注给主循环", "Feed the loaded result back into the main loop"),
+ ],
+ },
+ s06: {
+ summary: l(
+ "上下文压缩让系统第一次区分活跃窗口和被转移出去的细节,长会话开始变得可持续。",
+ "Context compaction is where the system first separates the active window from offloaded detail so long sessions stay usable."
+ ),
+ slices: {
+ control: [
+ { name: l("压缩触发器", "Compaction Trigger"), detail: l("接近 token 上限时决定何时压缩。", "Decides when to compact as the token budget grows."), fresh: true },
+ { name: l("微压缩与摘要压缩", "Micro and Summary Compaction"), detail: l("按损失程度分两层压缩。", "Compacts in layers with different levels of loss."), fresh: true },
+ ],
+ state: [
+ { name: l("活跃上下文", "Active Context"), detail: l("当前轮必须直接看到的内容。", "What the current turn must see directly."), },
+ { name: l("Persisted Output", "Persisted Output"), detail: l("被移出活跃窗口但仍可再读的细节。", "Detail moved out of the active window but still readable later."), fresh: true },
+ { name: l("Summary State", "Summary State"), detail: l("压缩后保留下来的主线。", "The retained storyline after compaction."), fresh: true },
+ ],
+ },
+ records: [
+ { name: l("Micro Compact Record", "Micro Compact Record"), detail: l("短期挪走细节。", "Moves recent detail out of the hot window."), fresh: true },
+ { name: l("Summary Compact", "Summary Compact"), detail: l("保住主线连续性。", "Preserves continuity of the mainline."), fresh: true },
+ ],
+ handoff: [
+ l("细节先移出活跃窗口", "Detail leaves the active window first"),
+ l("主线被压成摘要", "The mainline is preserved as a summary"),
+ l("后续真需要时再读回原文", "Raw detail is read back only when needed"),
+ ],
+ },
+ s07: {
+ summary: l(
+ "从这一章开始,执行前出现了真正的控制面闸门:模型意图必须先变成可判断的权限请求。",
+ "From this chapter onward, execution gets a real control-plane gate: model intent must become a permission request before it runs."
+ ),
+ slices: {
+ control: [
+ { name: l("Permission Gate", "Permission Gate"), detail: l("deny / ask / allow 决策发生在执行之前。", "deny / ask / allow happens before execution."), fresh: true },
+ { name: l("模式控制", "Mode Control"), detail: l("default、plan、auto 等模式影响整条权限路径。", "Modes such as default, plan, and auto affect the whole permission path."), fresh: true },
+ ],
+ state: [
+ { name: l("PermissionRule", "PermissionRule"), detail: l("定义哪些工具或路径直接允许、拒绝或询问。", "Defines which tools or paths are allowed, denied, or sent for confirmation."), fresh: true },
+ { name: l("PermissionDecision", "PermissionDecision"), detail: l("把 allow / ask / deny 结构化回写。", "Writes allow / ask / deny back in structured form."), fresh: true },
+ ],
+ mainline: [
+ { name: l("主循环不再直达工具", "The Loop No Longer Reaches Tools Directly"), detail: l("tool call 先过权限层,再决定是否真正执行。", "A tool call passes through the permission layer before actual execution."), },
+ ],
+ },
+ records: [
+ { name: l("Normalized Intent", "Normalized Intent"), detail: l("把原始工具调用翻译成可判断对象。", "Translates raw tool calls into a policy-checkable object."), fresh: true },
+ ],
+ handoff: [
+ l("模型提出动作", "The model proposes an action"),
+ l("权限层做出 allow / ask / deny", "The permission layer returns allow / ask / deny"),
+ l("结果回写给主循环继续推理", "That result writes back into the main loop"),
+ ],
+ },
+ s08: {
+ summary: l(
+ "Hook 让主循环第一次拥有稳定的旁路扩展点,日志、审计、追踪开始从核心逻辑中分离。",
+ "Hooks give the loop stable sidecar extension points so logging, audit, and tracing separate from the core path."
+ ),
+ slices: {
+ control: [
+ { name: l("Lifecycle Events", "Lifecycle Events"), detail: l("主循环在 pre_tool / post_tool / on_error 等边界发出事件。", "The loop emits events at boundaries like pre_tool, post_tool, and on_error."), fresh: true },
+ { name: l("Hook Registry", "Hook Registry"), detail: l("多个 hook 共享同一事件契约。", "Multiple hooks share one event contract."), fresh: true },
+ ],
+ state: [
+ { name: l("HookEvent", "HookEvent"), detail: l("tool、input、result、error 等结构化事件包。", "A structured event envelope carrying tool, input, result, error, and more."), fresh: true },
+ ],
+ mainline: [
+ { name: l("主线保持最小", "Keep the Mainline Small"), detail: l("副作用通过 hook 附着,不侵入每个工具 handler。", "Side effects attach through hooks instead of invading every handler."), },
+ ],
+ },
+ records: [
+ { name: l("Audit Sink", "Audit Sink"), detail: l("一个具体副作用落点。", "A concrete side-effect sink."), fresh: true },
+ ],
+ handoff: [
+ l("主循环发事件", "The loop emits an event"),
+ l("Hook 观察并产出副作用", "Hooks observe and produce side effects"),
+ l("主线继续推进不被重写", "The mainline continues without being rewritten"),
+ ],
+ },
+ s09: {
+ summary: l(
+ "长期记忆把跨会话事实从即时上下文里分层出来,系统第一次有了真正的 durable knowledge 容器。",
+ "Long-term memory layers cross-session facts away from immediate context and introduces a real durable knowledge container."
+ ),
+ slices: {
+ control: [
+ { name: l("Memory Load/Write", "Memory Load/Write"), detail: l("模型调用前读取,任务结束后提炼并写回。", "Load before the model call, then extract and write after the work turn."), fresh: true },
+ ],
+ state: [
+ { name: l("messages[]", "messages[]"), detail: l("承载当前过程,不负责跨会话长期知识。", "Carries the live process, not long-term cross-session knowledge."), },
+ { name: l("Memory Store", "Memory Store"), detail: l("只保存跨会话仍然有价值的事实。", "Stores only durable facts that still matter across sessions."), fresh: true },
+ ],
+ },
+ records: [
+ { name: l("MemoryEntry", "MemoryEntry"), detail: l("用户偏好、项目约束等长期事实。", "Long-lived facts such as preferences and project constraints."), fresh: true },
+ ],
+ handoff: [
+ l("先读取相关 memory", "Relevant memory is loaded first"),
+ l("主循环完成本轮工作", "The main loop completes the current turn"),
+ l("再把新事实提炼写回", "New durable facts are extracted and written back"),
+ ],
+ },
+ s10: {
+ summary: l(
+ "系统输入在这里变成装配流水线,模型看到的不再是一段神秘大 prompt,而是一组有边界的输入片段。",
+ "System input becomes an assembly pipeline here: the model no longer sees one giant mysterious prompt, but a bounded set of input sections."
+ ),
+ slices: {
+ control: [
+ { name: l("Prompt Builder", "Prompt Builder"), detail: l("按顺序装配稳定规则、运行时状态、工具和记忆。", "Assembles stable policy, runtime state, tools, and memory in a visible order."), fresh: true },
+ ],
+ state: [
+ { name: l("Prompt Parts", "Prompt Parts"), detail: l("每一段输入都有单独边界。", "Each input fragment has its own explicit boundary."), fresh: true },
+ { name: l("Runtime Context", "Runtime Context"), detail: l("工作目录、任务状态、记忆等运行时片段。", "Runtime fragments such as workspace state, task state, and memory."), fresh: true },
+ ],
+ mainline: [
+ { name: l("模型输入构建", "Model Input Construction"), detail: l("主循环在调用模型前先构建完整输入。", "The loop constructs the full input before calling the model."), },
+ ],
+ },
+ records: [
+ { name: l("Section Order", "Section Order"), detail: l("哪一段先拼、哪一段后拼。", "Which fragment is assembled first versus later."), fresh: true },
+ ],
+ handoff: [
+ l("稳定策略先装配", "Stable policy is assembled first"),
+ l("运行时片段再注入", "Runtime fragments are injected next"),
+ l("最终输入才交给模型", "Only then does the final input reach the model"),
+ ],
+ },
+ s11: {
+ summary: l(
+ "错误恢复把失败正式纳入状态机,系统开始显式记录为什么继续、为什么重试、为什么停止。",
+ "Error recovery formally brings failure into the state machine so the system records why it continues, retries, or stops."
+ ),
+ slices: {
+ control: [
+ { name: l("Recovery Manager", "Recovery Manager"), detail: l("按失败类型选择 retry、fallback、ask 或 stop。", "Chooses retry, fallback, ask, or stop by failure type."), fresh: true },
+ ],
+ state: [
+ { name: l("Continuation Reason", "Continuation Reason"), detail: l("把“为什么继续”写成可见状态。", "Makes the reason for continuation visible state."), fresh: true },
+ { name: l("Retry Bounds", "Retry Bounds"), detail: l("限制恢复分支不会无限循环。", "Prevents recovery branches from looping forever."), fresh: true },
+ ],
+ mainline: [
+ { name: l("失败仍回到主循环", "Failures Still Return to the Loop"), detail: l("失败不是丢掉,而是带着恢复语义回写。", "Failures are not discarded; they write back with recovery semantics."), },
+ ],
+ },
+ records: [
+ { name: l("RecoveryState", "RecoveryState"), detail: l("错误分类和恢复分支状态。", "The error classification and branch state."), fresh: true },
+ ],
+ handoff: [
+ l("工具失败先分类", "A tool failure is classified first"),
+ l("恢复层选择分支", "The recovery layer chooses a branch"),
+ l("继续原因写回主循环", "The continuation reason returns to the main loop"),
+ ],
+ },
+ s12: {
+ summary: l(
+ "任务系统第一次把会话步骤提升成 durable work graph,系统开始能跨轮次推进一组真正的工作节点。",
+ "The task system is where session steps become a durable work graph that can progress real work nodes across turns."
+ ),
+ slices: {
+ control: [
+ { name: l("Unlock Rules", "Unlock Rules"), detail: l("完成一个任务后检查哪些后继节点可以开始。", "Checks which downstream nodes can start once one task completes."), fresh: true },
+ ],
+ state: [
+ { name: l("Task Board", "Task Board"), detail: l("所有工作节点的持久记录面。", "The durable record surface for all work nodes."), fresh: true },
+ { name: l("Dependency Edges", "Dependency Edges"), detail: l("blockedBy / blocks 记录谁依赖谁。", "blockedBy / blocks record who depends on whom."), fresh: true },
+ ],
+ mainline: [
+ { name: l("任务与会话分层", "Tasks Layer Away From the Session"), detail: l("会话内的 todo 退到次要位置,durable task 进入主设计。", "Session-local todo becomes secondary while durable tasks enter the main architecture."), fresh: true },
+ ],
+ },
+ records: [
+ { name: l("TaskRecord", "TaskRecord"), detail: l("目标、状态、依赖、owner 等持久字段。", "Durable fields for goal, status, dependencies, owner, and more."), fresh: true },
+ ],
+ handoff: [
+ l("任务节点被创建", "A task node is created"),
+ l("依赖边决定何时 ready", "Dependency edges decide when work becomes ready"),
+ l("完成后解锁后继节点", "Completion unlocks downstream nodes"),
+ ],
+ },
+ s13: {
+ summary: l(
+ "后台任务把“这项工作存在”和“这次执行正在跑”两层彻底分开,runtime record 正式成立。",
+ "Background tasks fully separate the existence of work from one live execution attempt, which is where runtime records become first-class."
+ ),
+ slices: {
+ control: [
+ { name: l("Notification Drain", "Notification Drain"), detail: l("下一轮调用模型前先把后台摘要带回。", "Drains background notifications before the next model call."), fresh: true },
+ ],
+ state: [
+ { name: l("Task Goal", "Task Goal"), detail: l("durable task 仍在任务板上。", "The durable task goal still lives on the task board."), },
+ { name: l("RuntimeTaskRecord", "RuntimeTaskRecord"), detail: l("描述一条正在跑或跑完的执行槽位。", "Describes one running or completed execution slot."), fresh: true },
+ { name: l("output_file", "output_file"), detail: l("完整产物落盘,通知只带 preview。", "The full artifact goes to disk while notifications carry only a preview."), fresh: true },
+ ],
+ lanes: [
+ { name: l("后台执行线", "Background Execution Slot"), detail: l("慢命令在旁路执行,主循环继续前进。", "Slow commands execute on a side path while the main loop keeps moving."), fresh: true },
+ ],
+ },
+ records: [
+ { name: l("Notification", "Notification"), detail: l("结果回流桥梁。", "The bridge back into the main loop."), fresh: true },
+ ],
+ handoff: [
+ l("主循环创建 runtime record", "The loop creates a runtime record"),
+ l("后台槽位执行慢命令", "A background slot runs the slow command"),
+ l("notification + output_file 回到主系统", "notification plus output_file returns to the main system"),
+ ],
+ },
+ s14: {
+ summary: l(
+ "Cron 调度把时间从“外部条件”变成系统内正式的触发源,但执行权仍然交给 runtime 层。",
+ "The cron scheduler makes time a first-class trigger source while still handing execution off to the runtime layer."
+ ),
+ slices: {
+ control: [
+ { name: l("Schedule Matcher", "Schedule Matcher"), detail: l("只负责判断规则是否命中。", "Only decides whether a rule matches."), fresh: true },
+ ],
+ state: [
+ { name: l("ScheduleRecord", "ScheduleRecord"), detail: l("记录何时触发什么。", "Records what should trigger and when."), fresh: true },
+ { name: l("RuntimeTaskRecord", "RuntimeTaskRecord"), detail: l("命中后生成的具体执行实例。", "The concrete runtime instance created after a match."), },
+ ],
+ lanes: [
+ { name: l("时间触发面", "Time Trigger Surface"), detail: l("cron tick 只是触发面,不直接执行业务。", "A cron tick is only a trigger surface, not the business execution itself."), fresh: true },
+ ],
+ },
+ records: [
+ { name: l("Trigger Event", "Trigger Event"), detail: l("一次规则命中。", "One rule-match occurrence."), fresh: true },
+ ],
+ handoff: [
+ l("cron 规则命中", "A cron rule matches"),
+ l("生成 runtime task", "A runtime task is created"),
+ l("后台运行时接管执行", "The background runtime takes over execution"),
+ ],
+ },
+ s15: {
+ summary: l(
+ "这里开始从单执行者迈向长期团队,persistent teammate、roster 和 inbox 成为新的平台骨架。",
+ "This is where the system moves from one executor toward a long-lived team with persistent teammates, a roster, and inboxes."
+ ),
+ slices: {
+ control: [
+ { name: l("Lead Orchestrator", "Lead Orchestrator"), detail: l("维护 roster、分配职责、观察团队状态。", "Maintains the roster, assigns work, and watches team state."), fresh: true },
+ ],
+ state: [
+ { name: l("Team Roster", "Team Roster"), detail: l("记录每个队友的名字、角色和状态。", "Stores each teammate's name, role, and status."), fresh: true },
+ { name: l("Inbox", "Inbox"), detail: l("每个队友独立的消息边界。", "A separate message boundary for each teammate."), fresh: true },
+ ],
+ lanes: [
+ { name: l("Persistent Teammate", "Persistent Teammate"), detail: l("长期存在、可反复接活的执行者。", "A long-lived worker that can take repeated assignments."), fresh: true },
+ ],
+ },
+ records: [
+ { name: l("TeamMember", "TeamMember"), detail: l("长期身份,不是一次性委派结果。", "A long-lived identity, not a one-shot delegation result."), fresh: true },
+ { name: l("MessageEnvelope", "MessageEnvelope"), detail: l("邮箱里的结构化消息。", "A structured message carried through inboxes."), fresh: true },
+ ],
+ handoff: [
+ l("lead 指定职责", "The lead defines responsibility"),
+ l("消息进入队友 inbox", "Messages enter the teammate inbox"),
+ l("队友独立执行并回信", "The teammate runs independently and replies"),
+ ],
+ },
+ s16: {
+ summary: l(
+ "团队协议把协作从自由文本升级成结构化请求流,request_id 和 durable request record 成为新主线。",
+ "Team protocols upgrade collaboration from free-form text into structured request flows centered on request_id and durable request records."
+ ),
+ slices: {
+ control: [
+ { name: l("Protocol Envelope", "Protocol Envelope"), detail: l("type、from、to、request_id、payload 这类固定外壳。", "A fixed envelope with type, from, to, request_id, and payload."), fresh: true },
+ { name: l("Protocol State Machine", "Protocol State Machine"), detail: l("pending / approved / rejected / expired。", "pending / approved / rejected / expired."), fresh: true },
+ ],
+ state: [
+ { name: l("Request Store", "Request Store"), detail: l("把协议请求变成 durable request record。", "Turns protocol requests into durable request records."), fresh: true },
+ ],
+ lanes: [
+ { name: l("协议协作通道", "Protocol Collaboration Channel"), detail: l("审批、关机、交接这类协作都走同一种 request/response 模型。", "Approvals, shutdowns, and handoffs all use the same request/response model."), fresh: true },
+ ],
+ },
+ records: [
+ { name: l("RequestRecord", "RequestRecord"), detail: l("协议工作的真正状态中心。", "The real state center of a protocol workflow."), fresh: true },
+ ],
+ handoff: [
+ l("发出协议请求", "A protocol request is sent"),
+ l("request_id 绑定状态记录", "request_id binds the durable state record"),
+ l("明确响应回写状态机", "An explicit response writes back into the state machine"),
+ ],
+ },
+ s17: {
+ summary: l(
+ "自治章节把队友从“等待派活”推进到“按 claim policy 自己找活并恢复上下文”,平台开始真正自己运转。",
+ "The autonomy chapter moves teammates from waiting for assignments to self-claiming eligible work under a claim policy and resuming with context."
+ ),
+ slices: {
+ control: [
+ { name: l("Idle Poll Loop", "Idle Poll Loop"), detail: l("空闲时按节奏检查 inbox 和 task board。", "Checks inboxes and the task board on a cadence during idle time."), fresh: true },
+ { name: l("Claim Policy", "Claim Policy"), detail: l("只有满足角色与状态条件的任务才允许 auto-claim。", "Only tasks that satisfy role and state conditions may be auto-claimed."), fresh: true },
+ ],
+ state: [
+ { name: l("Claim Events", "Claim Events"), detail: l("记录是谁因什么来源认领了任务。", "Records who claimed a task and from which source."), fresh: true },
+ { name: l("Durable Requests", "Durable Requests"), detail: l("自治队友继续继承上一章的协议请求状态。", "Autonomous teammates still inherit durable protocol request state from the previous chapter."), },
+ ],
+ lanes: [
+ { name: l("Autonomous Worker", "Autonomous Worker"), detail: l("空闲时自己发现可做工作,再恢复执行。", "Discovers eligible work while idle, then resumes execution."), fresh: true },
+ ],
+ },
+ records: [
+ { name: l("Claimable Predicate", "Claimable Predicate"), detail: l("判定任务是否可由当前角色认领。", "Decides whether the current role may claim a task."), fresh: true },
+ ],
+ handoff: [
+ l("队友进入 idle poll", "The teammate enters idle polling"),
+ l("claim policy 选出可认领工作", "The claim policy selects eligible work"),
+ l("身份块重注入后恢复执行", "Identity is re-injected and execution resumes"),
+ ],
+ },
+ s18: {
+ summary: l(
+ "Worktree 章节把执行环境从主目录里拆开,任务继续表达目标,而 worktree 成为独立、可观察、可 closeout 的执行通道。",
+ "The worktree chapter pulls execution environments out of the main directory: tasks still express goals while worktrees become isolated, observable, closeout-capable lanes."
+ ),
+ slices: {
+ control: [
+ { name: l("Task-to-Lane Binding", "Task-to-Lane Binding"), detail: l("系统明确记录哪条任务用哪条执行通道。", "The system records which task is using which execution lane."), fresh: true },
+ { name: l("Closeout Semantics", "Closeout Semantics"), detail: l("收尾时显式决定 keep 还是 remove。", "Closeout explicitly decides whether to keep or remove the lane."), fresh: true },
+ ],
+ state: [
+ { name: l("Worktree Index", "Worktree Index"), detail: l("注册每条隔离车道的路径、分支和 task_id。", "Registers each isolated lane's path, branch, and task_id."), fresh: true },
+ { name: l("TaskRecord.worktree", "TaskRecord.worktree"), detail: l("任务记录里也能直接看到它当前在哪条 lane 上。", "The task record shows which lane it is currently using."), fresh: true },
+ { name: l("Event Log", "Event Log"), detail: l("create / enter / run / closeout 等生命周期事件。", "Lifecycle events such as create, enter, run, and closeout."), fresh: true },
+ ],
+ lanes: [
+ { name: l("Isolated Directory Lane", "Isolated Directory Lane"), detail: l("不同任务默认不共享未提交改动。", "Different tasks do not share uncommitted changes by default."), fresh: true },
+ ],
+ },
+ records: [
+ { name: l("WorktreeRecord", "WorktreeRecord"), detail: l("车道级执行记录。", "The execution record for one lane."), fresh: true },
+ { name: l("Closeout Record", "Closeout Record"), detail: l("保留或回收的显式结果。", "The explicit result of keep versus reclaim."), fresh: true },
+ ],
+ handoff: [
+ l("任务绑定到 worktree lane", "A task binds to a worktree lane"),
+ l("命令在隔离目录里执行", "Commands run inside the isolated directory"),
+ l("closeout 决定 lane 的最终去向", "Closeout decides the lane's final fate"),
+ ],
+ },
+ s19: {
+ summary: l(
+ "最后一章把本地工具、插件和 MCP 服务器(server)重新统一到同一能力总线(capability bus)下,外部能力终于回到原有控制面里。",
+ "The final chapter reunifies native tools, plugins, and MCP servers on one capability bus so external capability returns to the same control plane."
+ ),
+ slices: {
+ control: [
+ { name: l("能力路由器(Capability Router)", "Capability Router"), detail: l("先发现能力,再决定本地、插件还是 MCP 路由。", "Discovers capability first, then routes to native, plugin, or MCP."), fresh: true },
+ { name: l("共享权限闸门(Shared Permission Gate)", "Shared Permission Gate"), detail: l("外部能力和本地工具共用同一权限语义。", "External capabilities and native tools share one permission contract."), fresh: true },
+ { name: l("结果标准化器(Result Normalizer)", "Result Normalizer"), detail: l("远程结果也要转成主循环看得懂的标准 payload。", "Remote results are normalized into a payload the main loop already understands."), fresh: true },
+ ],
+ state: [
+ { name: l("插件清单(Plugin Manifest)", "Plugin Manifest"), detail: l("告诉系统有哪些外部服务器(server)可用。", "Tells the system which external servers are available."), fresh: true },
+ { name: l("能力视图(Capability View)", "Capability View"), detail: l("把本地 / 插件 / MCP 整理成一个可比较的能力面。", "Collects native, plugin, and MCP capability into one comparable view."), fresh: true },
+ ],
+ lanes: [
+ { name: l("原生工具(Native Tool)", "Native Tool"), detail: l("本地处理器(handler)。", "A local handler."), },
+ { name: l("MCP / 插件车道(Plugin Lane)", "MCP / Plugin Lane"), detail: l("外部服务器(server)或插件提供的远程能力。", "Remote capability provided by an external server or plugin."), fresh: true },
+ ],
+ },
+ records: [
+ { name: l("作用域能力(Scoped Capability)", "Scoped Capability"), detail: l("带服务器(server)/来源(source)/风险(risk)信息的能力对象。", "A capability object carrying server, source, and risk information."), fresh: true },
+ ],
+ handoff: [
+ l("先做能力发现(capability discovery)", "Capability discovery happens first"),
+ l("统一权限(permission)+ 路由(routing)", "Routing and permission stay unified"),
+ l("标准化结果再回写主循环", "A normalized result writes back into the main loop"),
+ ],
+ },
+};
diff --git a/web/src/data/execution-flows.ts b/web/src/data/execution-flows.ts
index 72ce54dd0..f3cfe271a 100644
--- a/web/src/data/execution-flows.ts
+++ b/web/src/data/execution-flows.ts
@@ -10,6 +10,148 @@ const COL_CENTER = FLOW_WIDTH / 2;
const COL_LEFT = 140;
const COL_RIGHT = FLOW_WIDTH - 140;
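+// Flow diagrams for chapters s07-s11, s14, and s19; they use the same node/edge
+// shape as the EXECUTION_FLOWS entries further down in this file.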
+const GENERIC_FLOWS: Record = {
+ s07: {
+ nodes: [
+ { id: "intent", label: "Model Intent", type: "start", x: COL_CENTER, y: 30 },
+ { id: "normalize", label: "Normalize\nAction", type: "process", x: COL_CENTER, y: 110 },
+ { id: "policy", label: "Permission\nPolicy?", type: "decision", x: COL_CENTER, y: 200 },
+ { id: "ask", label: "Ask User /\nReturn Deny", type: "subprocess", x: COL_LEFT, y: 300 },
+ { id: "execute", label: "Execute Tool", type: "subprocess", x: COL_RIGHT, y: 300 },
+ { id: "append", label: "Append Structured\nPermission Result", type: "process", x: COL_CENTER, y: 410 },
+ { id: "loop", label: "Continue Loop", type: "end", x: COL_CENTER, y: 500 },
+ ],
+ edges: [
+ { from: "intent", to: "normalize" },
+ { from: "normalize", to: "policy" },
+ { from: "policy", to: "ask", label: "deny / ask" },
+ { from: "policy", to: "execute", label: "allow" },
+ { from: "ask", to: "append" },
+ { from: "execute", to: "append" },
+ { from: "append", to: "loop" },
+ ],
+ },
+ s08: {
+ nodes: [
+ { id: "loop", label: "Main Loop", type: "start", x: COL_CENTER, y: 30 },
+ { id: "event", label: "Emit Lifecycle\nEvent", type: "process", x: COL_CENTER, y: 110 },
+ { id: "hook_check", label: "Hooks\nRegistered?", type: "decision", x: COL_CENTER, y: 200 },
+ { id: "dispatch", label: "Dispatch Hook\nEnvelope", type: "subprocess", x: COL_LEFT, y: 300 },
+ { id: "tool", label: "Run Core Tool\nPath", type: "subprocess", x: COL_RIGHT, y: 300 },
+ { id: "side_effect", label: "Audit / Trace /\nPolicy Side Effects", type: "process", x: COL_LEFT, y: 410 },
+ { id: "append", label: "Append Result\nand Continue", type: "end", x: COL_CENTER, y: 500 },
+ ],
+ edges: [
+ { from: "loop", to: "event" },
+ { from: "event", to: "hook_check" },
+ { from: "hook_check", to: "dispatch", label: "yes" },
+ { from: "hook_check", to: "tool", label: "no" },
+ { from: "dispatch", to: "side_effect" },
+ { from: "dispatch", to: "tool", label: "observe" },
+ { from: "side_effect", to: "append" },
+ { from: "tool", to: "append" },
+ ],
+ },
+ s09: {
+ nodes: [
+ { id: "start", label: "New Turn", type: "start", x: COL_CENTER, y: 30 },
+ { id: "load", label: "Load Relevant\nMemory", type: "subprocess", x: COL_CENTER, y: 110 },
+ { id: "assemble", label: "Assemble Prompt\nwith Memory", type: "process", x: COL_CENTER, y: 190 },
+ { id: "tool", label: "Run Work", type: "subprocess", x: COL_LEFT, y: 290 },
+ { id: "extract", label: "Extract Durable\nFacts", type: "process", x: COL_RIGHT, y: 290 },
+ { id: "persist", label: "Persist Memory", type: "subprocess", x: COL_RIGHT, y: 380 },
+ { id: "end", label: "Next Session /\nNext Turn", type: "end", x: COL_CENTER, y: 470 },
+ ],
+ edges: [
+ { from: "start", to: "load" },
+ { from: "load", to: "assemble" },
+ { from: "assemble", to: "tool" },
+ { from: "tool", to: "extract" },
+ { from: "extract", to: "persist" },
+ { from: "persist", to: "end" },
+ ],
+ },
+ s10: {
+ nodes: [
+ { id: "policy", label: "Stable Policy", type: "start", x: COL_CENTER, y: 30 },
+ { id: "runtime", label: "Runtime State", type: "process", x: COL_LEFT, y: 120 },
+ { id: "memory", label: "Memory /\nTask Context", type: "process", x: COL_RIGHT, y: 120 },
+ { id: "assemble", label: "Prompt Section\nAssembly", type: "subprocess", x: COL_CENTER, y: 220 },
+ { id: "model", label: "Model Call", type: "process", x: COL_CENTER, y: 320 },
+ { id: "tool", label: "Tool Loop / Text\nResponse", type: "decision", x: COL_CENTER, y: 410 },
+ { id: "end", label: "Append and\nContinue", type: "end", x: COL_CENTER, y: 500 },
+ ],
+ edges: [
+ { from: "policy", to: "runtime" },
+ { from: "policy", to: "memory" },
+ { from: "runtime", to: "assemble" },
+ { from: "memory", to: "assemble" },
+ { from: "assemble", to: "model" },
+ { from: "model", to: "tool" },
+ { from: "tool", to: "end", label: "visible input" },
+ ],
+ },
+ s11: {
+ nodes: [
+ { id: "tool", label: "Tool Result", type: "start", x: COL_CENTER, y: 30 },
+ { id: "error", label: "Error?", type: "decision", x: COL_CENTER, y: 120 },
+ { id: "append", label: "Append Result", type: "process", x: COL_RIGHT, y: 220 },
+ { id: "classify", label: "Classify Error", type: "subprocess", x: COL_LEFT, y: 220 },
+ { id: "branch", label: "Retry / Fallback /\nAsk User / Stop", type: "decision", x: COL_LEFT, y: 330 },
+ { id: "reason", label: "Write Continuation\nReason", type: "process", x: COL_CENTER, y: 430 },
+ { id: "loop", label: "Continue or Exit", type: "end", x: COL_CENTER, y: 520 },
+ ],
+ edges: [
+ { from: "tool", to: "error" },
+ { from: "error", to: "append", label: "no" },
+ { from: "error", to: "classify", label: "yes" },
+ { from: "classify", to: "branch" },
+ { from: "append", to: "loop" },
+ { from: "branch", to: "reason" },
+ { from: "reason", to: "loop" },
+ ],
+ },
+ s14: {
+ nodes: [
+ { id: "tick", label: "Cron Tick", type: "start", x: COL_CENTER, y: 30 },
+ { id: "match", label: "Rule Match?", type: "decision", x: COL_CENTER, y: 120 },
+ { id: "sleep", label: "Wait for Next\nTick", type: "end", x: COL_RIGHT, y: 120 },
+ { id: "spawn", label: "Create Runtime\nTask", type: "subprocess", x: COL_LEFT, y: 240 },
+ { id: "queue", label: "Queue for\nBackground Runtime", type: "process", x: COL_LEFT, y: 340 },
+ { id: "notify", label: "Notify Runtime /\nWrite Schedule Event", type: "process", x: COL_CENTER, y: 440 },
+ { id: "end", label: "Execution Continues\nElsewhere", type: "end", x: COL_CENTER, y: 530 },
+ ],
+ edges: [
+ { from: "tick", to: "match" },
+ { from: "match", to: "sleep", label: "no" },
+ { from: "match", to: "spawn", label: "yes" },
+ { from: "spawn", to: "queue" },
+ { from: "queue", to: "notify" },
+ { from: "notify", to: "end" },
+ ],
+ },
+ s19: {
+ nodes: [
+ { id: "request", label: "Capability\nRequest", type: "start", x: COL_CENTER, y: 30 },
+ { id: "discover", label: "Discover Native /\nPlugin / MCP", type: "process", x: COL_CENTER, y: 120 },
+ { id: "route", label: "Route to\nCapability", type: "decision", x: COL_CENTER, y: 210 },
+ { id: "native", label: "Native Tool", type: "subprocess", x: COL_LEFT, y: 320 },
+ { id: "external", label: "Plugin or MCP\nServer Call", type: "subprocess", x: COL_RIGHT, y: 320 },
+ { id: "normalize", label: "Normalize Result /\nApply Policy", type: "process", x: COL_CENTER, y: 430 },
+ { id: "append", label: "Append Back to\nMain Loop", type: "end", x: COL_CENTER, y: 520 },
+ ],
+ edges: [
+ { from: "request", to: "discover" },
+ { from: "discover", to: "route" },
+ { from: "route", to: "native", label: "local" },
+ { from: "route", to: "external", label: "plugin / mcp" },
+ { from: "native", to: "normalize" },
+ { from: "external", to: "normalize" },
+ { from: "normalize", to: "append" },
+ ],
+ },
+};
+
export const EXECUTION_FLOWS: Record = {
s01: {
nodes: [
@@ -142,14 +284,14 @@ export const EXECUTION_FLOWS: Record = {
{ from: "append", to: "compress_check" },
],
},
- s07: {
+ s12: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 190 },
- { id: "is_task", label: "task_manager?", type: "decision", x: COL_LEFT, y: 280 },
- { id: "crud", label: "CRUD Task\n(file-based)", type: "subprocess", x: 60, y: 370 },
- { id: "dep_check", label: "Check\nDependencies", type: "process", x: 60, y: 450 },
+ { id: "is_task", label: "task tool?", type: "decision", x: COL_LEFT, y: 280 },
+ { id: "crud", label: "Task Board CRUD\n(.tasks)", type: "subprocess", x: 60, y: 370 },
+ { id: "dep_check", label: "Unlock / Respect\nDependencies", type: "process", x: 60, y: 450 },
{ id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT + 80, y: 370 },
{ id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 530 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 },
@@ -167,16 +309,16 @@ export const EXECUTION_FLOWS: Record = {
{ from: "append", to: "llm" },
],
},
- s08: {
+ s13: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 190 },
- { id: "bg_check", label: "Background?", type: "decision", x: COL_LEFT, y: 280 },
- { id: "bg_spawn", label: "Spawn Thread", type: "subprocess", x: 60, y: 370 },
+ { id: "bg_check", label: "runtime tool?", type: "decision", x: COL_LEFT, y: 280 },
+ { id: "bg_spawn", label: "Spawn Runtime\nTask", type: "subprocess", x: 60, y: 370 },
{ id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT + 80, y: 370 },
{ id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 450 },
- { id: "notify", label: "Notification\nQueue", type: "process", x: 60, y: 450 },
+ { id: "notify", label: "Persist Runtime\nRecord + Notify", type: "process", x: 60, y: 450 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 },
],
edges: [
@@ -184,20 +326,20 @@ export const EXECUTION_FLOWS: Record<string, FlowDefinition> = {
{ from: "llm", to: "tool_check" },
{ from: "tool_check", to: "bg_check", label: "yes" },
{ from: "tool_check", to: "end", label: "no" },
- { from: "bg_check", to: "bg_spawn", label: "bg" },
- { from: "bg_check", to: "exec", label: "fg" },
+ { from: "bg_check", to: "bg_spawn", label: "runtime" },
+ { from: "bg_check", to: "exec", label: "sync" },
{ from: "bg_spawn", to: "notify" },
{ from: "exec", to: "append" },
{ from: "append", to: "llm" },
{ from: "notify", to: "llm" },
],
},
- s09: {
+ s15: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "llm", label: "LLM Call\n(team lead)", type: "process", x: COL_CENTER, y: 110 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 200 },
- { id: "is_team", label: "Team tool?", type: "decision", x: COL_LEFT, y: 290 },
+ { id: "is_team", label: "team tool?", type: "decision", x: COL_LEFT, y: 290 },
{ id: "spawn", label: "Spawn\nTeammate", type: "subprocess", x: 60, y: 390 },
{ id: "msg", label: "Send Message\n(JSONL inbox)", type: "subprocess", x: 60, y: 470 },
{ id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT + 80, y: 390 },
@@ -219,14 +361,14 @@ export const EXECUTION_FLOWS: Record<string, FlowDefinition> = {
{ from: "append", to: "llm" },
],
},
- s10: {
+ s16: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "llm", label: "LLM Call\n(team lead)", type: "process", x: COL_CENTER, y: 110 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 200 },
- { id: "is_proto", label: "Protocol?", type: "decision", x: COL_LEFT, y: 290 },
- { id: "shutdown", label: "Shutdown\nRequest", type: "subprocess", x: 60, y: 390 },
- { id: "fsm", label: "FSM:\npending->approved", type: "process", x: 60, y: 470 },
+ { id: "is_proto", label: "protocol tool?", type: "decision", x: COL_LEFT, y: 290 },
+ { id: "shutdown", label: "Create Durable\nRequest", type: "subprocess", x: 60, y: 390 },
+ { id: "fsm", label: "Request Record:\npending -> resolved", type: "process", x: 60, y: 470 },
{ id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT + 80, y: 390 },
{ id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 550 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 290 },
@@ -237,7 +379,7 @@ export const EXECUTION_FLOWS: Record<string, FlowDefinition> = {
{ from: "llm", to: "tool_check" },
{ from: "tool_check", to: "is_proto", label: "yes" },
{ from: "tool_check", to: "end", label: "no" },
- { from: "is_proto", to: "shutdown", label: "shutdown" },
+ { from: "is_proto", to: "shutdown", label: "request" },
{ from: "is_proto", to: "exec", label: "other" },
{ from: "shutdown", to: "fsm" },
{ from: "fsm", to: "teammate" },
@@ -246,42 +388,43 @@ export const EXECUTION_FLOWS: Record<string, FlowDefinition> = {
{ from: "append", to: "llm" },
],
},
- s11: {
+ s17: {
nodes: [
- { id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
- { id: "inbox", label: "Check Inbox", type: "process", x: COL_CENTER, y: 100 },
- { id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 180 },
- { id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 260 },
- { id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT, y: 340 },
- { id: "append", label: "Append Result", type: "process", x: COL_LEFT, y: 410 },
- { id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 340 },
- { id: "idle", label: "Idle Cycle", type: "process", x: COL_RIGHT, y: 420 },
- { id: "poll", label: "Poll Tasks\n+ Auto-Claim", type: "subprocess", x: COL_RIGHT, y: 500 },
+ { id: "start", label: "Resume /\nNew Work", type: "start", x: COL_CENTER, y: 30 },
+ { id: "identity", label: "Ensure Identity\nContext", type: "process", x: COL_CENTER, y: 110 },
+ { id: "llm", label: "LLM Work Turn", type: "process", x: COL_CENTER, y: 190 },
+ { id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 270 },
+ { id: "exec", label: "Execute Tool /\nProtocol Action", type: "subprocess", x: COL_LEFT, y: 350 },
+ { id: "append", label: "Append Result\nand Continue", type: "process", x: COL_LEFT, y: 430 },
+ { id: "idle", label: "Enter Idle\nPhase", type: "process", x: COL_RIGHT, y: 350 },
+ { id: "poll", label: "Inbox First,\nThen Claimable Tasks", type: "subprocess", x: COL_RIGHT, y: 430 },
+ { id: "claim", label: "Auto-Claim +\nWrite Claim Event", type: "process", x: COL_RIGHT, y: 520 },
],
edges: [
- { from: "start", to: "inbox" },
- { from: "inbox", to: "llm" },
+ { from: "start", to: "identity" },
+ { from: "identity", to: "llm" },
{ from: "llm", to: "tool_check" },
{ from: "tool_check", to: "exec", label: "yes" },
- { from: "tool_check", to: "end", label: "no" },
+ { from: "tool_check", to: "idle", label: "idle" },
{ from: "exec", to: "append" },
{ from: "append", to: "llm" },
- { from: "end", to: "idle" },
{ from: "idle", to: "poll" },
- { from: "poll", to: "inbox" },
+ { from: "poll", to: "claim", label: "claimable task" },
+ { from: "poll", to: "identity", label: "inbox message" },
+ { from: "claim", to: "identity", label: "resume work" },
],
},
- s12: {
+ s18: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 190 },
{ id: "is_wt", label: "worktree tool?", type: "decision", x: COL_LEFT, y: 280 },
- { id: "task", label: "Task Board\\n(.tasks)", type: "process", x: 60, y: 360 },
- { id: "wt_create", label: "Allocate / Enter\\nWorktree", type: "subprocess", x: 60, y: 440 },
+ { id: "task", label: "Task State:\\nbind + worktree_state", type: "process", x: 60, y: 360 },
+ { id: "wt_create", label: "Create / Enter\\nWorktree Lane", type: "subprocess", x: 60, y: 440 },
{ id: "wt_run", label: "Run in\\nIsolated Dir", type: "subprocess", x: COL_LEFT + 80, y: 360 },
- { id: "wt_close", label: "Closeout:\\nworktree_keep / remove", type: "process", x: COL_LEFT + 80, y: 440 },
- { id: "events", label: "Emit Lifecycle Events\\n(side-channel)", type: "process", x: COL_RIGHT, y: 420 },
+ { id: "wt_close", label: "worktree_closeout\\nkeep | remove", type: "process", x: COL_LEFT + 80, y: 440 },
+ { id: "events", label: "Emit enter / run /\ncloseout events", type: "process", x: COL_RIGHT, y: 420 },
{ id: "events_read", label: "Optional Read\\nworktree_events", type: "subprocess", x: COL_RIGHT, y: 520 },
{ id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 530 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 },
@@ -292,13 +435,13 @@ export const EXECUTION_FLOWS: Record<string, FlowDefinition> = {
{ from: "tool_check", to: "is_wt", label: "yes" },
{ from: "tool_check", to: "end", label: "no" },
{ from: "is_wt", to: "task", label: "task ops" },
- { from: "is_wt", to: "wt_create", label: "create/bind" },
+ { from: "is_wt", to: "wt_create", label: "create/enter" },
{ from: "is_wt", to: "wt_run", label: "run/status" },
{ from: "task", to: "wt_create", label: "allocate lane" },
{ from: "wt_create", to: "wt_run" },
{ from: "task", to: "append", label: "task result" },
- { from: "wt_create", to: "events", label: "emit create" },
- { from: "wt_create", to: "append", label: "create result" },
+ { from: "wt_create", to: "events", label: "emit create/enter" },
+ { from: "wt_create", to: "append", label: "create/enter result" },
{ from: "wt_run", to: "wt_close" },
{ from: "wt_run", to: "append", label: "run/status result" },
{ from: "wt_close", to: "events", label: "emit closeout" },
@@ -311,5 +454,5 @@ export const EXECUTION_FLOWS: Record<string, FlowDefinition> = {
};
export function getFlowForVersion(version: string): FlowDefinition | null {
- return EXECUTION_FLOWS[version] ?? null;
+ return GENERIC_FLOWS[version] ?? EXECUTION_FLOWS[version] ?? null;
}
diff --git a/web/src/data/generated/docs.json b/web/src/data/generated/docs.json
index b0a3f8975..17ec65271 100644
--- a/web/src/data/generated/docs.json
+++ b/web/src/data/generated/docs.json
@@ -1,218 +1,974 @@
[
+ {
+ "version": null,
+ "slug": "data-structures",
+ "locale": "en",
+ "title": "Core Data Structures",
+ "kind": "bridge",
+ "filename": "data-structures.md",
+ "content": "# Core Data Structures\n\n> **Reference** -- Use this when you lose track of where state lives. Each record has one clear job.\n\nThe easiest way to get lost in an agent system is not feature count -- it is losing track of where the state actually lives. This document collects the core records that appear again and again across the mainline and bridge docs so you always have one place to look them up.\n\n## Recommended Reading Together\n\n- [`glossary.md`](./glossary.md) for term meanings\n- [`entity-map.md`](./entity-map.md) for layer boundaries\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) for task vs runtime-slot separation\n- [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md) for MCP beyond tools\n\n## Two Principles To Keep In Mind\n\n### Principle 1: separate content state from process-control state\n\n- `messages`, `tool_result`, and memory text are content state\n- `turn_count`, `transition`, and retry flags are process-control state\n\n### Principle 2: separate durable state from runtime-only state\n\n- tasks, memory, and schedules are usually durable\n- runtime slots, permission decisions, and live MCP connections are usually runtime state\n\n## Query And Conversation State\n\n### `Message`\n\nStores conversation and tool round-trip history.\n\n### `NormalizedMessage`\n\nStable message shape ready for the model API.\n\n### `QueryParams`\n\nExternal input used to start one query process.\n\n### `QueryState`\n\nMutable state that changes across turns.\n\n### `TransitionReason`\n\nExplains why the next turn exists.\n\n### `CompactSummary`\n\nCompressed carry-forward summary when old context leaves the hot window.\n\n## Prompt And Input State\n\n### `SystemPromptBlock`\n\nOne stable prompt fragment.\n\n### `PromptParts`\n\nSeparated prompt fragments before final assembly.\n\n### `ReminderMessage`\n\nTemporary one-turn or one-mode injection.\n\n## Tool And Control-Plane State\n\n### `ToolSpec`\n\nWhat the model knows about one tool.\n\n### `ToolDispatchMap`\n\nName-to-handler routing table.\n\n### `ToolUseContext`\n\nShared execution environment visible to tools.\n\n### `ToolResultEnvelope`\n\nNormalized result returned into the main loop.\n\n### `PermissionRule`\n\nPolicy that decides allow / deny / ask.\n\n### `PermissionDecision`\n\nStructured output of the permission gate.\n\n### `HookEvent`\n\nNormalized lifecycle event emitted around the loop.\n\n## Durable Work State\n\n### `TaskRecord`\n\nDurable work-graph node with goal, status, and dependency edges.\n\n### `ScheduleRecord`\n\nRule describing when work should trigger.\n\n### `MemoryEntry`\n\nCross-session fact worth keeping.\n\n## Runtime Execution State\n\n### `RuntimeTaskState`\n\nLive execution-slot record for background or long-running work.\n\n### `Notification`\n\nSmall result bridge that carries runtime outcomes back into the main loop.\n\n### `RecoveryState`\n\nState used to continue coherently after failures.\n\n## Team And Platform State\n\n### `TeamMember`\n\nPersistent teammate identity.\n\n### `MessageEnvelope`\n\nStructured message between teammates.\n\n### `RequestRecord`\n\nDurable record for approvals, shutdowns, handoffs, or other protocol workflows.\n\n### `WorktreeRecord`\n\nRecord for one isolated execution lane.\n\n### `MCPServerConfig`\n\nConfiguration for one external capability provider.\n\n### `CapabilityRoute`\n\nRouting decision for native, plugin, or MCP-backed capability.\n\n## A Useful Quick Map\n\n| Record | Main Job | Usually Lives In 
|\n|---|---|---|\n| `Message` | conversation history | `messages[]` |\n| `QueryState` | turn-by-turn control | query engine |\n| `ToolUseContext` | tool execution environment | tool control plane |\n| `PermissionDecision` | execution gate outcome | permission layer |\n| `TaskRecord` | durable work goal | task board |\n| `RuntimeTaskState` | live execution slot | runtime manager |\n| `TeamMember` | persistent teammate | team config |\n| `RequestRecord` | protocol state | request tracker |\n| `WorktreeRecord` | isolated execution lane | worktree index |\n| `MCPServerConfig` | external capability config | settings / plugin config |\n\n## Key Takeaway\n\n**High-completion systems become much easier to understand when every important record has one clear job and one clear layer.**\n"
+ },
+ {
+ "version": null,
+ "slug": "entity-map",
+ "locale": "en",
+ "title": "Entity Map",
+ "kind": "bridge",
+ "filename": "entity-map.md",
+ "content": "# Entity Map\n\n> **Reference** -- Use this when concepts start to blur together. It tells you which layer each thing belongs to.\n\nAs you move into the second half of the repo, you will notice that the main source of confusion is often not code. It is the fact that many entities look similar while living on different layers. This map helps you keep them straight.\n\n## How This Map Differs From Other Docs\n\n- this map answers: **which layer does this thing belong to?**\n- [`glossary.md`](./glossary.md) answers: **what does the word mean?**\n- [`data-structures.md`](./data-structures.md) answers: **what does the state shape look like?**\n\n## A Fast Layered Picture\n\n```text\nconversation layer\n - message\n - prompt block\n - reminder\n\naction layer\n - tool call\n - tool result\n - hook event\n\nwork layer\n - work-graph task\n - runtime task\n - protocol request\n\nexecution layer\n - subagent\n - teammate\n - worktree lane\n\nplatform layer\n - MCP server\n - memory record\n - capability router\n```\n\n## The Most Commonly Confused Pairs\n\n### `Message` vs `PromptBlock`\n\n| Entity | What It Is | What It Is Not |\n|---|---|---|\n| `Message` | conversational content in history | not a stable system rule |\n| `PromptBlock` | stable prompt instruction fragment | not one turn's latest event |\n\n### `Todo / Plan` vs `Task`\n\n| Entity | What It Is | What It Is Not |\n|---|---|---|\n| `todo / plan` | temporary session guidance | not a durable work graph |\n| `task` | durable work node | not one turn's local thought |\n\n### `Work-Graph Task` vs `RuntimeTaskState`\n\n| Entity | What It Is | What It Is Not |\n|---|---|---|\n| work-graph task | durable goal and dependency node | not the live executor |\n| runtime task | currently running execution slot | not the durable dependency node |\n\n### `Subagent` vs `Teammate`\n\n| Entity | What It Is | What It Is Not |\n|---|---|---|\n| subagent | one-shot delegated worker | not a long-lived team member |\n| teammate | persistent collaborator with identity and inbox | not a disposable summary tool |\n\n### `ProtocolRequest` vs normal message\n\n| Entity | What It Is | What It Is Not |\n|---|---|---|\n| normal message | free-form communication | not a traceable approval workflow |\n| protocol request | structured request with `request_id` | not casual chat text |\n\n### `Task` vs `Worktree`\n\n| Entity | What It Is | What It Is Not |\n|---|---|---|\n| task | what should be done | not a directory |\n| worktree | where isolated execution happens | not the goal itself |\n\n### `Memory` vs `CLAUDE.md`\n\n| Entity | What It Is | What It Is Not |\n|---|---|---|\n| memory | durable cross-session facts | not the project rule file |\n| `CLAUDE.md` | stable local rule / instruction surface | not user-specific long-term fact storage |\n\n### `MCPServer` vs `MCPTool`\n\n| Entity | What It Is | What It Is Not |\n|---|---|---|\n| MCP server | external capability provider | not one specific tool |\n| MCP tool | one exposed capability | not the whole connection surface |\n\n## Quick \"What / Where\" Table\n\n| Entity | Main Job | Typical Place |\n|---|---|---|\n| `Message` | visible conversation context | `messages[]` |\n| `PromptParts` | input assembly fragments | prompt builder |\n| `PermissionRule` | execution decision rules | settings / session state |\n| `HookEvent` | lifecycle extension point | hook system |\n| `MemoryEntry` | durable fact | memory store |\n| `TaskRecord` | work goal node | task board |\n| `RuntimeTaskState` | live execution 
slot | runtime manager |\n| `TeamMember` | persistent worker identity | team config |\n| `MessageEnvelope` | structured teammate message | inbox |\n| `RequestRecord` | protocol workflow state | request tracker |\n| `WorktreeRecord` | isolated execution lane | worktree index |\n| `MCPServerConfig` | external capability provider config | plugin / settings |\n\n## Key Takeaway\n\n**The more capable the system becomes, the more important clear entity boundaries become.**\n"
+ },
+ {
+ "version": null,
+ "slug": "glossary",
+ "locale": "en",
+ "title": "Glossary",
+ "kind": "bridge",
+ "filename": "glossary.md",
+ "content": "# Glossary\n\n> **Reference** -- Bookmark this page. Come back whenever you hit an unfamiliar term.\n\nThis glossary collects the terms that matter most to the teaching mainline -- the ones that most often trip up beginners. If you find yourself staring at a word mid-chapter and thinking \"wait, what does that mean again?\", this is the page to return to.\n\n## Recommended Companion Docs\n\n- [`entity-map.md`](./entity-map.md) for layer boundaries\n- [`data-structures.md`](./data-structures.md) for record shapes\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) if you keep mixing up different kinds of \"task\"\n\n## Agent\n\nA model that can reason over input and call tools to complete work. (Think of it as the \"brain\" that decides what to do next.)\n\n## Harness\n\nThe working environment prepared around the model -- everything the model needs but cannot provide for itself:\n\n- tools\n- filesystem\n- permissions\n- prompt assembly\n- memory\n- task runtime\n\n## Agent Loop\n\nThe repeating core cycle that drives every agent session. Each iteration looks like this:\n\n1. send current input to the model\n2. inspect whether it answered or asked for tools\n3. execute tools if needed\n4. write results back\n5. continue or stop\n\n## Message / `messages[]`\n\nThe visible conversation and tool-result history used as working context. (This is the rolling transcript the model sees on every turn.)\n\n## Tool\n\nAn action the model may request, such as reading a file, writing a file, editing content, or running a shell command.\n\n## Tool Schema\n\nThe description shown to the model:\n\n- name\n- purpose\n- input parameters\n- input types\n\n## Dispatch Map\n\nA routing table from tool names to handlers. (Like a phone switchboard: the name comes in, and the map connects it to the right function.)\n\n## Stop Reason\n\nWhy the current model turn ended. Common values:\n\n- `end_turn`\n- `tool_use`\n- `max_tokens`\n\n## Context\n\nThe total information currently visible to the model. (Everything inside the model's \"window\" on a given turn.)\n\n## Compaction\n\nThe process of shrinking active context while preserving the important storyline and next-step information. (Like summarizing meeting notes so you keep the action items but drop the small talk.)\n\n## Subagent\n\nA one-shot delegated worker that runs in a separate context and usually returns a summary. (A temporary helper spun up for one job, then discarded.)\n\n## Permission\n\nThe decision layer that determines whether a requested action may execute.\n\n## Hook\n\nAn extension point that lets the system observe or add side effects around the loop without rewriting the loop itself. (Like event listeners -- the loop fires a signal, and hooks respond.)\n\n## Memory\n\nCross-session information worth keeping because it remains valuable later and is not cheap to re-derive.\n\n## System Prompt\n\nThe stable system-level instruction surface that defines identity, rules, and long-lived constraints.\n\n## Query\n\nThe full multi-turn process used to complete one user request. (One query may span many loop turns before the answer is ready.)\n\n## Transition Reason\n\nThe reason the system continues into another turn.\n\n## Task\n\nA durable work goal node in the work graph. (Unlike a todo item that disappears when the session ends, a task persists.)\n\n## Runtime Task / Runtime Slot\n\nA live execution slot representing something currently running. 
(The task says \"what should happen\"; the runtime slot says \"it is happening right now.\")\n\n## Teammate\n\nA persistent collaborator inside a multi-agent system. (Unlike a subagent that is fire-and-forget, a teammate sticks around.)\n\n## Protocol Request\n\nA structured request with explicit identity, status, and tracking, usually backed by a `request_id`. (A formal envelope rather than a casual message.)\n\n## Worktree\n\nAn isolated execution directory lane used so parallel work does not collide. (Each lane gets its own copy of the workspace, like separate desks for separate tasks.)\n\n## MCP\n\nModel Context Protocol. In this repo it represents an external capability integration surface, not only a tool list. (The bridge that lets your agent talk to outside services.)\n\n## DAG\n\nDirected Acyclic Graph. A set of nodes connected by one-way edges with no cycles. (If you draw arrows between tasks showing \"A must finish before B\", and no arrow path ever loops back to where it started, you have a DAG.) Used in this repo for task dependency graphs.\n\n## FSM / State Machine\n\nFinite State Machine. A system that is always in exactly one state from a known set, and transitions between states based on defined events. (Think of a traffic light cycling through red, green, and yellow.) The agent loop's turn logic is modeled as a state machine.\n\n## Control Plane\n\nThe layer that decides what should happen next, as opposed to the layer that actually does the work. (Air traffic control versus the airplane.) In this repo, the query engine and tool dispatch act as control planes.\n\n## Tokens\n\nThe atomic units a language model reads and writes. One token is roughly 3/4 of an English word. Context limits and compaction thresholds are measured in tokens.\n"
+ },
+ {
+ "version": null,
+ "slug": "s00-architecture-overview",
+ "locale": "en",
+ "title": "s00: Architecture Overview",
+ "kind": "bridge",
+ "filename": "s00-architecture-overview.md",
+ "content": "# s00: Architecture Overview\n\nWelcome to the map. Before diving into building piece by piece, it helps to see the whole picture from above. This document shows you what the full system contains, why the chapters are ordered this way, and what you will actually learn.\n\n## The Big Picture\n\nThe mainline of this repo is reasonable because it grows the system in four dependency-driven stages:\n\n1. build a real single-agent loop\n2. harden that loop with safety, memory, and recovery\n3. turn temporary session work into durable runtime work\n4. grow the single executor into a multi-agent platform with isolated lanes and external capability routing\n\nThis order follows **mechanism dependencies**, not file order and not product glamour.\n\nIf the learner does not already understand:\n\n`user input -> model -> tools -> write-back -> next turn`\n\nthen permissions, hooks, memory, tasks, teams, worktrees, and MCP all become disconnected vocabulary.\n\n## What This Repo Is Trying To Reconstruct\n\nThis repository is not trying to mirror a production codebase line by line.\n\nIt is trying to reconstruct the parts that determine whether an agent system actually works:\n\n- what the main modules are\n- how those modules cooperate\n- what each module is responsible for\n- where the important state lives\n- how one request flows through the system\n\nThat means the goal is:\n\n**high fidelity to the design backbone, not 1:1 fidelity to every outer implementation detail.**\n\n## Three Tips Before You Start\n\n### Tip 1: Learn the smallest correct version first\n\nFor example, a subagent does not need every advanced capability on day one.\n\nThe smallest correct version already teaches the core lesson:\n\n- the parent defines the subtask\n- the child gets a separate `messages[]`\n- the child returns a summary\n\nOnly after that is stable should you add:\n\n- inherited context\n- separate permissions\n- background runtime\n- worktree isolation\n\n### Tip 2: New terms should be explained before they are used\n\nThis repo uses terms such as:\n\n- state machine\n- dispatch map\n- dependency graph\n- worktree\n- protocol envelope\n- MCP\n\nIf a term is unfamiliar, pause and check the reference docs rather than pushing forward blindly.\n\nRecommended companions:\n\n- [`glossary.md`](./glossary.md)\n- [`entity-map.md`](./entity-map.md)\n- [`data-structures.md`](./data-structures.md)\n- [`teaching-scope.md`](./teaching-scope.md)\n\n### Tip 3: Do not let peripheral complexity pretend to be core mechanism\n\nGood teaching does not try to include everything.\n\nIt explains the important parts completely and keeps low-value complexity out of your way:\n\n- packaging and release flow\n- enterprise integration glue\n- telemetry\n- product-specific compatibility branches\n- file-name / line-number reverse-engineering trivia\n\n## Bridge Docs That Matter\n\nTreat these as cross-chapter maps:\n\n| Doc | What It Clarifies |\n|---|---|\n| [`s00d-chapter-order-rationale.md`](./s00d-chapter-order-rationale.md) (Deep Dive) | why the curriculum order is what it is |\n| [`s00e-reference-module-map.md`](./s00e-reference-module-map.md) (Deep Dive) | how the reference repo's real module clusters map onto the current curriculum |\n| [`s00a-query-control-plane.md`](./s00a-query-control-plane.md) (Deep Dive) | why a high-completion agent needs more than `messages[] + while True` |\n| [`s00b-one-request-lifecycle.md`](./s00b-one-request-lifecycle.md) (Deep Dive) | how one request moves through the full system |\n| 
[`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md) (Deep Dive) | why tools become a control plane, not just a function table |\n| [`s10a-message-prompt-pipeline.md`](./s10a-message-prompt-pipeline.md) (Deep Dive) | why system prompt is only one input surface |\n| [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) (Deep Dive) | why durable tasks and live runtime slots must split |\n| [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md) (Deep Dive) | why MCP is more than a remote tool list |\n\n## The Four Learning Stages\n\n### Stage 1: Core Single-Agent (`s01-s06`)\n\nGoal: build a single agent that can actually do work.\n\n| Chapter | New Layer |\n|---|---|\n| `s01` | loop and write-back |\n| `s02` | tools and dispatch |\n| `s03` | session planning |\n| `s04` | delegated subtask isolation |\n| `s05` | skill discovery and loading |\n| `s06` | context compaction |\n\n### Stage 2: Hardening (`s07-s11`)\n\nGoal: make the loop safer, more stable, and easier to extend.\n\n| Chapter | New Layer |\n|---|---|\n| `s07` | permission gate |\n| `s08` | hooks and side effects |\n| `s09` | durable memory |\n| `s10` | prompt assembly |\n| `s11` | recovery and continuation |\n\n### Stage 3: Runtime Work (`s12-s14`)\n\nGoal: upgrade session work into durable, background, and scheduled runtime work.\n\n| Chapter | New Layer |\n|---|---|\n| `s12` | persistent task graph |\n| `s13` | runtime execution slots |\n| `s14` | time-based triggers |\n\n### Stage 4: Platform (`s15-s19`)\n\nGoal: grow from one executor into a larger platform.\n\n| Chapter | New Layer |\n|---|---|\n| `s15` | persistent teammates |\n| `s16` | structured team protocols |\n| `s17` | autonomous claiming and resuming |\n| `s18` | isolated execution lanes |\n| `s19` | external capability routing |\n\n## Quick Reference: What Each Chapter Adds\n\n| Chapter | Core Structure | What You Should Be Able To Build |\n|---|---|---|\n| `s01` | `LoopState`, `tool_result` write-back | a minimal working agent loop |\n| `s02` | `ToolSpec`, dispatch map | stable tool routing |\n| `s03` | `TodoItem`, `PlanState` | visible session planning |\n| `s04` | isolated child context | delegated subtasks without polluting the parent |\n| `s05` | `SkillRegistry` | cheap discovery and deep on-demand loading |\n| `s06` | compaction records | long sessions that stay usable |\n| `s07` | permission decisions | execution behind a gate |\n| `s08` | lifecycle events | extension without rewriting the loop |\n| `s09` | memory records | selective long-term memory |\n| `s10` | prompt parts | staged input assembly |\n| `s11` | continuation reasons | recovery branches that stay legible |\n| `s12` | `TaskRecord` | durable work graphs |\n| `s13` | `RuntimeTaskState` | background execution with later write-back |\n| `s14` | `ScheduleRecord` | time-triggered work |\n| `s15` | `TeamMember`, inboxes | persistent teammates |\n| `s16` | protocol envelopes | structured request / response coordination |\n| `s17` | claim policy | self-claim and self-resume |\n| `s18` | `WorktreeRecord` | isolated execution lanes |\n| `s19` | capability routing | unified native + plugin + MCP routing |\n\n## Key Takeaway\n\n**A good chapter order is not a list of features. It is a path where each mechanism grows naturally out of the last one.**\n"
+ },
+ {
+ "version": null,
+ "slug": "s00a-query-control-plane",
+ "locale": "en",
+ "title": "s00a: Query Control Plane",
+ "kind": "bridge",
+ "filename": "s00a-query-control-plane.md",
+ "content": "# s00a: Query Control Plane\n\n> **Deep Dive** -- Best read after completing Stage 1 (s01-s06). It explains why the simple loop needs a coordination layer as the system grows.\n\n### When to Read This\n\nAfter you've built the basic loop and tools, and before you start Stage 2's hardening chapters.\n\n---\n\n> This bridge document answers one foundational question:\n>\n> **Why is `messages[] + while True` not enough for a high-completion agent?**\n\n## Why This Document Exists\n\n`s01` correctly teaches the smallest working loop:\n\n```text\nuser input\n ->\nmodel response\n ->\nif tool_use then execute\n ->\nappend result\n ->\ncontinue\n```\n\nThat is the right starting point.\n\nBut once the system grows, the harness needs a separate layer that manages the **query process itself**. A \"control plane\" (the part of a system that coordinates behavior rather than performing the work directly) sits above the data path and decides when, why, and how the loop should keep running:\n\n- current turn\n- continuation reason\n- recovery state\n- compaction state\n- budget changes\n- hook-driven continuation\n\nThat layer is the **query control plane**.\n\n## Terms First\n\n### What is a query?\n\nHere, a query is not a database lookup.\n\nIt means:\n\n> the full multi-turn process the system runs in order to finish one user request\n\n### What is a control plane?\n\nA control plane does not perform the business action itself.\n\nIt coordinates:\n\n- when execution continues\n- why it continues\n- what state is patched before the next turn\n\nIf you have worked with networking or infrastructure, the term is familiar -- the control plane decides where traffic goes, while the data plane carries the actual packets. The same idea applies here: the control plane decides whether the loop should keep running and why, while the execution layer does the actual model calls and tool work.\n\n### What is a transition?\n\nA transition explains:\n\n> why the previous turn did not end and why the next turn exists\n\nCommon reasons:\n\n- tool result write-back\n- truncated output recovery\n- retry after compaction\n- retry after transport failure\n\n## The Smallest Useful Mental Model\n\nThink of the query path in three layers:\n\n```text\n1. Input layer\n - messages\n - system prompt\n - user/system context\n\n2. Control layer\n - query state\n - turn count\n - transition reason\n - recovery / compaction / budget flags\n\n3. 
Execution layer\n - model call\n - tool execution\n - write-back\n```\n\nThe control plane does not replace the loop.\n\nIt makes the loop capable of handling more than one happy-path branch.\n\n## Why `messages[]` Alone Stops Being Enough\n\nAt demo scale, many learners put everything into `messages[]`.\n\nThat breaks down once the system needs to know:\n\n- whether reactive compaction already ran\n- how many continuation attempts happened\n- whether this turn is a retry or a normal write-back\n- whether a temporary output budget is active\n\nThose are not conversation contents.\n\nThey are **process-control state**.\n\n## Core Structures\n\n### `QueryParams`\n\nExternal input passed into the query engine:\n\n```python\nparams = {\n \"messages\": [...],\n \"system_prompt\": \"...\",\n \"tool_use_context\": {...},\n \"max_output_tokens_override\": None,\n \"max_turns\": None,\n}\n```\n\n### `QueryState`\n\nMutable state that changes across turns:\n\n```python\nstate = {\n \"messages\": [...],\n \"tool_use_context\": {...},\n \"turn_count\": 1,\n \"continuation_count\": 0,\n \"has_attempted_compact\": False,\n \"max_output_tokens_override\": None,\n \"transition\": None,\n}\n```\n\n### `TransitionReason`\n\nAn explicit reason for continuing:\n\n```python\nTRANSITIONS = (\n \"tool_result_continuation\",\n \"max_tokens_recovery\",\n \"compact_retry\",\n \"transport_retry\",\n)\n```\n\nThis is not ceremony. It makes logs, testing, debugging, and teaching much clearer.\n\n## Minimal Implementation Pattern\n\n### 1. Split entry params from live state\n\n```python\ndef query(params):\n state = {\n \"messages\": params[\"messages\"],\n \"tool_use_context\": params[\"tool_use_context\"],\n \"turn_count\": 1,\n \"transition\": None,\n }\n```\n\n### 2. Let every continue-site patch state explicitly\n\n```python\nstate[\"transition\"] = \"tool_result_continuation\"\nstate[\"turn_count\"] += 1\n```\n\n### 3. Make the next turn enter with a reason\n\nThe next loop iteration should know whether it exists because of:\n\n- normal write-back\n- retry\n- compaction\n- continuation after truncated output\n\n## What This Changes For You\n\nOnce you see the query control plane clearly, later chapters stop feeling like random features.\n\n- `s06` compaction becomes a state patch, not a magic jump\n- `s11` recovery becomes structured continuation, not just `try/except`\n- `s17` autonomy becomes another controlled continuation path, not a separate mystery loop\n\n## Key Takeaway\n\n**A query is not just messages flowing through a loop. It is a controlled process with explicit continuation state.**\n"
+ },
+ {
+ "version": null,
+ "slug": "s00b-one-request-lifecycle",
+ "locale": "en",
+ "title": "s00b: One Request Lifecycle",
+ "kind": "bridge",
+ "filename": "s00b-one-request-lifecycle.md",
+ "content": "# s00b: One Request Lifecycle\n\n> **Deep Dive** -- Best read after Stage 2 (s07-s11) when you want to see how all the pieces connect end-to-end.\n\n### When to Read This\n\nWhen you've learned several subsystems and want to see the full vertical flow of a single request.\n\n---\n\n> This bridge document connects the whole system into one continuous execution chain.\n>\n> It answers:\n>\n> **What really happens after one user message enters the system?**\n\n## Why This Document Exists\n\nWhen you read chapter by chapter, you can understand each mechanism in isolation:\n\n- `s01` loop\n- `s02` tools\n- `s07` permissions\n- `s09` memory\n- `s12-s19` tasks, teams, worktrees, MCP\n\nBut implementation gets difficult when you cannot answer:\n\n- what comes first?\n- when do memory and prompt assembly happen?\n- where do permissions sit relative to tools?\n- when do tasks, runtime slots, teammates, worktrees, and MCP enter?\n\nThis document gives you the vertical flow.\n\n## The Most Important Full Picture\n\n```text\nuser request\n |\n v\ninitialize query state\n |\n v\nassemble system prompt / messages / reminders\n |\n v\ncall model\n |\n +-- normal answer --------------------------> finish request\n |\n +-- tool_use\n |\n v\n tool router\n |\n +-- permission gate\n +-- hooks\n +-- native tool / MCP / agent / task / team\n |\n v\n execution result\n |\n +-- may update task / runtime / memory / worktree state\n |\n v\n write tool_result back to messages\n |\n v\n patch query state\n |\n v\n continue next turn\n```\n\n## Segment 1: A User Request Becomes Query State\n\nThe system does not treat one user request as one API call.\n\nIt first creates a query state for a process that may span many turns:\n\n```python\nquery_state = {\n \"messages\": [{\"role\": \"user\", \"content\": user_text}],\n \"turn_count\": 1,\n \"transition\": None,\n \"tool_use_context\": {...},\n}\n```\n\nThe key mental shift:\n\n**a request is a multi-turn runtime process, not a single model response.**\n\nRelated reading:\n\n- [`s01-the-agent-loop.md`](./s01-the-agent-loop.md)\n- [`s00a-query-control-plane.md`](./s00a-query-control-plane.md)\n\n## Segment 2: The Real Model Input Is Assembled\n\nThe harness usually does not send raw `messages` directly.\n\nIt assembles:\n\n- system prompt blocks\n- normalized messages\n- memory attachments\n- reminders\n- tool definitions\n\nSo the actual payload is closer to:\n\n```text\nsystem prompt\n+ normalized messages\n+ tools\n+ optional reminders and attachments\n```\n\nRelated chapters:\n\n- `s09`\n- `s10`\n- [`s10a-message-prompt-pipeline.md`](./s10a-message-prompt-pipeline.md)\n\n## Segment 3: The Model Produces Either an Answer or an Action Intent\n\nThere are two important output classes.\n\n### Normal answer\n\nThe request may end here.\n\n### Action intent\n\nThis usually means a tool call, for example:\n\n- `read_file(...)`\n- `bash(...)`\n- `task_create(...)`\n- `mcp__server__tool(...)`\n\nThe system is no longer receiving only text.\n\nIt is receiving an instruction that should affect the real world.\n\n## Segment 4: The Tool Control Plane Takes Over\n\nOnce `tool_use` appears, the system enters the tool control plane (the layer that decides how a tool call gets routed, checked, and executed).\n\nIt answers:\n\n1. which tool is this?\n2. where should it route?\n3. should it pass a permission gate?\n4. do hooks observe or modify the action?\n5. 
what shared runtime context can it access?\n\nMinimal picture:\n\n```text\ntool_use\n |\n v\ntool router\n |\n +-- native handler\n +-- MCP client\n +-- agent / team / task runtime\n```\n\nRelated reading:\n\n- [`s02-tool-use.md`](./s02-tool-use.md)\n- [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md)\n\n## Segment 5: Execution May Update More Than Messages\n\nA tool result does not only return text.\n\nExecution may also update:\n\n- task board state\n- runtime task state\n- memory records\n- request records\n- worktree records\n\nThat is why middle and late chapters are not optional side features. They become part of the request lifecycle.\n\n## Segment 6: Results Rejoin the Main Loop\n\nThe crucial step is always the same:\n\n```text\nreal execution result\n ->\ntool_result or structured write-back\n ->\nmessages / query state updated\n ->\nnext turn\n```\n\nIf the result never re-enters the loop, the model cannot reason over reality.\n\n## A Useful Compression\n\nWhen you get lost, compress the whole lifecycle into three layers:\n\n### Query loop\n\nOwns the multi-turn request process.\n\n### Tool control plane\n\nOwns routing, permissions, hooks, and execution context.\n\n### Platform state\n\nOwns durable records such as tasks, runtime slots, teammates, worktrees, and external capability configuration.\n\n## Key Takeaway\n\n**A user request enters as query state, moves through assembled input, becomes action intent, crosses the tool control plane, touches platform state, and then returns to the loop as new visible context.**\n"
+ },
+ {
+ "version": null,
+ "slug": "s00c-query-transition-model",
+ "locale": "en",
+ "title": "s00c: Query Transition Model",
+ "kind": "bridge",
+ "filename": "s00c-query-transition-model.md",
+ "content": "# s00c: Query Transition Model\n\n> **Deep Dive** -- Best read alongside s11 (Error Recovery). It deepens the transition model introduced in s00a.\n\n### When to Read This\n\nWhen you're working on error recovery and want to understand why each continuation needs an explicit reason.\n\n---\n\n> This bridge note answers one narrow but important question:\n>\n> **Why does a high-completion agent need to know _why_ a query continues into the next turn, instead of treating every `continue` as the same thing?**\n\n## Why This Note Exists\n\nThe mainline already teaches:\n\n- `s01`: the smallest loop\n- `s06`: compaction and context control\n- `s11`: error recovery\n\nThat sequence is correct.\n\nThe problem is what you often carry in your head after reading those chapters separately:\n\n> \"The loop continues because it continues.\"\n\nThat is enough for a toy demo, but it breaks down quickly in a larger system.\n\nA query can continue for very different reasons:\n\n- a tool just finished and the model needs the result\n- the output hit a token limit and the model should continue\n- compaction changed the active context and the system should retry\n- the transport layer failed and backoff says \"try again\"\n- a stop hook said the turn should not fully end yet\n- a budget policy still allows the system to keep going\n\nIf all of those collapse into one vague `continue`, three things get worse fast:\n\n- logs stop being readable\n- tests stop being precise\n- the teaching mental model becomes blurry\n\n## Terms First\n\n### What is a transition\n\nHere, a transition means:\n\n> the reason the previous turn became the next turn\n\nIt is not the message content itself. It is the control-flow cause.\n\n### What is a continuation\n\nA continuation means:\n\n> this query is still alive and should keep advancing\n\nBut continuation is not one thing. It is a family of reasons.\n\n### What is a query boundary\n\nA query boundary is the edge between one turn and the next.\n\nWhenever the system crosses that boundary, it should know:\n\n- why it is crossing\n- what state was changed before the crossing\n- how the next turn should interpret that change\n\n## The Minimum Mental Model\n\nDo not picture a query as a single straight line.\n\nA better mental model is:\n\n```text\none query\n = a chain of state transitions\n with explicit continuation reasons\n```\n\nFor example:\n\n```text\nuser input\n ->\nmodel emits tool_use\n ->\ntool finishes\n ->\ntool_result_continuation\n ->\nmodel output is truncated\n ->\nmax_tokens_recovery\n ->\ncompaction happens\n ->\ncompact_retry\n ->\nfinal completion\n```\n\nThat is why the real lesson is not:\n\n> \"the loop keeps spinning\"\n\nThe real lesson is:\n\n> \"the system is advancing through typed transition reasons\"\n\n## Core Records\n\n### 1. `transition` inside query state\n\nEven a teaching implementation should carry an explicit transition field:\n\n```python\nstate = {\n \"messages\": [...],\n \"turn_count\": 3,\n \"continuation_count\": 1,\n \"has_attempted_compact\": False,\n \"transition\": None,\n}\n```\n\nThis field is not decoration.\n\nIt tells you:\n\n- why this turn exists\n- how the log should explain it\n- what path a test should assert\n\n### 2. 
`TransitionReason`\n\nA minimal teaching set can look like this:\n\n```python\nTRANSITIONS = (\n \"tool_result_continuation\",\n \"max_tokens_recovery\",\n \"compact_retry\",\n \"transport_retry\",\n \"stop_hook_continuation\",\n \"budget_continuation\",\n)\n```\n\nThese reasons are not equivalent:\n\n- `tool_result_continuation`\n is normal loop progress\n- `max_tokens_recovery`\n is continuation after truncated output\n- `compact_retry`\n is continuation after context reshaping\n- `transport_retry`\n is continuation after infrastructure failure\n- `stop_hook_continuation`\n is continuation forced by external control logic\n- `budget_continuation`\n is continuation allowed by policy and remaining budget\n\n### 3. Continuation budget\n\nHigh-completion systems do not just continue. They limit continuation.\n\nTypical fields look like:\n\n```python\nstate = {\n \"max_output_tokens_recovery_count\": 2,\n \"has_attempted_reactive_compact\": True,\n}\n```\n\nThe principle is:\n\n> continuation is a controlled resource, not an infinite escape hatch\n\n## Minimum Implementation Steps\n\n### Step 1: make every continue site explicit\n\nMany beginner loops still look like this:\n\n```python\ncontinue\n```\n\nMove one step forward:\n\n```python\nstate[\"transition\"] = \"tool_result_continuation\"\ncontinue\n```\n\n### Step 2: pair each continuation with its state patch\n\n```python\nif response.stop_reason == \"tool_use\":\n state[\"messages\"] = append_tool_results(...)\n state[\"turn_count\"] += 1\n state[\"transition\"] = \"tool_result_continuation\"\n continue\n\nif response.stop_reason == \"max_tokens\":\n state[\"messages\"].append({\n \"role\": \"user\",\n \"content\": CONTINUE_MESSAGE,\n })\n state[\"max_output_tokens_recovery_count\"] += 1\n state[\"transition\"] = \"max_tokens_recovery\"\n continue\n```\n\nThe important part is not \"one more line of code.\"\n\nThe important part is:\n\n> before every continuation, the system knows both the reason and the state mutation\n\n### Step 3: separate normal progress from recovery\n\n```python\nif should_retry_transport(error):\n time.sleep(backoff(...))\n state[\"transition\"] = \"transport_retry\"\n continue\n\nif should_recompact(error):\n state[\"messages\"] = compact_messages(state[\"messages\"])\n state[\"transition\"] = \"compact_retry\"\n continue\n```\n\nOnce you do this, \"continue\" stops being a vague action and becomes a typed control transition.\n\n## What to Test\n\nYour teaching repo should make these assertions straightforward:\n\n- a tool result writes `tool_result_continuation`\n- a truncated model output writes `max_tokens_recovery`\n- compaction retry does not silently reuse the old reason\n- transport retry increments retry state and does not look like a normal turn\n\nIf those paths are not easy to test, the model is probably still too implicit.\n\n## What Not to Over-Teach\n\nYou do not need to bury yourself in vendor-specific transport details or every corner-case enum.\n\nFor a teaching repo, the core lesson is narrower:\n\n> one query is a sequence of explicit transitions, and each transition should carry a reason, a state patch, and a budget rule\n\nThat is the part you actually need if you want to rebuild a high-completion agent from zero.\n\n## Key Takeaway\n\n**Every continuation needs a typed reason. Without one, logs blur, tests weaken, and the mental model collapses into \"the loop keeps spinning.\"**\n"
+ },
+ {
+ "version": null,
+ "slug": "s00d-chapter-order-rationale",
+ "locale": "en",
+ "title": "s00d: Chapter Order Rationale",
+ "kind": "bridge",
+ "filename": "s00d-chapter-order-rationale.md",
+ "content": "# s00d: Chapter Order Rationale\n\n> **Deep Dive** -- Read this after completing Stage 1 (s01-s06) or whenever you wonder \"why is the course ordered this way?\"\n\nThis note is not about one mechanism. It answers a more basic teaching question: why does this curriculum teach the system in the current order instead of following source-file order, feature hype, or raw implementation complexity?\n\n## Conclusion First\n\nThe current `s01 -> s19` order is structurally sound.\n\nIts strength is not just breadth. Its strength is that it grows the system in the same order you should understand it:\n\n1. Build the smallest working agent loop.\n2. Add the control-plane and hardening layers around that loop.\n3. Upgrade session planning into durable work and runtime state.\n4. Only then expand into persistent teams, isolated execution lanes, and external capability buses.\n\nThat is the right teaching order because it follows:\n\n**dependency order between mechanisms**\n\nnot file order or product packaging order.\n\n## The Four Dependency Lines\n\nThis curriculum is really organized by four dependency lines:\n\n1. `core loop dependency`\n2. `control-plane dependency`\n3. `work-state dependency`\n4. `platform-boundary dependency`\n\nIn plain English:\n\n```text\nfirst make the agent run\n -> then make it run safely\n -> then make it run durably\n -> then make it run as a platform\n```\n\n## The Real Shape of the Sequence\n\n```text\ns01-s06\n build one working single-agent system\n\ns07-s11\n harden and control that system\n\ns12-s14\n turn temporary planning into durable work + runtime\n\ns15-s19\n expand into teammates, protocols, autonomy, isolated lanes, and external capability\n```\n\nAfter each stage, you should be able to say:\n\n- after `s06`: \"I can build one real single-agent harness\"\n- after `s11`: \"I can make that harness safer, steadier, and easier to extend\"\n- after `s14`: \"I can manage durable work, background execution, and time-triggered starts\"\n- after `s19`: \"I understand the platform boundary of a high-completion agent system\"\n\n## Why The Early Chapters Must Stay In Their Current Order\n\n### `s01` must stay first\n\nBecause it establishes:\n\n- the minimal entry point\n- the turn-by-turn loop\n- why tool results must flow back into the next model call\n\nWithout this, everything later becomes disconnected feature talk.\n\n### `s02` must immediately follow `s01`\n\nBecause an agent that cannot route intent into tools is still only talking, not acting.\n\n`s02` is where learners first see the harness become real:\n\n- model emits `tool_use`\n- the system dispatches to a handler\n- the tool executes\n- `tool_result` flows back into the loop\n\n### `s03` should stay before `s04`\n\nThis is an important guardrail.\n\nYou should first understand:\n\n- how the current agent organizes its own work\n\nbefore learning:\n\n- when to delegate work into a separate sub-context\n\nIf `s04` comes too early, subagents become an escape hatch instead of a clear isolation mechanism.\n\n### `s05` should stay before `s06`\n\nThese two chapters solve two halves of the same problem:\n\n- `s05`: prevent unnecessary knowledge from entering the context\n- `s06`: manage the context that still must remain active\n\nThat order matters. 
A good system first avoids bloat, then compacts what is still necessary.\n\n## Why `s07-s11` Form One Hardening Block\n\nThese chapters all answer the same larger question:\n\n**the loop already works, so how does it become stable, safe, and legible as a real system?**\n\n### `s07` should stay before `s08`\n\nPermission comes first because the system must first answer:\n\n- may this action happen at all\n- should it be denied\n- should it ask the user first\n\nOnly after that should you teach hooks, which answer:\n\n- what extra behavior attaches around the loop\n\nSo the correct teaching order is:\n\n**gate first, extend second**\n\n### `s09` should stay before `s10`\n\nThis is another very important ordering decision.\n\n`s09` teaches:\n\n- what durable information exists\n- which facts deserve long-term storage\n\n`s10` teaches:\n\n- how multiple information sources are assembled into model input\n\nThat means:\n\n- memory defines one content source\n- prompt assembly explains how all content sources are combined\n\nIf you reverse them, prompt construction starts to feel arbitrary and mysterious.\n\n### `s11` is the right closing chapter for this block\n\nError recovery is not an isolated feature.\n\nIt is where the system finally needs to explain:\n\n- why it is continuing\n- why it is retrying\n- why it is stopping\n\nThat only becomes legible after the input path, tool path, state path, and control path already exist.\n\n## Why `s12-s14` Must Stay Goal -> Runtime -> Schedule\n\nThis is the easiest part of the curriculum to teach badly if the order is wrong.\n\n### `s12` must stay before `s13`\n\n`s12` teaches:\n\n- what work exists\n- dependency relations between work nodes\n- when downstream work unlocks\n\n`s13` teaches:\n\n- what live execution is currently running\n- where background results go\n- how runtime state writes back\n\nThat is the crucial distinction:\n\n- `task` is the durable work goal\n- `runtime task` is the live execution slot\n\nIf `s13` comes first, you will almost certainly collapse those two into one concept.\n\n### `s14` must stay after `s13`\n\nCron does not add another kind of task.\n\nIt adds a new start condition:\n\n**time becomes one more way to launch work into the runtime**\n\nSo the right order is:\n\n`durable task graph -> runtime slot -> schedule trigger`\n\n## Why `s15-s19` Should Stay Team -> Protocol -> Autonomy -> Worktree -> Capability Bus\n\n### `s15` defines who persists in the system\n\nBefore protocols or autonomy make sense, the system needs durable actors:\n\n- who teammates are\n- what identity they carry\n- how they persist across work\n\n### `s16` then defines how those actors coordinate\n\nProtocols should not come before actors.\n\nProtocols exist to structure:\n\n- who requests\n- who approves\n- who responds\n- how requests remain traceable\n\n### `s17` only makes sense after both\n\nAutonomy is easy to teach vaguely.\n\nBut in a real system it only becomes clear after:\n\n- persistent teammates exist\n- structured coordination already exists\n\nOtherwise \"autonomous claiming\" sounds like magic instead of the bounded mechanism it really is.\n\n### `s18` should stay before `s19`\n\nWorktree isolation is a local execution-boundary problem:\n\n- where parallel work actually runs\n- how one work lane stays isolated from another\n\nThat should become clear before moving outward into:\n\n- plugins\n- MCP servers\n- external capability routing\n\nOtherwise you risk over-focusing on external capability and under-learning the local 
platform boundary.\n\n### `s19` is correctly last\n\nIt is the outer platform boundary.\n\nIt only becomes clean once you already understand:\n\n- local actors\n- local work lanes\n- local durable work\n- local runtime execution\n- then external capability providers\n\n## Five Reorders That Would Make The Course Worse\n\n1. Moving `s04` before `s03`\n This teaches delegation before local planning.\n\n2. Moving `s10` before `s09`\n This teaches prompt assembly before the learner understands one of its core inputs.\n\n3. Moving `s13` before `s12`\n This collapses durable goals and live runtime slots into one confused idea.\n\n4. Moving `s17` before `s15` or `s16`\n This turns autonomy into vague polling magic.\n\n5. Moving `s19` before `s18`\n This makes the external platform look more important than the local execution boundary.\n\n## A Good Maintainer Check Before Reordering\n\nBefore moving chapters around, ask:\n\n1. Does the learner already understand the prerequisite concept?\n2. Will this reorder blur two concepts that should stay separate?\n3. Is this chapter mainly about goals, runtime state, actors, or capability boundaries?\n4. If I move it earlier, will the reader still be able to build the minimal correct version?\n5. Am I optimizing for understanding, or merely copying source-file order?\n\nIf the honest answer to the last question is \"source-file order\", the reorder is probably a mistake.\n\n## Key Takeaway\n\n**A good chapter order is not just a list of mechanisms. It is a sequence where each chapter feels like the next natural layer grown from the previous one.**\n"
+ },
+ {
+ "version": null,
+ "slug": "s00e-reference-module-map",
+ "locale": "en",
+ "title": "s00e: Reference Module Map",
+ "kind": "bridge",
+ "filename": "s00e-reference-module-map.md",
+ "content": "# s00e: Reference Module Map\n\n> **Deep Dive** -- Read this when you want to verify how the teaching chapters map to the real production codebase.\n\nThis is a calibration note for maintainers and serious learners. It does not turn the reverse-engineered source into required reading. Instead, it answers one narrow but important question: if you compare the high-signal module clusters in the reference repo with this teaching repo, is the current chapter order actually rational?\n\n## Verdict First\n\nYes.\n\nThe current `s01 -> s19` order is broadly correct, and it is closer to the real design backbone than any naive \"follow the source tree\" order would be.\n\nThe reason is simple:\n\n- the reference repo contains many surface-level directories\n- but the real design weight is concentrated in a smaller set of control, state, task, team, worktree, and capability modules\n- those modules line up with the current four-stage teaching path\n\nSo the right move is **not** to flatten the teaching repo into source-tree order.\n\nThe right move is:\n\n- keep the current dependency-driven order\n- make the mapping to the reference repo explicit\n- keep removing low-value product detail from the mainline\n\n## How This Comparison Was Done\n\nThe comparison was based on the reference repo's higher-signal clusters, especially modules around:\n\n- `Tool.ts`\n- `state/AppStateStore.ts`\n- `coordinator/coordinatorMode.ts`\n- `memdir/*`\n- `services/SessionMemory/*`\n- `services/toolUseSummary/*`\n- `constants/prompts.ts`\n- `tasks/*`\n- `tools/TodoWriteTool/*`\n- `tools/AgentTool/*`\n- `tools/ScheduleCronTool/*`\n- `tools/EnterWorktreeTool/*`\n- `tools/ExitWorktreeTool/*`\n- `tools/MCPTool/*`\n- `services/mcp/*`\n- `plugins/*`\n- `hooks/toolPermission/*`\n\nThis is enough to judge the backbone without dragging you through every product-facing command, compatibility branch, or UI detail.\n\n## The Real Mapping\n\n| Reference repo cluster | Typical examples | Teaching chapter(s) | Why this placement is right |\n|---|---|---|---|\n| Query loop + control state | `Tool.ts`, `AppStateStore.ts`, query/coordinator state | `s00`, `s00a`, `s00b`, `s01`, `s11` | The real system is not just `messages[] + while True`. The teaching repo is right to start with the tiny loop first, then add the control plane later. |\n| Tool routing and execution plane | `Tool.ts`, native tools, tool context, execution helpers | `s02`, `s02a`, `s02b` | The source clearly treats tools as a shared execution surface, not a toy dispatch table. The teaching split is correct. |\n| Session planning | `TodoWriteTool` | `s03` | Session planning is a small but central layer. It belongs early, before durable tasks. |\n| One-shot delegation | `AgentTool` in its simplest form | `s04` | The reference repo's agent spawning machinery is large, but the teaching repo is right to teach the smallest clean subagent first: fresh context, bounded task, summary return. |\n| Skill discovery and loading | `DiscoverSkillsTool`, `skills/*`, prompt sections | `s05` | Skills are not random extras. They are a selective knowledge-loading layer, so they belong before prompt and context pressure become severe. |\n| Context pressure and collapse | `services/toolUseSummary/*`, `services/contextCollapse/*`, compact logic | `s06` | The reference repo clearly has explicit compaction machinery. Teaching this before later platform features is correct. 
|\n| Permission gate | `types/permissions.ts`, `hooks/toolPermission/*`, approval handlers | `s07` | Execution safety is a distinct gate, not \"just another hook\". Keeping it before hooks is the right teaching choice. |\n| Hooks and side effects | `types/hooks.ts`, hook runners, lifecycle integrations | `s08` | The source separates extension points from the primary gate. Teaching them after permissions preserves that boundary. |\n| Durable memory selection | `memdir/*`, `services/SessionMemory/*`, extract/select memory helpers | `s09` | The source makes memory a selective cross-session layer, not a generic notebook. Teaching this before prompt assembly is correct. |\n| Prompt assembly | `constants/prompts.ts`, prompt sections, memory prompt loading | `s10`, `s10a` | The source builds inputs from many sections. The teaching repo is right to present prompt assembly as a pipeline instead of one giant string. |\n| Recovery and continuation | query transition reasons, retry branches, compaction retry, token recovery | `s11`, `s00c` | The reference repo has explicit continuation logic. This belongs after loop, tools, compaction, permissions, memory, and prompt assembly already exist. |\n| Durable work graph | task records, task board concepts, dependency unlocks | `s12` | The teaching repo correctly separates durable work goals from temporary session planning. |\n| Live runtime tasks | `tasks/types.ts`, `LocalShellTask`, `LocalAgentTask`, `RemoteAgentTask`, `MonitorMcpTask` | `s13`, `s13a` | The source has a clear runtime-task union. This strongly validates the teaching split between `TaskRecord` and `RuntimeTaskState`. |\n| Scheduled triggers | `ScheduleCronTool/*`, `useScheduledTasks` | `s14` | Scheduling appears after runtime work exists, which is exactly the correct dependency order. |\n| Persistent teammates | `InProcessTeammateTask`, team tools, agent registries | `s15` | The source clearly grows from one-shot subagents into durable actors. Teaching teammates later is correct. |\n| Structured team coordination | message envelopes, send-message flows, request tracking, coordinator mode | `s16` | Protocols make sense only after durable actors exist. The current order matches the real dependency. |\n| Autonomous claiming and resuming | coordinator mode, task claiming, async worker lifecycle, resume logic | `s17` | Autonomy in the source is not magic. It is layered on top of actors, tasks, and coordination rules. The current placement is correct. |\n| Worktree execution lanes | `EnterWorktreeTool`, `ExitWorktreeTool`, agent worktree helpers | `s18` | The reference repo treats worktree as an execution-lane boundary with closeout logic. Teaching it after tasks and teammates prevents concept collapse. |\n| External capability bus | `MCPTool`, `services/mcp/*`, `plugins/*`, MCP resources/prompts/tools | `s19`, `s19a` | The source clearly places MCP and plugins at the outer platform boundary. Keeping this last is the right teaching choice. |\n\n## The Most Important Validation Points\n\nThe reference repo strongly confirms five teaching choices.\n\n### 1. `s03` should stay before `s12`\n\nThe source contains both:\n\n- small session planning\n- larger durable task/runtime machinery\n\nThose are not the same thing.\n\nThe teaching repo is correct to teach:\n\n`session planning first -> durable tasks later`\n\n### 2. 
`s09` should stay before `s10`\n\nThe source builds the model input from multiple sources, including memory.\n\nThat means:\n\n- memory is one input source\n- prompt assembly is the pipeline that combines sources\n\nSo memory should be explained before prompt assembly.\n\n### 3. `s12` must stay before `s13`\n\nThe runtime-task union in the reference repo is one of the strongest pieces of evidence in the whole comparison.\n\nIt shows that:\n\n- durable work definitions\n- live running executions\n\nmust stay conceptually separate.\n\nIf `s13` came first, you would almost certainly merge those two layers.\n\n### 4. `s15 -> s16 -> s17` is the right order\n\nThe source has:\n\n- durable actors\n- structured coordination\n- autonomous resume / claiming behavior\n\nAutonomy depends on the first two. So the current order is correct.\n\n### 5. `s18` should stay before `s19`\n\nThe reference repo treats worktree isolation as a local execution-boundary mechanism.\n\nThat should be understood before you are asked to reason about:\n\n- external capability providers\n- MCP servers\n- plugin-installed surfaces\n\nOtherwise external capability looks more central than it really is.\n\n## What This Teaching Repo Should Still Avoid Copying\n\nThe reference repo contains many things that are real, but should still not dominate the teaching mainline:\n\n- CLI command surface area\n- UI rendering details\n- telemetry and analytics branches\n- product integration glue\n- remote and enterprise wiring\n- platform-specific compatibility code\n- line-by-line naming trivia\n\nThese are valid implementation details.\n\nThey are not the right center of a 0-to-1 teaching path.\n\n## Where The Teaching Repo Must Be Extra Careful\n\nThe mapping also reveals several places where things can easily drift into confusion.\n\n### 1. Do not merge subagents and teammates into one vague concept\n\nThe reference repo's `AgentTool` spans:\n\n- one-shot delegation\n- async/background workers\n- teammate-like persistent workers\n- worktree-isolated workers\n\nThat is exactly why the teaching repo should split the story across:\n\n- `s04`\n- `s15`\n- `s17`\n- `s18`\n\n### 2. Do not teach worktree as \"just a git trick\"\n\nThe source shows closeout, resume, cleanup, and isolation state around worktrees.\n\nSo `s18` should keep teaching:\n\n- lane identity\n- task binding\n- keep/remove closeout\n- resume and cleanup concerns\n\nnot just `git worktree add`.\n\n### 3. Do not reduce MCP to \"remote tools\"\n\nThe source includes:\n\n- tools\n- resources\n- prompts\n- elicitation / connection state\n- plugin mediation\n\nSo `s19` should keep a tools-first teaching path, but still explain the wider capability-bus boundary.\n\n## Final Judgment\n\nCompared against the high-signal module clusters in the reference repo, the current chapter order is sound.\n\nThe biggest remaining quality gains do **not** come from another major reorder.\n\nThey come from:\n\n- cleaner bridge docs\n- stronger entity-boundary explanations\n- tighter multilingual consistency\n- web pages that expose the same learning map clearly\n\n## Key Takeaway\n\n**The best teaching order is not the order files appear in a repo. It is the order in which dependencies become understandable to a learner who wants to rebuild the system.**\n"
+ },
+ {
+ "version": null,
+ "slug": "s00f-code-reading-order",
+ "locale": "en",
+ "title": "s00f: Code Reading Order",
+ "kind": "bridge",
+ "filename": "s00f-code-reading-order.md",
+ "content": "# s00f: Code Reading Order\n\n> **Deep Dive** -- Read this when you're about to open the Python agent files and want a strategy for reading them.\n\nThis page is not about reading more code. It answers a narrower question: once the chapter order is stable, what is the cleanest order for reading this repository's code without scrambling your mental model again?\n\n## Conclusion First\n\nDo not read the code like this:\n\n- do not start with the longest file\n- do not jump straight into the most \"advanced\" chapter\n- do not open `web/` first and then guess the mainline\n- do not treat all `agents/*.py` files like one flat source pool\n\nThe stable rule is simple:\n\n**read the code in the same order as the curriculum.**\n\nInside each chapter file, keep the same reading order:\n\n1. state structures\n2. tool definitions or registries\n3. the function that advances one turn\n4. the CLI entry last\n\n## Why This Page Exists\n\nYou will probably not get lost in the prose first. You will get lost when you finally open the code and immediately start scanning the wrong things.\n\nTypical mistakes:\n\n- staring at the bottom half of a long file first\n- reading a pile of `run_*` helpers before knowing where they connect\n- jumping into late platform chapters and treating early chapters as \"too simple\"\n- collapsing `task`, `runtime task`, `teammate`, and `worktree` back into one vague idea\n\n## Use The Same Reading Template For Every Agent File\n\nFor any `agents/sXX_*.py`, read in this order:\n\n### 1. File header\n\nAnswer two questions before anything else:\n\n- what is this chapter teaching\n- what is it intentionally not teaching yet\n\n### 2. State structures or manager classes\n\nLook for things like:\n\n- `LoopState`\n- `PlanningState`\n- `CompactState`\n- `TaskManager`\n- `BackgroundManager`\n- `TeammateManager`\n- `WorktreeManager`\n\n### 3. Tool list or registry\n\nLook for:\n\n- `TOOLS`\n- `TOOL_HANDLERS`\n- `build_tool_pool()`\n- the important `run_*` entrypoints\n\n### 4. The turn-advancing function\n\nUsually this is one of:\n\n- `run_one_turn(...)`\n- `agent_loop(...)`\n- a chapter-specific `handle_*`\n\n### 5. 
CLI entry last\n\n`if __name__ == \"__main__\"` matters, but it should not be the first thing you study.\n\n## Stage 1: `s01-s06`\n\nThis stage is the single-agent backbone taking shape.\n\n| Chapter | File | Read First | Then Read | Confirm Before Moving On |\n|---|---|---|---|---|\n| `s01` | `agents/s01_agent_loop.py` | `LoopState` | `TOOLS` -> `execute_tool_calls()` -> `run_one_turn()` -> `agent_loop()` | You can trace `messages -> model -> tool_result -> next turn` |\n| `s02` | `agents/s02_tool_use.py` | `safe_path()` | tool handlers -> `TOOL_HANDLERS` -> `agent_loop()` | You understand how tools grow without rewriting the loop |\n| `s03` | `agents/s03_todo_write.py` | planning state types | todo handler path -> reminder injection -> `agent_loop()` | You understand visible session planning state |\n| `s04` | `agents/s04_subagent.py` | `AgentTemplate` | `run_subagent()` -> parent `agent_loop()` | You understand that subagents are mainly context isolation |\n| `s05` | `agents/s05_skill_loading.py` | skill registry types | registry methods -> `agent_loop()` | You understand discover light, load deep |\n| `s06` | `agents/s06_context_compact.py` | `CompactState` | persist / micro compact / history compact -> `agent_loop()` | You understand that compaction relocates detail instead of deleting continuity |\n\n## Stage 2: `s07-s11`\n\nThis stage hardens the control plane around a working single agent.\n\n| Chapter | File | Read First | Then Read | Confirm Before Moving On |\n|---|---|---|---|---|\n| `s07` | `agents/s07_permission_system.py` | validator / manager | permission path -> `run_bash()` -> `agent_loop()` | You understand gate before execute |\n| `s08` | `agents/s08_hook_system.py` | `HookManager` | hook registration and dispatch -> `agent_loop()` | You understand fixed extension points |\n| `s09` | `agents/s09_memory_system.py` | memory managers | save path -> prompt build -> `agent_loop()` | You understand memory as a long-term information layer |\n| `s10` | `agents/s10_system_prompt.py` | `SystemPromptBuilder` | reminder builder -> `agent_loop()` | You understand input assembly as a pipeline |\n| `s11` | `agents/s11_error_recovery.py` | compact / backoff helpers | recovery branches -> `agent_loop()` | You understand continuation after failure |\n\n## Stage 3: `s12-s14`\n\nThis stage turns the harness into a work runtime.\n\n| Chapter | File | Read First | Then Read | Confirm Before Moving On |\n|---|---|---|---|---|\n| `s12` | `agents/s12_task_system.py` | `TaskManager` | task create / dependency / unlock -> `agent_loop()` | You understand durable work goals |\n| `s13` | `agents/s13_background_tasks.py` | `NotificationQueue` / `BackgroundManager` | background registration -> notification drain -> `agent_loop()` | You understand runtime slots |\n| `s14` | `agents/s14_cron_scheduler.py` | `CronLock` / `CronScheduler` | cron match -> trigger -> `agent_loop()` | You understand future start conditions |\n\n## Stage 4: `s15-s19`\n\nThis stage is about platform boundaries.\n\n| Chapter | File | Read First | Then Read | Confirm Before Moving On |\n|---|---|---|---|---|\n| `s15` | `agents/s15_agent_teams.py` | `MessageBus` / `TeammateManager` | roster / inbox / loop -> `agent_loop()` | You understand persistent teammates |\n| `s16` | `agents/s16_team_protocols.py` | `RequestStore` / `TeammateManager` | request handlers -> `agent_loop()` | You understand request-response plus `request_id` |\n| `s17` | `agents/s17_autonomous_agents.py` | claim and identity helpers | claim path -> resume path 
-> `agent_loop()` | You understand idle check -> safe claim -> resume work |\n| `s18` | `agents/s18_worktree_task_isolation.py` | `TaskManager` / `WorktreeManager` / `EventBus` | worktree lifecycle -> `agent_loop()` | You understand goals versus execution lanes |\n| `s19` | `agents/s19_mcp_plugin.py` | capability gate / MCP client / plugin loader / router | tool pool build -> route -> normalize -> `agent_loop()` | You understand how external capability enters the same control plane |\n\n## Best Doc + Code Loop\n\nFor each chapter:\n\n1. read the chapter prose\n2. read the bridge note for that chapter\n3. open the matching `agents/sXX_*.py`\n4. follow the order: state -> tools -> turn driver -> CLI entry\n5. run the demo once\n6. rewrite the smallest version from scratch\n\n## Key Takeaway\n\n**Code reading order must obey teaching order: read boundaries first, then state, then the path that advances the loop.**\n"
+ },
{
"version": "s01",
+ "slug": "s01-the-agent-loop",
"locale": "en",
"title": "s01: The Agent Loop",
- "content": "# s01: The Agent Loop\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- one tool + one loop = an agent.\n\n## Problem\n\nA language model can reason about code, but it can't *touch* the real world -- can't read files, run tests, or check errors. Without a loop, every tool call requires you to manually copy-paste results back. You become the loop.\n\n## Solution\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\nOne exit condition controls the entire flow. The loop runs until the model stops calling tools.\n\n## How It Works\n\n1. User prompt becomes the first message.\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. Send messages + tool definitions to the LLM.\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. Append the assistant response. Check `stop_reason` -- if the model didn't call a tool, we're done.\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. Execute each tool call, collect results, append as a user message. Loop back to step 2.\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\nAssembled into one function:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\nThat's the entire agent in under 30 lines. Everything else in this course layers on top -- without changing the loop.\n\n## What Changed\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n"
+ "kind": "chapter",
+ "filename": "s01-the-agent-loop.md",
+ "content": "# s01: The Agent Loop\n\n`[ s01 ] > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n## What You'll Learn\n\n- How the core agent loop works: send messages, run tools, feed results back\n- Why the \"write-back\" step is the single most important idea in agent design\n- How to build a working agent in under 30 lines of Python\n\nImagine you have a brilliant assistant who can reason about code, plan solutions, and write great answers -- but cannot touch anything. Every time it suggests running a command, you have to copy it, run it yourself, paste the output back, and wait for the next suggestion. You are the loop. This chapter removes you from that loop.\n\n## The Problem\n\nWithout a loop, every tool call requires a human in the middle. The model says \"run this test.\" You run it. You paste the output. The model says \"now fix line 12.\" You fix it. You tell the model what happened. This manual back-and-forth might work for a single question, but it falls apart completely when a task requires 10, 20, or 50 tool calls in a row.\n\nThe solution is simple: let the code do the looping.\n\n## The Solution\n\nHere's the entire system in one picture:\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until the model stops calling tools)\n```\n\nThe model talks, the harness (the code wrapping the model) executes tools, and the results go right back into the conversation. The loop keeps spinning until the model decides it's done.\n\n## How It Works\n\n**Step 1.** The user's prompt becomes the first message.\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n**Step 2.** Send the conversation to the model, along with tool definitions.\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n**Step 3.** Add the model's response to the conversation. Then check: did it call a tool, or is it done?\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\n\n# If the model didn't call a tool, the task is finished\nif response.stop_reason != \"tool_use\":\n return\n```\n\n**Step 4.** Execute each tool call, collect the results, and put them back into the conversation as a new message. 
Then loop back to Step 2.\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id, # links result to the tool call\n \"content\": output,\n })\n# This is the \"write-back\" -- the model can now see the real-world result\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\nPut it all together, and the entire agent fits in one function:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return # model is done\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\nThat's the entire agent in under 30 lines. Everything else in this course layers on top of this loop -- without changing its core shape.\n\n> **A note about real systems:** Production agents typically use streaming responses, where the model's output arrives token by token instead of all at once. That changes the user experience (you see text appearing in real time), but the fundamental loop -- send, execute, write back -- stays exactly the same. We skip streaming here to keep the core idea crystal clear.\n\n## What Changed\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Build a working agent loop from scratch\n- Explain why tool results must flow back into the conversation (the \"write-back\")\n- Redraw the loop from memory: messages -> model -> tool execution -> write-back -> next turn\n\n## What's Next\n\nRight now, the agent can only run bash commands. That means every file read uses `cat`, every edit uses `sed`, and there's no safety boundary at all. In the next chapter, you'll add dedicated tools with a clean routing system -- and the loop itself won't need to change at all.\n\n## Key Takeaway\n\n> An agent is just a loop: send messages to the model, execute the tools it asks for, feed the results back, and repeat until it's done.\n"
},
{
"version": "s02",
+ "slug": "s02-tool-use",
"locale": "en",
"title": "s02: Tool Use",
- "content": "# s02: Tool Use\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Adding a tool means adding one handler\"* -- the loop stays the same; new tools register into the dispatch map.\n\n## Problem\n\nWith only `bash`, the agent shells out for everything. `cat` truncates unpredictably, `sed` fails on special characters, and every bash call is an unconstrained security surface. Dedicated tools like `read_file` and `write_file` let you enforce path sandboxing at the tool level.\n\nThe key insight: adding tools does not require changing the loop.\n\n## Solution\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## How It Works\n\n1. Each tool gets a handler function. Path sandboxing prevents workspace escape.\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. The dispatch map links tool names to handlers.\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. In the loop, look up the handler by name. The loop body itself is unchanged from s01.\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nAdd a tool = add a handler + add a schema entry. The loop never changes.\n\n## What Changed From s01\n\n| Component | Before (s01) | After (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\n| Path safety | None | `safe_path()` sandbox |\n| Agent loop | Unchanged | Unchanged |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n"
+ "kind": "chapter",
+ "filename": "s02-tool-use.md",
+ "content": "# s02: Tool Use\n\n`s01 > [ s02 ] > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n## What You'll Learn\n\n- How to build a dispatch map (a routing table that maps tool names to handler functions)\n- How path sandboxing prevents the model from escaping its workspace\n- How to add new tools without touching the agent loop\n\nIf you ran the s01 agent for more than a few minutes, you probably noticed the cracks. `cat` silently truncates long files. `sed` chokes on special characters. Every bash command is an open door -- nothing stops the model from running `rm -rf /` or reading your SSH keys. You need dedicated tools with guardrails, and you need a clean way to add them.\n\n## The Problem\n\nWith only `bash`, the agent shells out for everything. There is no way to limit what it reads, where it writes, or how much output it returns. A single bad command can corrupt files, leak secrets, or blow past your token budget with a massive stdout dump. What you really want is a small set of purpose-built tools -- `read_file`, `write_file`, `edit_file` -- each with its own safety checks. The question is: how do you wire them in without rewriting the loop every time?\n\n## The Solution\n\nThe answer is a dispatch map -- one dictionary that routes tool names to handler functions. Adding a tool means adding one entry. The loop itself never changes.\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## How It Works\n\n**Step 1.** Each tool gets a handler function. Path sandboxing prevents the model from escaping the workspace -- every requested path is resolved and checked against the working directory before any I/O happens.\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000] # hard cap to avoid blowing up the context\n```\n\n**Step 2.** The dispatch map links tool names to handlers. This is the entire routing layer -- no if/elif chain, no class hierarchy, just a dictionary.\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n**Step 3.** In the loop, look up the handler by name. The loop body itself is unchanged from s01 -- only the dispatch line is new.\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nAdd a tool = add a handler + add a schema entry. 
The loop never changes.\n\n## What Changed From s01\n\n| Component | Before (s01) | After (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\n| Path safety | None | `safe_path()` sandbox |\n| Agent loop | Unchanged | Unchanged |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Wire any new tool into the agent by adding one handler and one schema entry -- without touching the loop.\n- Enforce path sandboxing so the model cannot read or write outside its workspace.\n- Explain why a dispatch map scales better than an if/elif chain.\n\nKeep the boundary clean: a tool schema is enough for now. You do not need policy layers, approval UIs, or plugin ecosystems yet. If you can add one new tool without rewriting the loop, you have the core pattern down.\n\n## What's Next\n\nYour agent can now read, write, and edit files safely. But what happens when you ask it to do a 10-step refactoring? It finishes steps 1 through 3 and then starts improvising because it forgot the rest. In s03, you will give the agent a session plan -- a structured todo list that keeps it on track through complex, multi-step tasks.\n\n## Key Takeaway\n\n> The loop should not care how a tool works internally. It only needs a reliable route from tool name to handler.\n"
+ },
+ {
+ "version": null,
+ "slug": "s02a-tool-control-plane",
+ "locale": "en",
+ "title": "s02a: Tool Control Plane",
+ "kind": "bridge",
+ "filename": "s02a-tool-control-plane.md",
+ "content": "# s02a: Tool Control Plane\n\n> **Deep Dive** -- Best read after s02 and before s07. It shows why tools become more than a simple lookup table.\n\n### When to Read This\n\nAfter you understand basic tool dispatch and before you add permissions.\n\n---\n\n> This bridge document answers another key question:\n>\n> **Why is a tool system more than a `tool_name -> handler` table?**\n\n## Why This Document Exists\n\n`s02` correctly teaches tool registration and dispatch first.\n\nThat is the right teaching move because you should first understand how the model turns intent into action.\n\nBut later the tool layer starts carrying much more responsibility:\n\n- permission checks\n- MCP routing\n- notifications\n- shared runtime state\n- message access\n- app state\n- capability-specific restrictions\n\nAt that point, the tool layer is no longer just a function table.\n\nIt becomes a control plane (the coordination layer that decides *how* each tool call gets routed and executed, rather than performing the tool work itself).\n\n## Terms First\n\n### Tool control plane\n\nThe part of the system that decides **how** a tool call executes:\n\n- where it runs\n- whether it is allowed\n- what state it can access\n- whether it is native or external\n\n### Execution context\n\nThe runtime environment visible to the tool:\n\n- current working directory\n- current permission mode\n- current messages\n- available MCP clients\n- app state and notification channels\n\n### Capability source\n\nNot every tool comes from the same place. Common sources:\n\n- native local tools\n- MCP tools\n- agent/team/task/worktree platform tools\n\n## The Smallest Useful Mental Model\n\nThink of the tool system as four layers:\n\n```text\n1. ToolSpec\n what the model sees\n\n2. Tool Router\n where the request gets sent\n\n3. ToolUseContext\n what environment the tool can access\n\n4. 
Tool Result Envelope\n how the output returns to the main loop\n```\n\nThe biggest step up is layer 3:\n\n**high-completion systems are defined less by the dispatch table and more by the shared execution context.**\n\n## Core Structures\n\n### `ToolSpec`\n\n```python\ntool = {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {...},\n}\n```\n\n### `ToolDispatchMap`\n\n```python\nhandlers = {\n \"read_file\": read_file,\n \"write_file\": write_file,\n \"bash\": run_bash,\n}\n```\n\nNecessary, but not sufficient.\n\n### `ToolUseContext`\n\n```python\ntool_use_context = {\n \"tools\": handlers,\n \"permission_context\": {...},\n \"mcp_clients\": {},\n \"messages\": [...],\n \"app_state\": {...},\n \"notifications\": [],\n \"cwd\": \"...\",\n}\n```\n\nThe key point:\n\nTools stop receiving only input parameters.\nThey start receiving a shared runtime environment.\n\n### `ToolResultEnvelope`\n\n```python\nresult = {\n \"ok\": True,\n \"content\": \"...\",\n \"is_error\": False,\n \"attachments\": [],\n}\n```\n\nThis makes it easier to support:\n\n- plain text output\n- structured output\n- error output\n- attachment-like results\n\n## Why `ToolUseContext` Eventually Becomes Necessary\n\nCompare two systems.\n\n### System A: dispatch map only\n\n```python\noutput = handlers[tool_name](**tool_input)\n```\n\nFine for a demo.\n\n### System B: dispatch map plus execution context\n\n```python\noutput = handlers[tool_name](tool_input, tool_use_context)\n```\n\nCloser to a real platform.\n\nWhy?\n\nBecause now:\n\n- `bash` needs permissions\n- `mcp__...` needs a client\n- `agent` tools need execution environment setup\n- `task_output` may need file writes plus notification write-back\n\n## Minimal Implementation Path\n\n### 1. Keep `ToolSpec` and handlers\n\nDo not throw away the simple model.\n\n### 2. Introduce one shared context object\n\n```python\nclass ToolUseContext:\n def __init__(self):\n self.handlers = {}\n self.permission_context = {}\n self.mcp_clients = {}\n self.messages = []\n self.app_state = {}\n self.notifications = []\n```\n\n### 3. Let all handlers receive the context\n\n```python\ndef run_tool(tool_name: str, tool_input: dict, ctx: ToolUseContext):\n handler = ctx.handlers[tool_name]\n return handler(tool_input, ctx)\n```\n\n### 4. Route by capability source\n\n```python\ndef route_tool(tool_name: str, tool_input: dict, ctx: ToolUseContext):\n if tool_name.startswith(\"mcp__\"):\n return run_mcp_tool(tool_name, tool_input, ctx)\n return run_native_tool(tool_name, tool_input, ctx)\n```\n\n## Key Takeaway\n\n**A mature tool system is not just a name-to-function map. It is a shared execution plane that decides how model action intent becomes real work.**\n"
+ },
+ {
+ "version": null,
+ "slug": "s02b-tool-execution-runtime",
+ "locale": "en",
+ "title": "s02b: Tool Execution Runtime",
+ "kind": "bridge",
+ "filename": "s02b-tool-execution-runtime.md",
+ "content": "# s02b: Tool Execution Runtime\n\n> **Deep Dive** -- Best read after s02, when you want to understand concurrent tool execution.\n\n### When to Read This\n\nWhen you start wondering how multiple tool calls in one turn get executed safely.\n\n---\n\n> This bridge note is not about how tools are registered.\n>\n> It is about a deeper question:\n>\n> **When the model emits multiple tool calls, what rules decide concurrency, progress updates, result ordering, and context merging?**\n\n## Why This Note Exists\n\n`s02` correctly teaches:\n\n- tool schema\n- dispatch map\n- `tool_result` flowing back into the loop\n\nThat is the right starting point.\n\nBut once the system grows, the hard questions move one layer deeper:\n\n- which tools can run in parallel\n- which tools should stay serial\n- whether long-running tools should emit progress first\n- whether concurrent results should write back in completion order or original order\n- whether tool execution mutates shared context\n- how concurrent mutations should merge safely\n\nThose questions are not about registration anymore.\n\nThey belong to the **tool execution runtime** -- the set of rules the system follows once tool calls actually start executing, including scheduling, tracking, yielding progress, and merging results.\n\n## Terms First\n\n### What \"tool execution runtime\" means here\n\nThis is not the programming language runtime.\n\nHere it means:\n\n> the rules the system uses once tool calls actually start executing\n\nThose rules include scheduling, tracking, yielding progress, and merging results.\n\n### What \"concurrency safe\" means\n\nA tool is concurrency safe when:\n\n> it can run alongside similar work without corrupting shared state\n\nTypical read-only tools are often safe:\n\n- `read_file`\n- some search tools\n- query-only MCP tools\n\nMany write tools are not:\n\n- `write_file`\n- `edit_file`\n- tools that modify shared application state\n\n### What a progress message is\n\nA progress message means:\n\n> the tool is not done yet, but the system already surfaces what it is doing\n\nThis keeps the user informed during long-running operations rather than leaving them staring at silence.\n\n### What a context modifier is\n\nSome tools do more than return text.\n\nThey also modify shared runtime context, for example:\n\n- update a notification queue\n- record active tools\n- mutate app state\n\nThat shared-state mutation is called a context modifier.\n\n## The Minimum Mental Model\n\nDo not flatten tool execution into:\n\n```text\ntool_use -> handler -> result\n```\n\nA better mental model is:\n\n```text\ntool_use blocks\n ->\npartition by concurrency safety\n ->\nchoose concurrent or serial execution\n ->\nemit progress if needed\n ->\nwrite results back in stable order\n ->\nmerge queued context modifiers\n```\n\nTwo upgrades matter most:\n\n- concurrency is not \"all tools run together\"\n- shared context should not be mutated in random completion order\n\n## Core Records\n\n### 1. `ToolExecutionBatch`\n\nA minimal teaching batch can look like:\n\n```python\nbatch = {\n \"is_concurrency_safe\": True,\n \"blocks\": [tool_use_1, tool_use_2, tool_use_3],\n}\n```\n\nThe point is simple:\n\n- tools are not always handled one by one\n- the runtime groups them into execution batches first\n\n### 2. 
`TrackedTool`\n\nIf you want a higher-completion execution layer, track each tool explicitly:\n\n```python\ntracked_tool = {\n \"id\": \"toolu_01\",\n \"name\": \"read_file\",\n \"status\": \"queued\", # queued / executing / completed / yielded\n \"is_concurrency_safe\": True,\n \"pending_progress\": [],\n \"results\": [],\n \"context_modifiers\": [],\n}\n```\n\nThis lets the runtime answer:\n\n- what is still waiting\n- what is already running\n- what has completed\n- what has already yielded progress\n\n### 3. `MessageUpdate`\n\nTool execution may produce more than one final result.\n\nA minimal update can be treated as:\n\n```python\nupdate = {\n \"message\": maybe_message,\n \"new_context\": current_context,\n}\n```\n\nIn a larger runtime, updates usually split into two channels:\n\n- messages that should surface upstream immediately\n- context changes that should stay internal until merge time\n\n### 4. Queued context modifiers\n\nThis is easy to skip, but it is one of the most important ideas.\n\nIn a concurrent batch, the safer strategy is not:\n\n> \"whichever tool finishes first mutates shared context first\"\n\nThe safer strategy is:\n\n> queue context modifiers first, then merge them later in the original tool order\n\nFor example:\n\n```python\nqueued_context_modifiers = {\n \"toolu_01\": [modify_ctx_a],\n \"toolu_02\": [modify_ctx_b],\n}\n```\n\n## Minimum Implementation Steps\n\n### Step 1: classify concurrency safety\n\n```python\ndef is_concurrency_safe(tool_name: str, tool_input: dict) -> bool:\n return tool_name in {\"read_file\", \"search_files\"}\n```\n\n### Step 2: partition before execution\n\n```python\nbatches = partition_tool_calls(tool_uses)\n\nfor batch in batches:\n if batch[\"is_concurrency_safe\"]:\n run_concurrently(batch[\"blocks\"])\n else:\n run_serially(batch[\"blocks\"])\n```\n\n### Step 3: let concurrent batches emit progress\n\n```python\nfor update in run_concurrently(...):\n if update.get(\"message\"):\n yield update[\"message\"]\n```\n\n### Step 4: merge context in stable order\n\n```python\nqueued_modifiers = {}\n\nfor update in concurrent_updates:\n if update.get(\"context_modifier\"):\n queued_modifiers.setdefault(update[\"tool_id\"], []).append(update[\"context_modifier\"])\n\nfor tool in original_batch_order:\n for modifier in queued_modifiers.get(tool[\"id\"], []):\n context = modifier(context)\n```\n\nThis is one of the places where a teaching repo can still stay simple while remaining honest about the real system shape.\n\n## The Picture You Should Hold\n\n```text\ntool_use blocks\n |\n v\npartition by concurrency safety\n |\n +-- safe batch ----------> concurrent execution\n | |\n | +-- progress updates\n | +-- final results\n | +-- queued context modifiers\n |\n +-- exclusive batch -----> serial execution\n |\n +-- direct result\n +-- direct context update\n```\n\n## Why This Matters More Than the Dispatch Map\n\nIn a tiny demo:\n\n```python\nhandlers[tool_name](tool_input)\n```\n\nis enough.\n\nBut in a higher-completion agent, the hard part is no longer calling the right handler.\n\nThe hard part is:\n\n- scheduling multiple tools safely\n- keeping progress visible\n- making result ordering stable\n- preventing shared context from becoming nondeterministic\n\nThat is why tool execution runtime deserves its own deep dive.\n\n## Key Takeaway\n\n**Once the model emits multiple tool calls per turn, the hard problem shifts from dispatch to safe concurrent execution with stable result ordering.**\n"
},
{
"version": "s03",
+ "slug": "s03-todo-write",
"locale": "en",
"title": "s03: TodoWrite",
- "content": "# s03: TodoWrite\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"An agent without a plan drifts\"* -- list the steps first, then execute.\n\n## Problem\n\nOn multi-step tasks, the model loses track. It repeats work, skips steps, or wanders off. Long conversations make this worse -- the system prompt fades as tool results fill the context. A 10-step refactoring might complete steps 1-3, then the model starts improvising because it forgot steps 4-10.\n\n## Solution\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## How It Works\n\n1. TodoManager stores items with statuses. Only one item can be `in_progress` at a time.\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. The `todo` tool goes into the dispatch map like any other tool.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. A nag reminder injects a nudge if the model goes 3+ rounds without calling `todo`.\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos. \",\n })\n```\n\nThe \"one in_progress at a time\" constraint forces sequential focus. The nag reminder creates accountability.\n\n## What Changed From s02\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|----------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses |\n| Nag injection | None | `` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n"
+ "kind": "chapter",
+ "filename": "s03-todo-write.md",
+ "content": "# s03: TodoWrite\n\n`s01 > s02 > [ s03 ] > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n## What You'll Learn\n\n- How session planning keeps the model on track during multi-step tasks\n- How a structured todo list with status tracking replaces fragile free-form plans\n- How gentle reminders (nag injection) pull the model back when it drifts\n\nHave you ever asked an AI to do a complex task and watched it lose track halfway through? You say \"refactor this module: add type hints, docstrings, tests, and a main guard\" and it nails the first two steps, then wanders off into something you never asked for. This is not a model intelligence problem -- it is a working memory problem. As tool results pile up in the conversation, the original plan fades. By step 4, the model has effectively forgotten steps 5 through 10. You need a way to keep the plan visible.\n\n## The Problem\n\nOn multi-step tasks, the model drifts. It repeats work, skips steps, or improvises once the system prompt fades behind pages of tool output. The context window (the total amount of text the model can hold in working memory at once) is finite, and earlier instructions get pushed further away with every tool call. A 10-step refactoring might complete steps 1-3, then the model starts making things up because it simply cannot \"see\" steps 4-10 anymore.\n\n## The Solution\n\nGive the model a `todo` tool that maintains a structured checklist. Then inject gentle reminders when the model goes too long without updating its plan.\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## How It Works\n\n**Step 1.** TodoManager stores items with statuses. The \"one `in_progress` at a time\" constraint forces the model to finish what it started before moving on.\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render() # returns the checklist as formatted text\n```\n\n**Step 2.** The `todo` tool goes into the dispatch map like any other tool -- no special wiring needed, just one more entry in the dictionary you built in s02.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n**Step 3.** A nag reminder injects a nudge if the model goes 3+ rounds without calling `todo`. This is the write-back trick (feeding tool results back into the conversation) used for a new purpose: the harness (the code wrapping around the model) quietly inserts a reminder into the results payload before it is appended to messages.\n\n```python\nif rounds_since_todo >= 3:\n results.insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos. \",\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\nThe \"one in_progress at a time\" constraint forces sequential focus. 
The nag reminder creates accountability. Together, they keep the model working through its plan instead of drifting.\n\n## What Changed From s02\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|----------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses |\n| Nag injection | None | reminder after 3 rounds |\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n\nWatch the model create a plan, work through it step by step, and check off items as it goes. If it forgets to update the plan for a few rounds, you will see the nudge appear in the conversation.\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Add session planning to any agent by dropping a `todo` tool into the dispatch map.\n- Enforce sequential focus with the \"one in_progress at a time\" constraint.\n- Use nag injection to pull the model back on track when it drifts.\n- Explain why structured state beats free-form prose for multi-step plans.\n\nKeep three boundaries in mind: `todo` here means \"plan for the current conversation\", not a durable task database. The tiny schema `{id, text, status}` is enough. A direct reminder is enough -- you do not need a sophisticated planning UI yet.\n\n## What's Next\n\nYour agent can now plan its work and stay on track. But every file it reads, every bash output it produces -- all of it stays in the conversation forever, eating into the context window. A five-file investigation might burn thousands of tokens (roughly word-sized pieces -- a 1000-line file uses about 4000 tokens) that the parent conversation never needs again. In s04, you will learn how to spin up subagents with fresh, isolated context -- so the parent stays clean and the model stays sharp.\n\n## Key Takeaway\n\n> Once the plan lives in structured state instead of free-form prose, the agent drifts much less.\n"
},
{
"version": "s04",
+ "slug": "s04-subagent",
"locale": "en",
"title": "s04: Subagents",
- "content": "# s04: Subagents\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Break big tasks down; each subtask gets a clean context\"* -- subagents use independent messages[], keeping the main conversation clean.\n\n## Problem\n\nAs the agent works, its messages array grows. Every file read, every bash output stays in context permanently. \"What testing framework does this project use?\" might require reading 5 files, but the parent only needs the answer: \"pytest.\"\n\n## Solution\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## How It Works\n\n1. The parent gets a `task` tool. The child gets all base tools except `task` (no recursive spawning).\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. The subagent starts with `messages=[]` and runs its own loop. Only the final text returns to the parent.\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\nThe child's entire message history (possibly 30+ tool calls) is discarded. The parent receives a one-paragraph summary as a normal `tool_result`.\n\n## What Changed From s03\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n"
+ "kind": "chapter",
+ "filename": "s04-subagent.md",
+ "content": "# s04: Subagents\n\n`s01 > s02 > s03 > [ s04 ] > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n## What You'll Learn\n- Why exploring a side question can pollute the parent agent's context\n- How a subagent gets a fresh, empty message history\n- How only a short summary travels back to the parent\n- Why the child's full message history is discarded after use\n\nImagine you ask your agent \"What testing framework does this project use?\" To answer, it reads five files, parses config blocks, and compares import statements. All of that exploration is useful for a moment -- but once the answer is \"pytest,\" you really don't want those five file dumps sitting in the conversation forever. Every future API call now carries that dead weight, burning tokens and distracting the model. You need a way to ask a side question in a clean room and bring back only the answer.\n\n## The Problem\n\nAs the agent works, its `messages` array grows. Every file read, every bash output stays in context permanently. A simple question like \"what testing framework is this?\" might require reading five files, but the parent only needs one word back: \"pytest.\" Without isolation, those intermediate artifacts stay in context for the rest of the session, wasting tokens on every subsequent API call and muddying the model's attention. The longer a session runs, the worse this gets -- context fills with exploration debris that has nothing to do with the current task.\n\n## The Solution\n\nThe parent agent delegates side tasks to a child agent that starts with an empty `messages=[]`. The child does all the messy exploration, then only its final text summary travels back. The child's full history is discarded.\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## How It Works\n\n**Step 1.** The parent gets a `task` tool that the child does not. This prevents recursive spawning -- a child cannot create its own children.\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n**Step 2.** The subagent starts with `messages=[]` and runs its own agent loop. 
Only the final text block returns to the parent as a `tool_result`.\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n # Extract only the final text -- everything else is thrown away\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\nThe child's entire message history (possibly 30+ tool calls worth of file reads and bash outputs) is discarded the moment `run_subagent` returns. The parent receives a one-paragraph summary as a normal `tool_result`, keeping its own context clean.\n\n## What Changed From s03\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Explain why a subagent is primarily a **context boundary**, not a process trick\n- Spawn a one-shot child agent with a fresh `messages=[]`\n- Return only a summary to the parent, discarding all intermediate exploration\n- Decide which tools the child should and should not have access to\n\nYou don't need long-lived workers, resumable sessions, or worktree isolation yet. The core idea is simple: give the subtask a clean workspace in memory, then bring back only the answer the parent still needs.\n\n## What's Next\n\nSo far you've learned to keep context clean by isolating side tasks. But what about the knowledge the agent carries in the first place? In s05, you'll see how to avoid bloating the system prompt with domain expertise the model might never use -- loading skills on demand instead of upfront.\n\n## Key Takeaway\n\n> A subagent is a disposable scratch pad: fresh context in, short summary out, everything else discarded.\n"
},
{
"version": "s05",
+ "slug": "s05-skill-loading",
"locale": "en",
"title": "s05: Skills",
- "content": "# s05: Skills\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Load knowledge when you need it, not upfront\"* -- inject via tool_result, not the system prompt.\n\n## Problem\n\nYou want the agent to follow domain-specific workflows: git conventions, testing patterns, code review checklists. Putting everything in the system prompt wastes tokens on unused skills. 10 skills at 2000 tokens each = 20,000 tokens, most of which are irrelevant to any given task.\n\n## Solution\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\nLayer 1: skill *names* in system prompt (cheap). Layer 2: full *body* via tool_result (on demand).\n\n## How It Works\n\n1. Each skill is a directory containing a `SKILL.md` with YAML frontmatter.\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. SkillLoader scans for `SKILL.md` files, uses the directory name as the skill identifier.\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n \"\n```\n\n3. Layer 1 goes into the system prompt. Layer 2 is just another tool handler.\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\nThe model learns what skills exist (cheap) and loads them when relevant (expensive).\n\n## What Changed From s04\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | skills/\\*/SKILL.md files |\n| Injection | None | Two-layer (system + result)|\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n"
+ "kind": "chapter",
+ "filename": "s05-skill-loading.md",
+ "content": "# s05: Skills\n\n`s01 > s02 > s03 > s04 > [ s05 ] > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n## What You'll Learn\n- Why stuffing all domain knowledge into the system prompt wastes tokens\n- The two-layer loading pattern: cheap names up front, expensive bodies on demand\n- How frontmatter (YAML metadata at the top of a file) gives each skill a name and description\n- How the model decides for itself which skill to load and when\n\nYou don't memorize every recipe in every cookbook you own. You know which shelf each cookbook sits on, and you pull one down only when you're actually cooking that dish. An agent's domain knowledge works the same way. You might have expertise files for git workflows, testing patterns, code review checklists, PDF processing -- dozens of topics. Loading all of them into the system prompt on every request is like reading every cookbook cover to cover before cracking a single egg. Most of that knowledge is irrelevant to any given task.\n\n## The Problem\n\nYou want your agent to follow domain-specific workflows: git conventions, testing best practices, code review checklists. The naive approach is to put everything in the system prompt. But 10 skills at 2,000 tokens each means 20,000 tokens of instructions on every API call -- most of which have nothing to do with the current question. You pay for those tokens every turn, and worse, all that irrelevant text competes for the model's attention with the content that actually matters.\n\n## The Solution\n\nSplit knowledge into two layers. Layer 1 lives in the system prompt and is cheap: just skill names and one-line descriptions (~100 tokens per skill). Layer 2 is the full skill body, loaded on demand through a tool call only when the model decides it needs that knowledge.\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\n## How It Works\n\n**Step 1.** Each skill is a directory containing a `SKILL.md` file. The file starts with YAML frontmatter (a metadata block delimited by `---` lines) that declares the skill's name and description, followed by the full instruction body.\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n**Step 2.** `SkillLoader` scans for all `SKILL.md` files at startup. 
It parses the frontmatter to extract names and descriptions, and stores the full body for later retrieval.\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n # Use the frontmatter name, or fall back to the directory name\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n \"\"\"Layer 1: cheap one-liners for the system prompt.\"\"\"\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n \"\"\"Layer 2: full body, returned as a tool_result.\"\"\"\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n \"\n```\n\n**Step 3.** Layer 1 goes into the system prompt so the model always knows what skills exist. Layer 2 is wired up as a normal tool handler -- the model calls `load_skill` when it decides it needs the full instructions.\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\nThe model learns what skills exist (cheap, ~100 tokens each) and loads them only when relevant (expensive, ~2000 tokens each). On a typical turn, only one skill is loaded instead of all ten.\n\n## What Changed From s04\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | skills/\\*/SKILL.md files |\n| Injection | None | Two-layer (system + result)|\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Explain why \"list first, load later\" beats stuffing everything into the system prompt\n- Write a `SKILL.md` with YAML frontmatter that a `SkillLoader` can discover\n- Wire up two-layer loading: cheap descriptions in the system prompt, full bodies via `tool_result`\n- Let the model decide for itself when domain knowledge is worth loading\n\nYou don't need skill ranking systems, multi-provider merging, parameterized templates, or recovery-time restoration rules. The core pattern is simple: advertise cheaply, load on demand.\n\n## What's Next\n\nYou now know how to keep knowledge out of context until it's needed. But what happens when context grows large anyway -- after dozens of turns of real work? In s06, you'll learn how to compress a long conversation down to its essentials so the agent can keep working without hitting token limits.\n\n## Key Takeaway\n\n> Advertise skill names cheaply in the system prompt; load the full body through a tool call only when the model actually needs it.\n"
},
{
"version": "s06",
+ "slug": "s06-context-compact",
"locale": "en",
"title": "s06: Context Compact",
- "content": "# s06: Context Compact\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Context will fill up; you need a way to make room\"* -- three-layer compression strategy for infinite sessions.\n\n## Problem\n\nThe context window is finite. A single `read_file` on a 1000-line file costs ~4000 tokens. After reading 30 files and running 20 bash commands, you hit 100,000+ tokens. The agent cannot work on large codebases without compression.\n\n## Solution\n\nThree layers, increasing in aggressiveness:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## How It Works\n\n1. **Layer 1 -- micro_compact**: Before each LLM call, replace old tool results with placeholders.\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. **Layer 2 -- auto_compact**: When tokens exceed threshold, save full transcript to disk, then ask the LLM to summarize.\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **Layer 3 -- manual compact**: The `compact` tool triggers the same summarization on demand.\n\n4. The loop integrates all three:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\nTranscripts preserve full history on disk. 
Nothing is truly lost -- just moved out of active context.\n\n## What Changed From s05\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Transcripts | None | Saved to .transcripts/ |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n1. `Read every Python file in the agents/ directory one by one` (watch micro-compact replace old results)\n2. `Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n"
+ "kind": "chapter",
+ "filename": "s06-context-compact.md",
+ "content": "# s06: Context Compact\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n## What You'll Learn\n\n- Why long sessions inevitably run out of context space, and what happens when they do\n- A four-lever compression strategy: persisted output, micro-compact, auto-compact, and manual compact\n- How to move detail out of active memory without losing it\n- How to keep a session alive indefinitely by summarizing and continuing\n\nYour agent from s05 is capable. It reads files, runs commands, edits code, and delegates subtasks. But try something ambitious -- ask it to refactor a module that touches 30 files. After reading all of them and running 20 shell commands, you will notice the responses get worse. The model starts forgetting what it already read. It repeats work. Eventually the API rejects your request entirely. You have hit the context window limit, and without a plan for that, your agent is stuck.\n\n## The Problem\n\nEvery API call to the model includes the entire conversation so far: every user message, every assistant response, every tool call and its result. The model's context window (the total amount of text it can hold in working memory at once) is finite. A single `read_file` on a 1000-line source file costs roughly 4,000 tokens (roughly word-sized pieces -- a 1,000-line file uses about 4,000 tokens). Read 30 files and run 20 bash commands, and you have burned through 100,000+ tokens. The context is full, but the work is only half done.\n\nThe naive fix -- just truncating old messages -- throws away information the agent might need later. A smarter approach compresses strategically: keep the important bits, move the bulky details to disk, and summarize when the conversation gets too long. That is what this chapter builds.\n\n## The Solution\n\nWe use four levers, each working at a different stage of the pipeline, from output-time filtering to full conversation summarization.\n\n```\nEvery tool call:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Lever 0: persisted-output] (at tool execution time)\n Large outputs (>50KB, bash >30KB) are written to disk\n and replaced with a preview marker.\n |\n v\n[Lever 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n (preserves read_file results as reference material)\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Lever 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Lever 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## How It Works\n\n### Step 1: Lever 0 -- Persisted Output\n\nThe first line of defense runs at tool execution time, before a result even enters the conversation. When a tool result exceeds a size threshold, we write the full output to disk and replace it with a short preview. 
This prevents a single giant command output from consuming half the context window.\n\n```python\nPERSIST_OUTPUT_TRIGGER_CHARS_DEFAULT = 50000\nPERSIST_OUTPUT_TRIGGER_CHARS_BASH = 30000  # bash uses a lower threshold\n\ndef maybe_persist_output(tool_use_id, output, trigger_chars=None):\n    trigger = trigger_chars or PERSIST_OUTPUT_TRIGGER_CHARS_DEFAULT\n    if len(output) <= trigger:\n        return output  # small enough -- keep inline\n    stored_path = _persist_tool_result(tool_use_id, output)\n    return _build_persisted_marker(stored_path, output)  # swap in a compact preview\n    # Returns:\n    # Output too large (48.8KB). Full output saved to: .task_outputs/tool-results/abc123.txt\n    # Preview (first 2.0KB):\n    # ... first 2000 chars ...\n    #\n```\n\nThe model can later `read_file` the stored path to access the full content if needed. Nothing is lost -- the detail just lives on disk instead of in the conversation.\n\n### Step 2: Lever 1 -- Micro-Compact\n\nBefore each LLM call, we scan for old tool results and replace them with one-line placeholders. This is invisible to the user and runs every turn. The key subtlety: we preserve `read_file` results because those serve as reference material the model often needs to look back at.\n\n```python\nPRESERVE_RESULT_TOOLS = {\"read_file\"}\n\ndef micro_compact(messages: list) -> list:\n    tool_results = [...]  # collect (tool_name, tool_result) pairs\n    if len(tool_results) <= KEEP_RECENT:\n        return messages  # not enough results to compact yet\n    for tool_name, part in tool_results[:-KEEP_RECENT]:\n        if tool_name in PRESERVE_RESULT_TOOLS:\n            continue  # keep reference material\n        part[\"content\"] = f\"[Previous: used {tool_name}]\"  # replace with short placeholder\n    return messages\n```\n\n### Step 3: Lever 2 -- Auto-Compact\n\nWhen micro-compaction is not enough and the token count crosses a threshold, the harness takes a bigger step: it saves the full transcript to disk for recovery, asks the LLM to summarize the entire conversation, and then replaces all messages with that summary. The agent continues from the summary as if nothing happened.\n\n```python\ndef auto_compact(messages: list) -> list:\n    # Save transcript for recovery\n    transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n    with open(transcript_path, \"w\") as f:\n        for msg in messages:\n            f.write(json.dumps(msg, default=str) + \"\\n\")\n    # LLM summarizes\n    response = client.messages.create(\n        model=MODEL,\n        messages=[{\"role\": \"user\", \"content\":\n            \"Summarize this conversation for continuity...\"\n            + json.dumps(messages, default=str)[:80000]}],  # cap at 80K chars for the summary call\n        max_tokens=2000,\n    )\n    return [\n        {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n    ]\n```\n\n### Step 4: Lever 3 -- Manual Compact\n\nThe `compact` tool lets the model itself trigger summarization on demand. It uses exactly the same mechanism as auto-compact. The difference is who decides: auto-compact fires on a threshold, manual compact fires when the agent judges it is the right time to compress.\n\n### Step 5: Integration in the Agent Loop\n\nAll four levers compose naturally inside the main loop:\n\n```python\ndef agent_loop(messages: list):\n    while True:\n        micro_compact(messages)  # Lever 1\n        if estimate_tokens(messages) > THRESHOLD:\n            messages[:] = auto_compact(messages)  # Lever 2\n        response = client.messages.create(...)\n        # ... tool execution with persisted-output ...  # Lever 0\n        if manual_compact:\n            messages[:] = auto_compact(messages)  # Lever 3\n```\n\nTranscripts preserve full history on disk. 
Large outputs are saved to `.task_outputs/tool-results/`. Nothing is truly lost -- just moved out of active context.\n\n## What Changed From s05\n\n| Component | Before (s05) | After (s06) |\n|-------------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Four-lever compression |\n| Persisted-output | None | Large outputs -> disk + preview |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Transcripts | None | Saved to .transcripts/ |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n1. `Read every Python file in the agents/ directory one by one` (watch micro-compact replace old results)\n2. `Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Explain why a long agent session degrades and eventually fails without compression\n- Intercept oversized tool outputs before they enter the context window\n- Silently replace stale tool results with lightweight placeholders each turn\n- Trigger a full conversation summarization -- automatically on a threshold or manually via a tool call\n- Preserve full transcripts on disk so nothing is permanently lost\n\n## Stage 1 Complete\n\nYou now have a complete single-agent system. Starting from a bare API call in s01, you have built up tool use, structured planning, sub-agent delegation, dynamic skill loading, and context compression. Your agent can read, write, execute, plan, delegate, and work indefinitely without running out of memory. That is a real coding agent.\n\nBefore moving on, consider going back to s01 and rebuilding the whole stack from scratch without looking at the code. If you can write all six layers from memory, you truly own the ideas -- not just the implementation.\n\nStage 2 begins with s07 and hardens this foundation. You will add permission controls, hook systems, persistent memory, error recovery, and more. The single agent you built here becomes the kernel that everything else wraps around.\n\n## Key Takeaway\n\n> Compaction is not deleting history -- it is relocating detail so the agent can keep working.\n"
},
{
"version": "s07",
+ "slug": "s07-permission-system",
"locale": "en",
- "title": "s07: Task System",
- "content": "# s07: Task System\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"Break big goals into small tasks, order them, persist to disk\"* -- a file-based task graph with dependencies, laying the foundation for multi-agent collaboration.\n\n## Problem\n\ns03's TodoManager is a flat checklist in memory: no ordering, no dependencies, no status beyond done-or-not. Real goals have structure -- task B depends on task A, tasks C and D can run in parallel, task E waits for both C and D.\n\nWithout explicit relationships, the agent can't tell what's ready, what's blocked, or what can run concurrently. And because the list lives only in memory, context compression (s06) wipes it clean.\n\n## Solution\n\nPromote the checklist into a **task graph** persisted to disk. Each task is a JSON file with status, dependencies (`blockedBy`), and dependents (`blocks`). The graph answers three questions at any moment:\n\n- **What's ready?** -- tasks with `pending` status and empty `blockedBy`.\n- **What's blocked?** -- tasks waiting on unfinished dependencies.\n- **What's done?** -- `completed` tasks, whose completion automatically unblocks dependents.\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\nTask graph (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\nOrdering: task 1 must finish before 2 and 3\nParallelism: tasks 2 and 3 can run at the same time\nDependencies: task 4 waits for both 2 and 3\nStatus: pending -> in_progress -> completed\n```\n\nThis task graph becomes the coordination backbone for everything after s07: background execution (s08), multi-agent teams (s09+), and worktree isolation (s12) all read from and write to this same structure.\n\n## How It Works\n\n1. **TaskManager**: one JSON file per task, CRUD with dependency graph.\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. **Dependency resolution**: completing a task clears its ID from every other task's `blockedBy` list, automatically unblocking dependents.\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **Status + dependency wiring**: `update` handles transitions and dependency edges.\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. 
Four task tools go into the dispatch map.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\nFrom s07 onward, the task graph is the default for multi-step work. s03's Todo remains for quick single-session checklists.\n\n## What Changed From s06\n\n| Component | Before (s06) | After (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| Planning model | Flat checklist (in-memory) | Task graph with dependencies (on disk) |\n| Relationships | None | `blockedBy` + `blocks` edges |\n| Status tracking | Done or not | `pending` -> `in_progress` -> `completed` |\n| Persistence | Lost on compression | Survives compression and restarts |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n"
+ "title": "s07: Permission System",
+ "kind": "chapter",
+ "filename": "s07-permission-system.md",
+ "content": "# s07: Permission System\n\n`s01 > s02 > s03 > s04 > s05 > s06 > [ s07 ] > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n## What You'll Learn\n\n- A four-stage permission pipeline that every tool call must pass through before execution\n- Three permission modes that control how aggressively the agent auto-approves actions\n- How deny and allow rules use pattern matching to create a first-match-wins policy\n- Interactive approval with an \"always\" option that writes permanent allow rules at runtime\n\nYour agent from s06 is capable and long-lived. It reads files, writes code, runs shell commands, delegates subtasks, and compresses its own context to keep going. But there is no safety catch. Every tool call the model proposes goes straight to execution. Ask it to delete a directory and it will -- no questions asked. Before you give this agent access to anything that matters, you need a gate between \"the model wants to do X\" and \"the system actually does X.\"\n\n## The Problem\n\nImagine your agent is helping refactor a codebase. It reads a few files, proposes some edits, and then decides to run `rm -rf /tmp/old_build` to clean up. Except the model hallucinated the path -- the real directory is your home folder. Or it decides to `sudo` something because the model has seen that pattern in training data. Without a permission layer, intent becomes execution instantly. There is no moment where the system can say \"wait, that looks dangerous\" or where you can say \"no, do not do that.\" The agent needs a checkpoint -- a pipeline (a sequence of stages that every request passes through) between what the model asks for and what actually happens.\n\n## The Solution\n\nEvery tool call now passes through a four-stage permission pipeline before execution. The stages run in order, and the first one that produces a definitive answer wins.\n\n```\ntool_call from LLM\n |\n v\n[1. Deny rules] -- blocklist: always block these\n |\n v\n[2. Mode check] -- plan mode? auto mode? default?\n |\n v\n[3. Allow rules] -- allowlist: always allow these\n |\n v\n[4. Ask user] -- interactive y/n/always prompt\n |\n v\nexecute (or reject)\n```\n\n## Read Together\n\n- If you start blurring \"the model proposed an action\" with \"the system actually executed an action,\" you might find it helpful to revisit [`s00a-query-control-plane.md`](./s00a-query-control-plane.md).\n- If you are not yet clear on why tool requests should not drop straight into handlers, keeping [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md) open beside this chapter may help.\n- If `PermissionRule`, `PermissionDecision`, and `tool_result` start to collapse into one vague idea, [`data-structures.md`](./data-structures.md) can reset them.\n\n## How It Works\n\n**Step 1.** Define three permission modes. Each mode changes how the pipeline treats tool calls that do not match any explicit rule. \"Default\" mode is the safest -- it asks you about everything. \"Plan\" mode blocks all writes outright, useful when you want the agent to explore without touching anything. 
\"Auto\" mode lets reads through silently and only asks about writes, good for fast exploration.\n\n| Mode | Behavior | Use Case |\n|------|----------|----------|\n| `default` | Ask user for every unmatched tool call | Normal interactive use |\n| `plan` | Block all writes, allow reads | Planning/review mode |\n| `auto` | Auto-allow reads, ask for writes | Fast exploration mode |\n\n**Step 2.** Set up deny and allow rules with pattern matching. Rules are checked in order -- first match wins. Deny rules catch dangerous patterns that should never execute, regardless of mode. Allow rules let known-safe operations pass without asking.\n\n```python\nrules = [\n # Always deny dangerous patterns\n {\"tool\": \"bash\", \"content\": \"rm -rf /\", \"behavior\": \"deny\"},\n {\"tool\": \"bash\", \"content\": \"sudo *\", \"behavior\": \"deny\"},\n # Allow reading anything\n {\"tool\": \"read_file\", \"path\": \"*\", \"behavior\": \"allow\"},\n]\n```\n\nWhen the user answers \"always\" at the interactive prompt, a permanent allow rule is added at runtime.\n\n**Step 3.** Implement the four-stage check. This is the core of the permission system. Notice that deny rules run first and cannot be bypassed -- this is intentional. No matter what mode you are in or what allow rules exist, a deny rule always wins.\n\n```python\ndef check(self, tool_name, tool_input):\n # Step 1: Deny rules (bypass-immune, always checked first)\n for rule in self.rules:\n if rule[\"behavior\"] == \"deny\" and self._matches(rule, ...):\n return {\"behavior\": \"deny\", \"reason\": \"...\"}\n\n # Step 2: Mode-based decisions\n if self.mode == \"plan\" and tool_name in WRITE_TOOLS:\n return {\"behavior\": \"deny\", \"reason\": \"Plan mode: writes blocked\"}\n if self.mode == \"auto\" and tool_name in READ_ONLY_TOOLS:\n return {\"behavior\": \"allow\", \"reason\": \"Auto: read-only approved\"}\n\n # Step 3: Allow rules\n for rule in self.rules:\n if rule[\"behavior\"] == \"allow\" and self._matches(rule, ...):\n return {\"behavior\": \"allow\", \"reason\": \"...\"}\n\n # Step 4: Fall through to ask user\n return {\"behavior\": \"ask\", \"reason\": \"...\"}\n```\n\n**Step 4.** Integrate the permission check into the agent loop. Every tool call now goes through the pipeline before execution. The result is one of three outcomes: denied (with a reason), allowed (silently), or asked (interactively).\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n decision = perms.check(block.name, block.input)\n\n if decision[\"behavior\"] == \"deny\":\n output = f\"Permission denied: {decision['reason']}\"\n elif decision[\"behavior\"] == \"ask\":\n if perms.ask_user(block.name, block.input):\n output = handler(**block.input)\n else:\n output = \"Permission denied by user\"\n else: # allow\n output = handler(**block.input)\n\n results.append({\"type\": \"tool_result\", ...})\n```\n\n**Step 5.** Add denial tracking as a simple circuit breaker. The `PermissionManager` tracks consecutive denials. 
After 3 in a row, it suggests switching to plan mode -- this prevents the agent from repeatedly hitting the same wall and wasting turns.\n\n## What Changed From s06\n\n| Component | Before (s06) | After (s07) |\n|-----------|-------------|-------------|\n| Safety | None | 4-stage permission pipeline |\n| Modes | None | 3 modes: default, plan, auto |\n| Rules | None | Deny/allow rules with pattern matching |\n| User control | None | Interactive approval with \"always\" option |\n| Denial tracking | None | Circuit breaker after 3 consecutive denials |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s07_permission_system.py\n```\n\n1. Start in `default` mode -- every write tool asks for approval\n2. Try `plan` mode -- all writes are blocked, reads pass through\n3. Try `auto` mode -- reads auto-approved, writes still ask\n4. Answer \"always\" to permanently allow a tool\n5. Type `/mode plan` to switch modes at runtime\n6. Type `/rules` to inspect current rule set\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Explain why model intent must pass through a decision pipeline before it becomes execution\n- Build a four-stage permission check: deny, mode, allow, ask\n- Configure three permission modes that give you different safety/speed tradeoffs\n- Add rules dynamically at runtime when a user answers \"always\"\n- Implement a simple circuit breaker that catches repeated denial loops\n\n## What's Next\n\nYour permission system controls what the agent is allowed to do, but it lives entirely inside the agent's own code. What if you want to extend behavior -- add logging, auditing, or custom validation -- without modifying the agent loop at all? That is what s08 introduces: a hook system that lets external shell scripts observe and influence every tool call.\n\n## Key Takeaway\n\n> Safety is a pipeline, not a boolean -- deny first, then consider mode, then check allow rules, then ask the user.\n"
},
{
"version": "s08",
+ "slug": "s08-hook-system",
"locale": "en",
- "title": "s08: Background Tasks",
- "content": "# s08: Background Tasks\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"Run slow operations in the background; the agent keeps thinking\"* -- daemon threads run commands, inject notifications on completion.\n\n## Problem\n\nSome commands take minutes: `npm install`, `pytest`, `docker build`. With a blocking loop, the model sits idle waiting. If the user asks \"install dependencies and while that runs, create the config file,\" the agent does them sequentially, not in parallel.\n\n## Solution\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## How It Works\n\n1. BackgroundManager tracks tasks with a thread-safe notification queue.\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` starts a daemon thread and returns immediately.\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. When the subprocess finishes, its result goes into the notification queue.\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. The agent loop drains notifications before each LLM call.\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\" \"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\nThe loop stays single-threaded. Only subprocess I/O is parallelized.\n\n## What Changed From s07\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n"
+ "title": "s08: Hook System",
+ "kind": "chapter",
+ "filename": "s08-hook-system.md",
+ "content": "# s08: Hook System\n\n`s01 > s02 > s03 > s04 > s05 > s06 > s07 > [ s08 ] > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n## What You'll Learn\n\n- Three lifecycle events that let external code observe and influence the agent loop\n- How shell-based hooks run as subprocesses with full context about the current tool call\n- The exit code protocol: 0 means continue, 1 means block, 2 means inject a message\n- How to configure hooks in an external JSON file so you never touch the main loop code\n\nYour agent from s07 has a permission system that controls what it is allowed to do. But permissions are a yes/no gate -- they do not let you add new behavior. Suppose you want every bash command to be logged to an audit file, or you want a linter to run automatically after every file write, or you want a custom security scanner to inspect tool inputs before they execute. You could add if/else branches inside the main loop for each of these, but that turns your clean loop into a tangle of special cases. What you really want is a way to extend the agent's behavior from the outside, without modifying the loop itself.\n\n## The Problem\n\nYou are running your agent in a team environment. Different teams want different behaviors: the security team wants to scan every bash command, the QA team wants to auto-run tests after file edits, and the ops team wants an audit trail of every tool call. If each of these requires code changes to the agent loop, you end up with a mess of conditionals that nobody can maintain. Worse, every new requirement means redeploying the agent. You need a way for teams to plug in their own logic at well-defined moments -- without touching the core code.\n\n## The Solution\n\nThe agent loop exposes three fixed extension points (lifecycle events). At each point, it runs external shell commands called hooks. Each hook communicates its intent through its exit code: continue silently, block the operation, or inject a message into the conversation.\n\n```\ntool_call from LLM\n |\n v\n[PreToolUse hooks]\n | exit 0 -> continue\n | exit 1 -> block tool, return stderr as error\n | exit 2 -> inject stderr into conversation, continue\n |\n v\n[execute tool]\n |\n v\n[PostToolUse hooks]\n | exit 0 -> continue\n | exit 2 -> append stderr to result\n |\n v\nreturn result\n```\n\n## Read Together\n\n- If you still picture hooks as \"more if/else branches inside the main loop,\" you might find it helpful to revisit [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md) first.\n- If the main loop, the tool handler, and hook side effects start to blur together, [`entity-map.md`](./entity-map.md) can help you separate who advances core state and who only watches from the side.\n- If you plan to continue into prompt assembly, recovery, or teams, keeping [`s00e-reference-module-map.md`](./s00e-reference-module-map.md) nearby is useful because this \"core loop plus sidecar extension\" pattern returns repeatedly.\n\n## How It Works\n\n**Step 1.** Define three lifecycle events. `SessionStart` fires once when the agent starts up -- useful for initialization, logging, or environment checks. `PreToolUse` fires before every tool call and is the only event that can block execution. `PostToolUse` fires after every tool call and can annotate the result but cannot undo it.\n\n| Event | When | Can Block? 
|\n|-------|------|-----------|\n| `SessionStart` | Once at session start | No |\n| `PreToolUse` | Before each tool call | Yes (exit 1) |\n| `PostToolUse` | After each tool call | No |\n\n**Step 2.** Configure hooks in an external `.hooks.json` file at the workspace root. Each hook specifies a shell command to run. An optional `matcher` field filters by tool name -- without a matcher, the hook fires for every tool.\n\n```json\n{\n \"hooks\": {\n \"PreToolUse\": [\n {\"matcher\": \"bash\", \"command\": \"echo 'Checking bash command...'\"},\n {\"matcher\": \"write_file\", \"command\": \"/path/to/lint-check.sh\"}\n ],\n \"PostToolUse\": [\n {\"command\": \"echo 'Tool finished'\"}\n ],\n \"SessionStart\": [\n {\"command\": \"echo 'Session started at $(date)'\"}\n ]\n }\n}\n```\n\n**Step 3.** Implement the exit code protocol. This is the heart of the hook system -- three exit codes, three meanings. The protocol is deliberately simple so that any language or script can participate. Write your hook in bash, Python, Ruby, whatever -- as long as it exits with the right code.\n\n| Exit Code | Meaning | PreToolUse | PostToolUse |\n|-----------|---------|-----------|------------|\n| 0 | Success | Continue to execute tool | Continue normally |\n| 1 | Block | Tool NOT executed, stderr returned as error | Warning logged |\n| 2 | Inject | stderr injected as message, tool still executes | stderr appended to result |\n\n**Step 4.** Pass context to hooks via environment variables. Hooks need to know what is happening -- which event triggered them, which tool is being called, and what the input looks like. For `PostToolUse` hooks, the tool output is also available.\n\n```\nHOOK_EVENT=PreToolUse\nHOOK_TOOL_NAME=bash\nHOOK_TOOL_INPUT={\"command\": \"npm test\"}\nHOOK_TOOL_OUTPUT=... (PostToolUse only)\n```\n\n**Step 5.** Integrate hooks into the agent loop. The integration is clean: run pre-hooks before execution, check if any blocked, execute the tool, run post-hooks, and collect any injected messages. The loop still owns control flow -- hooks only observe, block, or annotate at named moments.\n\n```python\n# Before tool execution\npre_result = hooks.run_hooks(\"PreToolUse\", ctx)\nif pre_result[\"blocked\"]:\n output = f\"Blocked by hook: {pre_result['block_reason']}\"\n continue\n\n# Execute tool\noutput = handler(**tool_input)\n\n# After tool execution\npost_result = hooks.run_hooks(\"PostToolUse\", ctx)\nfor msg in post_result[\"messages\"]:\n output += f\"\\n[Hook note]: {msg}\"\n```\n\n## What Changed From s07\n\n| Component | Before (s07) | After (s08) |\n|-----------|-------------|-------------|\n| Extensibility | None | Shell-based hook system |\n| Events | None | PreToolUse, PostToolUse, SessionStart |\n| Control flow | Permission pipeline only | Permission + hooks |\n| Configuration | In-code rules | External `.hooks.json` file |\n\n## Try It\n\n```sh\ncd learn-claude-code\n# Create a hook config\ncat > .hooks.json << 'EOF'\n{\n \"hooks\": {\n \"PreToolUse\": [\n {\"matcher\": \"bash\", \"command\": \"echo 'Auditing bash command' >&2; exit 0\"}\n ],\n \"SessionStart\": [\n {\"command\": \"echo 'Agent session started'\"}\n ]\n }\n}\nEOF\npython agents/s08_hook_system.py\n```\n\n1. Watch SessionStart hook fire at startup\n2. Ask the agent to run a bash command -- see PreToolUse hook fire\n3. Create a blocking hook (exit 1) and watch it prevent tool execution\n4. 
Create an injecting hook (exit 2) and watch it add messages to the conversation\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Explain why extension points are better than in-loop conditionals for adding new behavior\n- Define lifecycle events at the right moments in the agent loop\n- Write shell hooks that communicate intent through a three-code exit protocol\n- Configure hooks externally so different teams can customize behavior without touching the agent code\n- Maintain the boundary: the loop owns control flow, the handler owns execution, hooks only observe, block, or annotate\n\n## What's Next\n\nYour agent can now execute tools safely (s07) and be extended without code changes (s08). But it still has amnesia -- every new session starts from zero. The user's preferences, corrections, and project context are forgotten the moment the session ends. In s09, you will build a memory system that lets the agent carry durable facts across sessions.\n\n## Key Takeaway\n\n> The main loop can expose fixed extension points without giving up ownership of control flow -- hooks observe, block, or annotate, but the loop still decides what happens next.\n"
},
{
"version": "s09",
+ "slug": "s09-memory-system",
"locale": "en",
- "title": "s09: Agent Teams",
- "content": "# s09: Agent Teams\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"When the task is too big for one, delegate to teammates\"* -- persistent teammates + async mailboxes.\n\n## Problem\n\nSubagents (s04) are disposable: spawn, work, return summary, die. No identity, no memory between invocations. Background tasks (s08) run shell commands but can't make LLM-guided decisions.\n\nReal teamwork needs: (1) persistent agents that outlive a single prompt, (2) identity and lifecycle management, (3) a communication channel between agents.\n\n## Solution\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## How It Works\n\n1. TeammateManager maintains config.json with the team roster.\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()` creates a teammate and starts its agent loop in a thread.\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. MessageBus: append-only JSONL inboxes. `send()` appends a JSON line; `read_inbox()` reads all and drains.\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 
Each teammate checks its inbox before every LLM call, injecting received messages into context.\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox} \"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## What Changed From s08\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | message + broadcast |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. Type `/team` to see the team roster with statuses\n5. Type `/inbox` to manually check the lead's inbox\n"
+ "title": "s09: Memory System",
+ "kind": "chapter",
+ "filename": "s09-memory-system.md",
+ "content": "# s09: Memory System\n\n`s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > [ s09 ] > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n## What You'll Learn\n\n- Four memory categories that cover what is worth remembering: user preferences, feedback, project facts, and references\n- How YAML frontmatter files give each memory record a name, type, and description\n- What should NOT go into memory -- and why getting this boundary wrong is the most common mistake\n- The difference between memory, tasks, plans, and CLAUDE.md\n\nYour agent from s08 is powerful and extensible. It can execute tools safely, be extended through hooks, and work for long sessions thanks to context compression. But it has amnesia. Every time you start a new session, the agent meets you for the first time. It does not remember that you prefer pnpm over npm, that you told it three times to stop modifying test snapshots, or that the legacy directory cannot be deleted because deployment depends on it. You end up repeating yourself every session. The fix is a small, durable memory store -- not a dump of everything the agent has seen, but a curated set of facts that should still matter next time.\n\n## The Problem\n\nWithout memory, a new session starts from zero. The agent keeps forgetting things like long-term user preferences, corrections you have repeated multiple times, project constraints that are not obvious from the code itself, and external references the project depends on. The result is an agent that always feels like it is meeting you for the first time. You waste time re-establishing context that should have been saved once and loaded automatically.\n\n## The Solution\n\nA small file-based memory store saves durable facts as individual markdown files with YAML frontmatter (a metadata block at the top of each file, delimited by `---` lines). At the start of each session, relevant memories are loaded and injected into the model's context.\n\n```text\nconversation\n |\n | durable fact appears\n v\nsave_memory\n |\n v\n.memory/\n ├── MEMORY.md\n ├── prefer_pnpm.md\n ├── ask_before_codegen.md\n └── incident_dashboard.md\n |\n v\nnext session loads relevant entries\n```\n\n## Read Together\n\n- If you still think memory is just \"a longer context window,\" you might find it helpful to revisit [`s06-context-compact.md`](./s06-context-compact.md) and re-separate compaction from durable memory.\n- If `messages[]`, summary blocks, and the memory store start to blend together, keeping [`data-structures.md`](./data-structures.md) open while reading can help.\n- If you are about to continue into s10, reading [`s10a-message-prompt-pipeline.md`](./s10a-message-prompt-pipeline.md) alongside this chapter is useful because memory matters most when it re-enters the next model input.\n\n## How It Works\n\n**Step 1.** Define four memory categories. These are the types of facts worth keeping across sessions. Each category has a clear purpose -- if a fact does not fit one of these, it probably should not be in memory.\n\n### 1. `user` -- Stable user preferences\n\nExamples: prefers `pnpm`, wants concise answers, dislikes large refactors without a plan.\n\n### 2. `feedback` -- Corrections the user wants enforced\n\nExamples: \"do not change test snapshots unless I ask\", \"ask before modifying generated files.\"\n\n### 3. 
`project` -- Durable project facts not obvious from the repo\n\nExamples: \"this old directory still cannot be deleted because deployment depends on it\", \"this service exists because of a compliance requirement, not technical preference.\"\n\n### 4. `reference` -- Pointers to external resources\n\nExamples: incident board URL, monitoring dashboard location, spec document location.\n\n```python\nMEMORY_TYPES = (\"user\", \"feedback\", \"project\", \"reference\")\n```\n\n**Step 2.** Save one record per file using frontmatter. Each memory is a markdown file with YAML frontmatter that tells the system what the memory is called, what kind it is, and what it is roughly about.\n\n```md\n---\nname: prefer_pnpm\ndescription: User prefers pnpm over npm\ntype: user\n---\nThe user explicitly prefers pnpm for package management commands.\n```\n\n```python\ndef save_memory(name, description, mem_type, content):\n path = memory_dir / f\"{slugify(name)}.md\"\n path.write_text(render_frontmatter(name, description, mem_type) + content)\n rebuild_index()\n```\n\n**Step 3.** Build a small index so the system knows what memories exist without reading every file.\n\n```md\n# Memory Index\n\n- prefer_pnpm [user]\n- ask_before_codegen [feedback]\n- incident_dashboard [reference]\n```\n\nThe index is not the memory itself -- it is a quick map of what exists.\n\n**Step 4.** Load relevant memory at session start and turn it into a prompt section. Memory becomes useful only when it is fed back into the model input. This is why s09 naturally connects into s10.\n\n```python\nmemories = memory_store.load_all()\n```\n\n**Step 5.** Know what should NOT go into memory. This boundary is the most important part of the chapter, and the place where most beginners go wrong.\n\n| Do not store | Why |\n|---|---|\n| file tree layout | can be re-read from the repo |\n| function names and signatures | code is the source of truth |\n| current task status | belongs to task / plan, not memory |\n| temporary branch names or PR numbers | gets stale quickly |\n| secrets or credentials | security risk |\n\nThe right rule is: only keep information that still matters across sessions and cannot be cheaply re-derived from the current workspace.\n\n**Step 6.** Understand the boundaries against neighbor concepts. These four things sound similar but serve different purposes.\n\n| Concept | Purpose | Lifetime |\n|---------|---------|----------|\n| Memory | Facts that should survive across sessions | Persistent |\n| Task | What the system is trying to finish right now | One task |\n| Plan | How this turn or session intends to proceed | One session |\n| CLAUDE.md | Stable instruction documents and project-level standing rules | Persistent |\n\nShort rule of thumb: only useful for this task -- use `task` or `plan`. Useful next session too -- use `memory`. Long-lived instruction text -- use `CLAUDE.md`.\n\n## Common Mistakes\n\n**Mistake 1: Storing things the repo can tell you.** If the code can answer it, memory should not duplicate it. You will just end up with stale copies that conflict with reality.\n\n**Mistake 2: Storing live task progress.** \"Currently fixing auth\" is not memory. That belongs to plan or task state. When the task is done, the memory is meaningless.\n\n**Mistake 3: Treating memory as absolute truth.** Memory can be stale. 
The safer rule is: memory gives direction, current observation gives truth.\n\n## What Changed From s08\n\n| Component | Before (s08) | After (s09) |\n|-----------|-------------|-------------|\n| Cross-session state | None | File-based memory store |\n| Memory types | None | user, feedback, project, reference |\n| Storage format | None | YAML frontmatter markdown files |\n| Session start | Cold start | Loads relevant memories |\n| Durability | Everything forgotten | Key facts persist |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s09_memory_system.py\n```\n\nTry asking it to remember:\n\n- a user preference\n- a correction you want enforced later\n- a project fact that is not obvious from the repository\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Explain why memory is a curated store of durable facts, not a dump of everything the agent has seen\n- Categorize facts into four types: user preferences, feedback, project knowledge, and references\n- Store and retrieve memories using frontmatter-based markdown files\n- Draw a clear line between what belongs in memory and what belongs in task state, plans, or CLAUDE.md\n- Avoid the three most common mistakes: duplicating the repo, storing transient state, and treating memories as ground truth\n\n## What's Next\n\nYour agent now remembers things across sessions, but those memories just sit in a file until session start. In s10, you will build the system prompt assembly pipeline -- the mechanism that takes memories, skills, permissions, and other context and weaves them into the prompt that the model actually sees on every turn.\n\n## Key Takeaway\n\n> Memory is not a dump of everything the agent has seen -- it is a small store of durable facts that should still matter next session.\n"
},
{
"version": "s10",
+ "slug": "s10-system-prompt",
"locale": "en",
- "title": "s10: Team Protocols",
- "content": "# s10: Team Protocols\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"Teammates need shared communication rules\"* -- one request-response pattern drives all negotiation.\n\n## Problem\n\nIn s09, teammates work and communicate but lack structured coordination:\n\n**Shutdown**: Killing a thread leaves files half-written and config.json stale. You need a handshake: the lead requests, the teammate approves (finish and exit) or rejects (keep working).\n\n**Plan approval**: When the lead says \"refactor the auth module,\" the teammate starts immediately. For high-risk changes, the lead should review the plan first.\n\nBoth share the same structure: one side sends a request with a unique ID, the other responds referencing that ID.\n\n## Solution\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## How It Works\n\n1. The lead initiates shutdown by generating a request_id and sending through the inbox.\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. The teammate receives the request and responds with approve/reject.\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. Plan approval follows the identical pattern. The teammate submits a plan (generating a request_id), the lead reviews (referencing the same request_id).\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\nOne FSM, two applications. The same `pending -> approved | rejected` state machine handles any request-response protocol.\n\n## What Changed From s09\n\n| Component | Before (s09) | After (s10) |\n|----------------|------------------|------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan)|\n| Shutdown | Natural exit only| Request-response handshake |\n| Plan gating | None | Submit/review with approval |\n| Correlation | None | request_id per request |\n| FSM | None | pending -> approved/rejected |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. 
Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. Type `/team` to monitor statuses\n"
+ "title": "s10: System Prompt",
+ "kind": "chapter",
+ "filename": "s10-system-prompt.md",
+ "content": "# s10: System Prompt\n\n`s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > [ s10 ] > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n## What You'll Learn\n\n- How to assemble the system prompt from independent sections instead of one hardcoded string\n- The boundary between stable content (role, rules) and dynamic content (date, cwd, per-turn reminders)\n- How CLAUDE.md files layer instructions without overwriting each other\n- Why memory must be re-injected through the prompt pipeline to actually guide the agent\n\nWhen your agent had one tool and one job, a single hardcoded prompt string worked fine. But look at everything your harness has accumulated by now: a role description, tool definitions, loaded skills, saved memory, CLAUDE.md instruction files, and per-turn runtime context. If you keep cramming all of that into one big string, nobody -- including you -- can tell where each piece came from, why it is there, or how to change it safely. The fix is to stop treating the prompt as a blob and start treating it as an assembly pipeline.\n\n## The Problem\n\nImagine you want to add a new tool to your agent. You open the system prompt, scroll past the role paragraph, past the safety rules, past the three skill descriptions, past the memory block, and paste a tool description somewhere in the middle. Next week someone else adds a CLAUDE.md loader and appends its output to the same string. A month later the prompt is 6,000 characters long, half of it is stale, and nobody remembers which lines are supposed to change per turn and which should stay fixed across the entire session.\n\nThis is not a hypothetical scenario -- it is the natural trajectory of every agent that keeps its prompt in a single variable.\n\n## The Solution\n\nTurn prompt construction into a pipeline. Each section has one source and one responsibility. A builder object assembles them in a fixed order, with a clear boundary between parts that stay stable and parts that change every turn.\n\n```text\n1. core identity and rules\n2. tool catalog\n3. skills\n4. memory\n5. CLAUDE.md instruction chain\n6. dynamic runtime context\n```\n\nThen assemble:\n\n```text\ncore\n+ tools\n+ skills\n+ memory\n+ claude_md\n+ dynamic_context\n= final model input\n```\n\n## How It Works\n\n**Step 1. Define the builder.** Each method owns exactly one source of content.\n\n```python\nclass SystemPromptBuilder:\n def build(self) -> str:\n parts = []\n parts.append(self._build_core())\n parts.append(self._build_tools())\n parts.append(self._build_skills())\n parts.append(self._build_memory())\n parts.append(self._build_claude_md())\n parts.append(self._build_dynamic())\n return \"\\n\\n\".join(p for p in parts if p)\n```\n\nThat is the central idea of the chapter. Each `_build_*` method pulls from one source only: `_build_tools()` reads the tool list, `_build_memory()` reads the memory store, and so on. If you want to know where a line in the prompt came from, you check the one method responsible for it.\n\n**Step 2. 
Separate stable content from dynamic content.** This is the most important boundary in the entire pipeline.\n\nStable content changes rarely or never during a session:\n\n- role description\n- tool contract (the list of tools and their schemas)\n- long-lived safety rules\n- project instruction chain (CLAUDE.md files)\n\nDynamic content changes every turn or every few turns:\n\n- current date\n- current working directory\n- current mode (plan mode, code mode, etc.)\n- per-turn warnings or reminders\n\nMixing these together means the model re-reads thousands of tokens of stable text that have not changed, while the few tokens that did change are buried somewhere in the middle. A real system separates them with a boundary marker so the stable prefix can be cached across turns to save prompt tokens.\n\n**Step 3. Layer CLAUDE.md instructions.** `CLAUDE.md` is not the same as memory and not the same as a skill. It is a layered instruction source -- meaning multiple files contribute, and later layers add to earlier ones rather than replacing them:\n\n1. user-level instruction file (`~/.claude/CLAUDE.md`)\n2. project-root instruction file (`/CLAUDE.md`)\n3. deeper subdirectory instruction files\n\nThe important point is not the filename itself. The important point is that instruction sources can be layered instead of overwritten.\n\n**Step 4. Re-inject memory.** Saving memory (in s09) is only half the mechanism. If memory never re-enters the model input, it is not actually guiding the agent. So memory naturally belongs in the prompt pipeline:\n\n- save durable facts in `s09`\n- re-inject them through the prompt builder in `s10`\n\n**Step 5. Attach per-turn reminders separately.** Some information is even more short-lived than \"dynamic context\" -- it only matters for this one turn and should not pollute the stable system prompt. A `system-reminder` user message keeps these transient signals outside the builder entirely:\n\n- this-turn-only instructions\n- temporary notices\n- transient recovery guidance\n\n## What Changed from s09\n\n| Aspect | s09: Memory System | s10: System Prompt |\n|--------|--------------------|--------------------|\n| Core concern | Persist durable facts across sessions | Assemble all sources into model input |\n| Memory's role | Write and store | Read and inject |\n| Prompt structure | Assumed but not managed | Explicit pipeline with sections |\n| Instruction files | Not addressed | CLAUDE.md layering introduced |\n| Dynamic context | Not addressed | Separated from stable content |\n\n## Read Together\n\n- If you still treat the prompt as one mysterious blob of text, revisit [`s00a-query-control-plane.md`](./s00a-query-control-plane.md) to see what reaches the model and through which control layers.\n- If you want to stabilize the order of assembly, keep [`s10a-message-prompt-pipeline.md`](./s10a-message-prompt-pipeline.md) beside this chapter -- it is the key bridge note for `s10`.\n- If system rules, tool docs, memory, and runtime state start to collapse into one big input lump, reset with [`data-structures.md`](./data-structures.md).\n\n## Common Beginner Mistakes\n\n**Mistake 1: teaching the prompt as one fixed string.** That hides how the system really grows. A fixed string is fine for a demo; it stops being fine the moment you add a second capability.\n\n**Mistake 2: putting every changing detail into the same prompt block.** That mixes durable rules with per-turn noise. 
When you update one, you risk breaking the other.\n\n**Mistake 3: treating skills, memory, and CLAUDE.md as the same thing.** They may all become prompt sections, but their source and purpose are different:\n\n- `skills`: optional capability packages loaded on demand\n- `memory`: durable cross-session facts about the user or project\n- `CLAUDE.md`: standing instruction documents that layer without overwriting\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s10_system_prompt.py\n```\n\nLook for these three things:\n\n1. where each section comes from\n2. which parts are stable\n3. which parts are generated dynamically each turn\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Build a system prompt from independent, testable sections instead of one opaque string\n- Draw a clear line between stable content and dynamic content\n- Layer instruction files so that project-level and directory-level rules coexist without overwriting\n- Re-inject memory into the prompt pipeline so saved facts actually influence the model\n- Attach per-turn reminders separately from the main system prompt\n\n## What's Next\n\nThe prompt assembly pipeline means your agent now enters each turn with the right instructions, the right tools, and the right context. But real work produces real failures -- output gets cut off, the prompt grows too large, the API times out. In [s11: Error Recovery](./s11-error-recovery.md), you will teach the harness to classify those failures and choose a recovery path instead of crashing.\n\n## Key Takeaway\n\n> The system prompt is an assembly pipeline with clear sections and clear boundaries, not one big mysterious string.\n"
+ },
+ {
+ "version": null,
+ "slug": "s10a-message-prompt-pipeline",
+ "locale": "en",
+ "title": "s10a: Message & Prompt Pipeline",
+ "kind": "bridge",
+ "filename": "s10a-message-prompt-pipeline.md",
+ "content": "# s10a: Message & Prompt Pipeline\n\n> **Deep Dive** -- Best read alongside s10. It shows why the system prompt is only one piece of the model's full input.\n\n### When to Read This\n\nWhen you're working on prompt assembly and want to see the complete input pipeline.\n\n---\n\n> This bridge document extends `s10`.\n>\n> It exists to make one crucial idea explicit:\n>\n> **the system prompt matters, but it is not the whole model input.**\n\n## Why This Document Exists\n\n`s10` already upgrades the system prompt from one giant string into a maintainable assembly process.\n\nThat is important.\n\nBut a higher-completion system goes one step further and treats the whole model input as a pipeline made from multiple sources:\n\n- system prompt blocks\n- normalized messages\n- memory attachments\n- reminder injections\n- dynamic runtime context\n\nSo the true structure is:\n\n**a prompt pipeline, not only a prompt builder.**\n\n## Terms First\n\n### Prompt block\n\nA structured piece inside the system prompt, such as:\n\n- core identity\n- tool instructions\n- memory section\n- CLAUDE.md section\n\n### Normalized message\n\nA message that has already been converted into a stable shape suitable for the model API.\n\nThis is necessary because the raw system may contain:\n\n- user messages\n- assistant replies\n- tool results\n- reminder injections\n- attachment-like content\n\nNormalization ensures all of these fit the same structural contract before they reach the API.\n\n### System reminder\n\nA small temporary instruction injected for the current turn or current mode.\n\nUnlike a long-lived prompt block, a reminder is usually short-lived and situational -- for example, telling the model it is currently in \"plan mode\" or that a certain tool is temporarily unavailable.\n\n## The Smallest Useful Mental Model\n\nThink of the full input as a pipeline:\n\n```text\nmultiple sources\n |\n +-- system prompt blocks\n +-- messages\n +-- attachments\n +-- reminders\n |\n v\nnormalize\n |\n v\nfinal API payload\n```\n\nThe key teaching point is:\n\n**separate the sources first, then normalize them into one stable input.**\n\n## Why System Prompt Is Not Everything\n\nThe system prompt is the right place for:\n\n- identity\n- stable rules\n- long-lived constraints\n- tool capability descriptions\n\nBut it is usually the wrong place for:\n\n- the latest `tool_result`\n- one-turn hook injections\n- temporary reminders\n- dynamic memory attachments\n\nThose belong in the message stream or in adjacent input surfaces.\n\n## Core Structures\n\n### `SystemPromptBlock`\n\n```python\nblock = {\n \"text\": \"...\",\n \"cache_scope\": None,\n}\n```\n\n### `PromptParts`\n\n```python\nparts = {\n \"core\": \"...\",\n \"tools\": \"...\",\n \"skills\": \"...\",\n \"memory\": \"...\",\n \"claude_md\": \"...\",\n \"dynamic\": \"...\",\n}\n```\n\n### `NormalizedMessage`\n\n```python\nmessage = {\n \"role\": \"user\" | \"assistant\",\n \"content\": [...],\n}\n```\n\nTreat `content` as a list of blocks, not just one string.\n\n### `ReminderMessage`\n\n```python\nreminder = {\n \"role\": \"system\",\n \"content\": \"Current mode: plan\",\n}\n```\n\nEven if your teaching implementation does not literally use `role=\"system\"` here, you should still keep the mental split:\n\n- long-lived prompt block\n- short-lived reminder\n\n## Minimal Implementation Path\n\n### 1. Keep a `SystemPromptBuilder`\n\nDo not throw away the prompt-builder step.\n\n### 2. 
Make messages a separate pipeline\n\n```python\ndef build_messages(raw_messages, attachments, reminders):\n messages = normalize_messages(raw_messages)\n messages = attach_memory(messages, attachments)\n messages = append_reminders(messages, reminders)\n return messages\n```\n\n### 3. Assemble the final payload only at the end\n\n```python\npayload = {\n \"system\": build_system_prompt(),\n \"messages\": build_messages(...),\n \"tools\": build_tools(...),\n}\n```\n\nThis is the important mental upgrade:\n\n**system prompt, messages, and tools are parallel input surfaces, not replacements for one another.**\n\n## Key Takeaway\n\n**The model input is a pipeline of sources that are normalized late, not one mystical prompt blob. System prompt, messages, and tools are parallel surfaces that converge only at send time.**\n"
},
{
"version": "s11",
+ "slug": "s11-error-recovery",
"locale": "en",
- "title": "s11: Autonomous Agents",
- "content": "# s11: Autonomous Agents\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"Teammates scan the board and claim tasks themselves\"* -- no need for the lead to assign each one.\n\n## Problem\n\nIn s09-s10, teammates only work when explicitly told to. The lead must spawn each one with a specific prompt. 10 unclaimed tasks on the board? The lead assigns each one manually. Doesn't scale.\n\nTrue autonomy: teammates scan the task board themselves, claim unclaimed tasks, work on them, then look for more.\n\nOne subtlety: after context compression (s06), the agent might forget who it is. Identity re-injection fixes this.\n\n## Solution\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## How It Works\n\n1. The teammate loop has two phases: WORK and IDLE. When the LLM stops calling tools (or calls `idle`), the teammate enters IDLE.\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. The idle phase polls inbox and task board in a loop.\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox} \"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']} \"})\n return True\n return False # timeout -> shutdown\n```\n\n3. Task board scanning: find pending, unowned, unblocked tasks.\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. Identity re-injection: when context is too short (compression happened), insert an identity block.\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work. \"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. 
Continuing.\"})\n```\n\n## What Changed From s10\n\n| Component | Before (s10) | After (s11) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. Type `/tasks` to see the task board with owners\n5. Type `/team` to monitor who is working vs idle\n"
+ "title": "s11: Error Recovery",
+ "kind": "chapter",
+ "filename": "s11-error-recovery.md",
+ "content": "# s11: Error Recovery\n\n`s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > [ s11 ] > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n## What You'll Learn\n\n- Three categories of recoverable failure: truncation, context overflow, and transient transport errors\n- How to route each failure to the right recovery branch (continuation, compaction, or backoff)\n- Why retry budgets prevent infinite loops\n- How recovery state keeps the \"why\" visible instead of burying it in a catch block\n\nYour agent is doing real work now -- reading files, writing code, calling tools across multiple turns. And real work produces real failures. Output gets cut off mid-sentence. The prompt grows past the model's context window. The API times out or hits a rate limit. If every one of these failures ends the run immediately, your system feels brittle and your users learn not to trust it. But here is the key insight: most of these failures are not true task failure. They are signals that the next step needs a different continuation path.\n\n## The Problem\n\nYour user asks the agent to refactor a large file. The model starts writing the new version, but the output hits `max_tokens` and stops mid-function. Without recovery, the agent just halts with a half-written file. The user has to notice, re-prompt, and hope the model picks up where it left off.\n\nOr: the conversation has been running for 40 turns. The accumulated messages push the prompt past the model's context limit. The API returns an error. Without recovery, the entire session is lost.\n\nOr: a momentary network hiccup drops the connection. Without recovery, the agent crashes even though the same request would succeed one second later.\n\nEach of these is a different kind of failure, and each needs a different recovery action. A single catch-all retry cannot handle all three correctly.\n\n## The Solution\n\nClassify the failure first, choose the recovery branch second, and enforce a retry budget so the system cannot loop forever.\n\n```text\nLLM call\n |\n +-- stop_reason == \"max_tokens\"\n | -> append continuation reminder\n | -> retry\n |\n +-- prompt too long\n | -> compact context\n | -> retry\n |\n +-- timeout / rate limit / connection error\n -> back off\n -> retry\n```\n\n## How It Works\n\n**Step 1. Track recovery state.** Before you can recover, you need to know how many times you have already tried. A simple counter per category prevents infinite loops:\n\n```python\nrecovery_state = {\n \"continuation_attempts\": 0,\n \"compact_attempts\": 0,\n \"transport_attempts\": 0,\n}\n```\n\n**Step 2. Classify the failure.** Each failure maps to exactly one recovery kind. The classifier examines the stop reason and error text, then returns a structured decision:\n\n```python\ndef choose_recovery(stop_reason: str | None, error_text: str | None) -> dict:\n if stop_reason == \"max_tokens\":\n return {\"kind\": \"continue\", \"reason\": \"output truncated\"}\n\n if error_text and \"prompt\" in error_text and \"long\" in error_text:\n return {\"kind\": \"compact\", \"reason\": \"context too large\"}\n\n if error_text and any(word in error_text for word in [\n \"timeout\", \"rate\", \"unavailable\", \"connection\"\n ]):\n return {\"kind\": \"backoff\", \"reason\": \"transient transport failure\"}\n\n return {\"kind\": \"fail\", \"reason\": \"unknown or non-recoverable error\"}\n```\n\nThe separation matters: classify first, act second. 
That way the recovery reason stays visible in state instead of disappearing inside a catch block.\n\n**Step 3. Handle continuation (truncated output).** When the model runs out of output space, the task did not fail -- the turn just ended too early. You inject a continuation reminder and retry:\n\n```python\nCONTINUE_MESSAGE = (\n \"Output limit hit. Continue directly from where you stopped. \"\n \"Do not restart or repeat.\"\n)\n```\n\nWithout this reminder, models tend to restart from the beginning or repeat what they already wrote. The explicit instruction to \"continue directly\" keeps the output flowing forward.\n\n**Step 4. Handle compaction (context overflow).** When the prompt becomes too large, the problem is not the task itself -- the accumulated context needs to shrink before the next turn can proceed. You call the same `auto_compact` mechanism from s06 to summarize history, then retry:\n\n```python\nif decision[\"kind\"] == \"compact\":\n messages = auto_compact(messages)\n continue\n```\n\n**Step 5. Handle backoff (transient errors).** When the error is probably temporary -- a timeout, a rate limit, a brief outage -- you wait and try again. Exponential backoff (doubling the delay each attempt, plus random jitter to avoid thundering-herd problems where many clients retry at the same instant) keeps the system from hammering a struggling server:\n\n```python\ndef backoff_delay(attempt: int) -> float:\n delay = min(BACKOFF_BASE_DELAY * (2 ** attempt), BACKOFF_MAX_DELAY)\n jitter = random.uniform(0, 1)\n return delay + jitter\n```\n\n**Step 6. Wire it into the loop.** The recovery logic sits right inside the agent loop. Each branch either adjusts the messages and continues, or gives up:\n\n```python\nwhile True:\n try:\n response = client.messages.create(...)\n decision = choose_recovery(response.stop_reason, None)\n except Exception as e:\n response = None\n decision = choose_recovery(None, str(e).lower())\n\n if decision[\"kind\"] == \"continue\":\n messages.append({\"role\": \"user\", \"content\": CONTINUE_MESSAGE})\n continue\n\n if decision[\"kind\"] == \"compact\":\n messages = auto_compact(messages)\n continue\n\n if decision[\"kind\"] == \"backoff\":\n time.sleep(backoff_delay(...))\n continue\n\n if decision[\"kind\"] == \"fail\":\n break\n```\n\nThe point is not clever code. The point is: classify, choose, retry with a budget.\n\n## What Changed from s10\n\n| Aspect | s10: System Prompt | s11: Error Recovery |\n|--------|--------------------|--------------------|\n| Core concern | Assemble model input from sections | Handle failures without crashing |\n| Loop behavior | Runs until end_turn or tool_use | Adds recovery branches before giving up |\n| Compaction | Not addressed | Triggered reactively on context overflow |\n| Retry logic | Not addressed | Budgeted per failure category |\n| State tracking | Prompt sections | Recovery counters |\n\n## A Note on Real Systems\n\nReal agent systems also persist session state to disk, so that a crash does not destroy a long-running conversation. Session persistence, checkpointing, and resumption are separate concerns from error recovery -- but they complement it. Recovery handles the failures you can retry in-process; persistence handles the failures you cannot. 
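As a rough sketch of the persistence half (the file path and helper names here are illustrative, not part of this chapter's code):\n\n```python\nimport json\nfrom pathlib import Path\n\nSESSION_FILE = Path(\".sessions/current.json\")\n\ndef checkpoint(messages: list) -> None:\n    # Persist the conversation after each turn so a crash loses at most one turn.\n    SESSION_FILE.parent.mkdir(parents=True, exist_ok=True)\n    SESSION_FILE.write_text(json.dumps(messages))\n\ndef resume() -> list:\n    # On startup, pick up the previous conversation if one was saved.\n    if SESSION_FILE.exists():\n        return json.loads(SESSION_FILE.read_text())\n    return []\n```\n\n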
This teaching harness focuses on the in-process recovery paths, but keep in mind that production systems need both layers.\n\n## Read Together\n\n- If you start losing track of why the current query is still continuing, go back to [`s00c-query-transition-model.md`](./s00c-query-transition-model.md).\n- If context compaction and error recovery are starting to look like the same mechanism, reread [`s06-context-compact.md`](./s06-context-compact.md) to separate \"shrink context\" from \"recover after failure.\"\n- If you are about to move into `s12`, keep [`data-structures.md`](./data-structures.md) nearby because the task system adds a new durable work layer on top of recovery state.\n\n## Common Beginner Mistakes\n\n**Mistake 1: using one retry rule for every error.** Different failures need different recovery actions. Retrying a context-overflow error without compacting first will just produce the same error again.\n\n**Mistake 2: no retry budget.** Without budgets, the system can loop forever. Each recovery category needs its own counter and its own maximum.\n\n**Mistake 3: hiding the recovery reason.** The system should know *why* it is retrying. That reason should stay visible in state -- as a structured decision object -- not disappear inside a catch block.\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s11_error_recovery.py\n```\n\nTry forcing:\n\n- a long response (to trigger max_tokens continuation)\n- a large context (to trigger compaction)\n- a temporary timeout (to trigger backoff)\n\nThen observe which recovery branch the system chooses and how the retry counter increments.\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Classify agent failures into three recoverable categories and one terminal category\n- Route each failure to the correct recovery branch: continuation, compaction, or backoff\n- Enforce retry budgets so the system never loops forever\n- Keep recovery decisions visible as structured state instead of burying them in exception handlers\n- Explain why different failure types need different recovery actions\n\n## Stage 2 Complete\n\nYou have finished Stage 2 of the harness. Look at what you have built since Stage 1:\n\n- **s07 Permission System** -- the harness asks before acting, and the user controls what gets auto-approved\n- **s08 Hook System** -- external scripts run at lifecycle points without touching the agent loop\n- **s09 Memory System** -- durable facts survive across sessions\n- **s10 System Prompt** -- the prompt is an assembly pipeline with clear sections, not one big string\n- **s11 Error Recovery** -- failures route to the right recovery path instead of crashing\n\nYour agent started Stage 2 as a working loop that could call tools and manage context. It finishes Stage 2 as a system that governs itself: it checks permissions, runs hooks, remembers what matters, assembles its own instructions, and recovers from failures without human intervention.\n\nThat is a real agent harness. If you stopped here and built a product on top of it, you would have something genuinely useful.\n\nBut there is more to build. Stage 3 introduces structured work management -- task lists, background execution, and scheduled jobs. The agent stops being purely reactive and starts organizing its own work across time. 
See you in [s12: Task System](./s12-task-system.md).\n\n## Key Takeaway\n\n> Most agent failures are not true task failure -- they are signals to try a different continuation path, and the harness should classify them and recover automatically.\n"
},
{
"version": "s12",
+ "slug": "s12-task-system",
+ "locale": "en",
+ "title": "s12: Task System",
+ "kind": "chapter",
+ "filename": "s12-task-system.md",
+ "content": "# s12: Task System\n\n`s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > [ s12 ] > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n## What You'll Learn\n\n- How to promote a flat checklist into a task graph with explicit dependencies\n- How `blockedBy` and `blocks` edges express ordering and parallelism\n- How status transitions (`pending` -> `in_progress` -> `completed`) drive automatic unblocking\n- How persisting tasks to disk makes them survive compression and restarts\n\nBack in s03 you gave the agent a TodoWrite tool -- a flat checklist that tracks what is done and what is not. That works well for a single focused session. But real work has structure. Task B depends on task A. Tasks C and D can run in parallel. Task E waits for both C and D. A flat list cannot express any of that. And because the checklist lives only in memory, context compression (s06) wipes it clean. In this chapter you will replace the checklist with a proper task graph that understands dependencies, persists to disk, and becomes the coordination backbone for everything that follows.\n\n## The Problem\n\nImagine you ask your agent to refactor a codebase: parse the AST, transform the nodes, emit the new code, and run the tests. The parse step must finish before transform and emit can begin. Transform and emit can run in parallel. Tests must wait for both. With s03's flat TodoWrite, the agent has no way to express these relationships. It might attempt the transform before the parse is done, or run the tests before anything is ready. There is no ordering, no dependency tracking, and no status beyond \"done or not.\" Worse, if the context window fills up and compression kicks in, the entire plan vanishes.\n\n## The Solution\n\nPromote the checklist into a task graph persisted to disk. Each task is a JSON file with status, dependencies (`blockedBy`), and dependents (`blocks`). The graph answers three questions at any moment: what is ready, what is blocked, and what is done.\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\nTask graph (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\nOrdering: task 1 must finish before 2 and 3\nParallelism: tasks 2 and 3 can run at the same time\nDependencies: task 4 waits for both 2 and 3\nStatus: pending -> in_progress -> completed\n```\n\nThe structure above is a DAG -- a directed acyclic graph, meaning tasks flow forward and never loop back. 
This task graph becomes the coordination backbone for the later chapters: background execution (s13), agent teams (s15+), and worktree isolation (s18) all build on the same durable task structure.\n\n## How It Works\n\n**Step 1.** Create a `TaskManager` that stores one JSON file per task, with CRUD operations and a dependency graph.\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n**Step 2.** Implement dependency resolution. When a task completes, clear its ID from every other task's `blockedBy` list, automatically unblocking dependents.\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n**Step 3.** Wire up status transitions and dependency edges in the `update` method. When a task's status changes to `completed`, the dependency-clearing logic from Step 2 fires automatically.\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n**Step 4.** Register four task tools in the dispatch map, giving the agent full control over creating, updating, listing, and inspecting tasks.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\nFrom s12 onward, the task graph becomes the default for durable multi-step work. s03's Todo remains useful for quick single-session checklists, but anything that needs ordering, parallelism, or persistence belongs here.\n\n## Read Together\n\n- If you are coming straight from s03, revisit [`data-structures.md`](./data-structures.md) to separate `TodoItem` / `PlanState` from `TaskRecord` -- they look similar but serve different purposes.\n- If object boundaries start to blur, reset with [`entity-map.md`](./entity-map.md) before you mix messages, tasks, runtime tasks, and teammates into one layer.\n- If you plan to continue into s13, keep [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) beside this chapter because durable tasks and runtime tasks are the easiest pair to confuse next.\n\n## What Changed\n\n| Component | Before (s06) | After (s12) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| Planning model | Flat checklist (in-memory) | Task graph with dependencies (on disk) |\n| Relationships | None | `blockedBy` + `blocks` edges |\n| Status tracking | Done or not | `pending` -> `in_progress` -> `completed` |\n| Persistence | Lost on compression | Survives compression and restarts |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s12_task_system.py\n```\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. 
`List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Build a file-based task graph where each task is a self-contained JSON record\n- Express ordering and parallelism through `blockedBy` and `blocks` dependency edges\n- Implement automatic unblocking when upstream tasks complete\n- Persist planning state so it survives context compression and process restarts\n\n## What's Next\n\nTasks now have structure and live on disk. But every tool call still blocks the main loop -- if a task involves a slow subprocess like `npm install` or `pytest`, the agent sits idle waiting. In s13 you will add background execution so slow work runs in parallel while the agent keeps thinking.\n\n## Key Takeaway\n\n> A task graph with explicit dependencies turns a flat checklist into a coordination structure that knows what is ready, what is blocked, and what can run in parallel.\n"
+ },
+ {
+ "version": "s13",
+ "slug": "s13-background-tasks",
+ "locale": "en",
+ "title": "s13: Background Tasks",
+ "kind": "chapter",
+ "filename": "s13-background-tasks.md",
+ "content": "# s13: Background Tasks\n\n`s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > [ s13 ] > s14 > s15 > s16 > s17 > s18 > s19`\n\n## What You'll Learn\n\n- How to run slow commands in background threads while the main loop stays responsive\n- How a thread-safe notification queue delivers results back to the agent\n- How daemon threads keep the process clean on exit\n- How the drain-before-call pattern injects background results at exactly the right moment\n\nYou have a task graph now, and every task can express what it depends on. But there is a practical problem: some tasks involve commands that take minutes. `npm install`, `pytest`, `docker build` -- these block the main loop, and while the agent waits, the user waits too. If the user says \"install dependencies and while that runs, create the config file,\" your agent from s12 does them sequentially because it has no way to start something and come back to it later. This chapter fixes that by adding background execution.\n\n## The Problem\n\nConsider a realistic workflow: the user asks the agent to run a full test suite (which takes 90 seconds) and then set up a configuration file. With a blocking loop, the agent submits the test command, stares at a spinning subprocess for 90 seconds, gets the result, and only then starts the config file. The user watches all of this happen serially. Worse, if there are three slow commands, total wall-clock time is the sum of all three -- even though they could have run in parallel. The agent needs a way to start slow work, give control back to the main loop immediately, and pick up the results later.\n\n## The Solution\n\nKeep the main loop single-threaded, but run slow subprocesses on background daemon threads. When a background command finishes, its result goes into a thread-safe notification queue. Before each LLM call, the main loop drains that queue and injects any completed results into the conversation.\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## How It Works\n\n**Step 1.** Create a `BackgroundManager` that tracks running tasks with a thread-safe notification queue. The lock ensures that the main thread and background threads never corrupt the queue simultaneously.\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n**Step 2.** The `run()` method starts a daemon thread and returns immediately. A daemon thread is one that the Python runtime kills automatically when the main program exits -- you do not need to join it or clean it up.\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n**Step 3.** When the subprocess finishes, the background thread puts its result into the notification queue. 
The lock makes this safe even if the main thread is draining the queue at the same time.\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n**Step 4.** The agent loop drains notifications before each LLM call. This is the drain-before-call pattern: right before you ask the model to think, sweep up any background results and add them to the conversation so the model sees them in its next turn.\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\" \"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\nThis teaching demo keeps the core loop single-threaded; only subprocess waiting is parallelized. A production system would typically split background work into several runtime lanes, but starting with one clean pattern makes the mechanics easy to follow.\n\n## Read Together\n\n- If you have not fully separated \"task goal\" from \"running execution slot,\" read [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) first -- it clarifies why a task record and a runtime record are different objects.\n- If you are unsure which state belongs in `RuntimeTaskRecord` and which still belongs on the task board, keep [`data-structures.md`](./data-structures.md) nearby.\n- If background execution starts to feel like \"another main loop,\" go back to [`s02b-tool-execution-runtime.md`](./s02b-tool-execution-runtime.md) and reset the boundary: execution and waiting can run in parallel, but the main loop is still one mainline.\n\n## What Changed\n\n| Component | Before (s12) | After (s13) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s13_background_tasks.py\n```\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Run slow subprocesses on daemon threads without blocking the main agent loop\n- Collect results through a thread-safe notification queue\n- Inject background results into the conversation using the drain-before-call pattern\n- Let the agent work on other things while long-running commands finish in parallel\n\n## What's Next\n\nBackground tasks solve the problem of slow work that starts now. But what about work that should start later -- \"run this every night\" or \"remind me in 30 minutes\"? 
In s14 you will add a cron scheduler that stores future intent and triggers it when the time comes.\n\n## Key Takeaway\n\n> Background execution is a runtime lane, not a second main loop -- slow work runs on daemon threads and feeds results back through a single notification queue.\n"
+ },
+ {
+ "version": null,
+ "slug": "s13a-runtime-task-model",
+ "locale": "en",
+ "title": "s13a: Runtime Task Model",
+ "kind": "bridge",
+ "filename": "s13a-runtime-task-model.md",
+ "content": "# s13a: Runtime Task Model\n\n> **Deep Dive** -- Best read between s12 and s13. It prevents the most common confusion in Stage 3.\n\n### When to Read This\n\nRight after s12 (Task System), before you start s13 (Background Tasks). This note separates two meanings of \"task\" that beginners frequently collapse into one.\n\n---\n\n> This bridge note resolves one confusion that becomes expensive very quickly:\n>\n> **the task in the work graph is not the same thing as the task that is currently running**\n\n## How to Read This with the Mainline\n\nThis note works best between these documents:\n\n- read [`s12-task-system.md`](./s12-task-system.md) first to lock in the durable work graph\n- then read [`s13-background-tasks.md`](./s13-background-tasks.md) to see background execution\n- if the terms begin to blur, you might find it helpful to revisit [`glossary.md`](./glossary.md)\n- if you want the fields to line up exactly, you might find it helpful to revisit [`data-structures.md`](./data-structures.md) and [`entity-map.md`](./entity-map.md)\n\n## Why This Deserves Its Own Bridge Note\n\nThe mainline is still correct:\n\n- `s12` teaches the task system\n- `s13` teaches background tasks\n\nBut without one more bridge layer, you can easily start collapsing two different meanings of \"task\" into one bucket.\n\nFor example:\n\n- a work-graph task such as \"implement auth module\"\n- a background execution such as \"run pytest\"\n- a teammate execution such as \"alice is editing files\"\n\nAll three can be casually called tasks, but they do not live on the same layer.\n\n## Two Very Different Kinds of Task\n\n### 1. Work-graph task\n\nThis is the durable node introduced in `s12`.\n\nIt answers:\n\n- what should be done\n- which work depends on which other work\n- who owns it\n- what the progress status is\n\nIt is best understood as:\n\n> a durable unit of planned work\n\n### 2. Runtime task\n\nThis layer answers:\n\n- what execution unit is alive right now\n- what kind of execution it is\n- whether it is running, completed, failed, or killed\n- where its output lives\n\nIt is best understood as:\n\n> a live execution slot inside the runtime\n\n## The Minimum Mental Model\n\nTreat these as two separate tables:\n\n```text\nwork-graph task\n - durable\n - goal and dependency oriented\n - longer lifecycle\n\nruntime task\n - execution oriented\n - output and status oriented\n - shorter lifecycle\n```\n\nTheir relationship is not \"pick one.\"\n\nIt is:\n\n```text\none work-graph task\n can spawn\none or more runtime tasks\n```\n\nFor example:\n\n```text\nwork-graph task:\n \"Implement auth module\"\n\nruntime tasks:\n 1. run tests in the background\n 2. launch a coder teammate\n 3. monitor an external service\n```\n\n## Why the Distinction Matters\n\nIf you do not keep these layers separate, the later chapters start tangling together:\n\n- `s13` background execution blurs into the `s12` task board\n- `s15-s17` teammate work has nowhere clean to attach\n- `s18` worktrees become unclear because you no longer know what layer they belong to\n\nThe shortest correct summary is:\n\n**work-graph tasks manage goals; runtime tasks manage execution**\n\n## Core Records\n\n### 1. `WorkGraphTaskRecord`\n\nThis is the durable task from `s12`.\n\n```python\ntask = {\n \"id\": 12,\n \"subject\": \"Implement auth module\",\n \"status\": \"in_progress\",\n \"blockedBy\": [],\n \"blocks\": [13],\n \"owner\": \"alice\",\n \"worktree\": \"auth-refactor\",\n}\n```\n\n### 2. 
`RuntimeTaskState`\n\nA minimal teaching shape can look like this:\n\n```python\nruntime_task = {\n \"id\": \"b8k2m1qz\",\n \"type\": \"local_bash\",\n \"status\": \"running\",\n \"description\": \"Run pytest\",\n \"start_time\": 1710000000.0,\n \"end_time\": None,\n \"output_file\": \".task_outputs/b8k2m1qz.txt\",\n \"notified\": False,\n}\n```\n\nThe key fields are:\n\n- `type`: what execution unit this is\n- `status`: whether it is active or terminal\n- `output_file`: where the result is stored\n- `notified`: whether the system already surfaced the result\n\n### 3. `RuntimeTaskType`\n\nYou do not need to implement every type in the teaching repo immediately.\n\nBut you should still know that runtime task is a family, not just one shell command type.\n\nA minimal table:\n\n```text\nlocal_bash\nlocal_agent\nremote_agent\nin_process_teammate\nmonitor\nworkflow\n```\n\n## Minimum Implementation Steps\n\n### Step 1: keep the `s12` task board intact\n\nDo not overload it.\n\n### Step 2: add a separate runtime task manager\n\n```python\nclass RuntimeTaskManager:\n def __init__(self):\n self.tasks = {}\n```\n\n### Step 3: create runtime tasks when background work starts\n\n```python\ndef spawn_bash_task(command: str):\n task_id = new_runtime_id()\n runtime_tasks[task_id] = {\n \"id\": task_id,\n \"type\": \"local_bash\",\n \"status\": \"running\",\n \"description\": command,\n }\n```\n\n### Step 4: optionally link runtime execution back to the work graph\n\n```python\nruntime_tasks[task_id][\"work_graph_task_id\"] = 12\n```\n\nYou do not need that field on day one, but it becomes increasingly important once the system reaches teams and worktrees.\n\n## The Picture You Should Hold\n\n```text\nWork Graph\n task #12: Implement auth module\n |\n +-- runtime task A: local_bash (pytest)\n +-- runtime task B: local_agent (coder worker)\n +-- runtime task C: monitor (watch service status)\n\nRuntime Task Layer\n A/B/C each have:\n - their own runtime ID\n - their own status\n - their own output\n - their own lifecycle\n```\n\n## How This Connects to Later Chapters\n\nOnce this layer is clear, the rest of the runtime and platform chapters become much easier:\n\n- `s13` background commands are runtime tasks\n- `s15-s17` teammates can also be understood as runtime task variants\n- `s18` worktrees mostly bind to durable work, but still affect runtime execution\n- `s19` some monitoring or async external work can also land in the runtime layer\n\nWhenever you see \"something is alive in the background and advancing work,\" ask two questions:\n\n- is this a durable goal from the work graph?\n- or is this a live execution slot in the runtime?\n\n## Common Beginner Mistakes\n\n### 1. Putting background shell state directly into the task board\n\nThat mixes durable task state and runtime execution state.\n\n### 2. Assuming one work-graph task can only have one runtime task\n\nIn real systems, one goal often spawns multiple execution units.\n\n### 3. Reusing the same status vocabulary for both layers\n\nFor example:\n\n- durable tasks: `pending / in_progress / completed`\n- runtime tasks: `running / completed / failed / killed`\n\nThose should stay distinct when possible.\n\n### 4. 
Ignoring runtime-only fields such as `output_file` and `notified`\n\nThe durable task board does not care much about them.\nThe runtime layer cares a lot.\n\n## Key Takeaway\n\n**\"Task\" means two different things: a durable goal in the work graph (what should be done) and a live execution slot in the runtime (what is running right now). Keep them on separate layers.**\n"
+ },
+ {
+ "version": "s14",
+ "slug": "s14-cron-scheduler",
+ "locale": "en",
+ "title": "s14: Cron Scheduler",
+ "kind": "chapter",
+ "filename": "s14-cron-scheduler.md",
+ "content": "# s14: Cron Scheduler\n\n`s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > [ s14 ] > s15 > s16 > s17 > s18 > s19`\n\n## What You'll Learn\n\n- How schedule records store future intent as durable data\n- How a time-based checker turns cron expressions into triggered notifications\n- The difference between durable jobs (survive restarts) and session-only jobs (die with the process)\n- How scheduled work re-enters the agent system through the same notification queue from s13\n\nIn s13 you learned to run slow work in the background so the agent does not block. But that work still starts immediately -- the user says \"run this\" and it runs now. Real workflows often need work that starts later: \"run this every night,\" \"generate the report every Monday morning,\" \"remind me to check this again in 30 minutes.\" Without scheduling, the user has to re-issue the same request every time. This chapter adds one new idea: store future intent now, trigger it later. And it closes out Stage 3 by completing the progression from durable tasks (s12) to background execution (s13) to time-based triggers (s14).\n\n## The Problem\n\nYour agent can now manage a task graph and run commands in the background. But every piece of work begins with the user explicitly asking for it. If the user wants a nightly test run, they have to remember to type \"run the tests\" every evening. If they want a weekly status report, they have to open a session every Monday morning. The agent has no concept of future time -- it reacts to what you say right now, and it cannot act on something you want to happen tomorrow. You need a way to record \"do X at time Y\" and have the system trigger it automatically.\n\n## The Solution\n\nAdd three moving parts: schedule records that describe when and what, a time checker that runs in the background and tests whether any schedule matches the current time, and the same notification queue from s13 to feed triggered work back into the main loop.\n\n```text\nschedule_create(...)\n ->\nwrite a durable schedule record\n ->\ntime checker wakes up and tests \"does this rule match now?\"\n ->\nif yes, enqueue a scheduled notification\n ->\nmain loop injects that notification as new work\n```\n\nThe key insight is that the scheduler is not a second agent loop. It feeds triggered prompts into the same system the agent already uses. The main loop does not know or care whether a piece of work came from the user typing it or from a cron trigger -- it processes both the same way.\n\n## How It Works\n\n**Step 1.** Define the schedule record. Each job stores a cron expression (a compact time-matching syntax like `0 9 * * 1` meaning \"9:00 AM every Monday\"), the prompt to execute, whether it recurs or fires once, and a `last_fired_at` timestamp to prevent double-firing.\n\n```python\nschedule = {\n \"id\": \"job_001\",\n \"cron\": \"0 9 * * 1\",\n \"prompt\": \"Run the weekly status report.\",\n \"recurring\": True,\n \"durable\": True,\n \"created_at\": 1710000000.0,\n \"last_fired_at\": None,\n}\n```\n\nA durable job is written to disk and survives process restarts. A session-only job lives in memory and dies when the agent exits. One-shot jobs (`recurring: False`) fire once and then delete themselves.\n\n**Step 2.** Create a schedule through a tool call. 
The method stores the record and returns it so the model can confirm what was scheduled.\n\n```python\ndef create(self, cron_expr: str, prompt: str, recurring: bool = True):\n    job = {\n        \"id\": new_id(),\n        \"cron\": cron_expr,\n        \"prompt\": prompt,\n        \"recurring\": recurring,\n        \"created_at\": time.time(),\n        \"last_fired_at\": None,\n    }\n    self.jobs.append(job)\n    return job\n```\n\n**Step 3.** Run a background checker loop that wakes up every 60 seconds and tests each schedule against the current time.\n\n```python\ndef check_loop(self):\n    while True:\n        now = datetime.now()\n        self.check_jobs(now)\n        time.sleep(60)\n```\n\n**Step 4.** When a schedule matches, enqueue a notification. The `last_fired_at` field is checked and then updated so the same minute cannot trigger the job twice.\n\n```python\ndef check_jobs(self, now):\n    for job in self.jobs:\n        # skip jobs that already fired within the last minute\n        last = job[\"last_fired_at\"]\n        if last is not None and now.timestamp() - last < 60:\n            continue\n        if cron_matches(job[\"cron\"], now):\n            self.queue.put({\n                \"type\": \"scheduled_prompt\",\n                \"schedule_id\": job[\"id\"],\n                \"prompt\": job[\"prompt\"],\n            })\n            job[\"last_fired_at\"] = now.timestamp()\n```\n\n**Step 5.** Feed scheduled notifications back into the main loop using the same drain pattern from s13. From the agent's perspective, a scheduled prompt looks just like a user message.\n\n```python\nnotifications = scheduler.drain()\nfor item in notifications:\n    messages.append({\n        \"role\": \"user\",\n        \"content\": f\"[scheduled:{item['schedule_id']}] {item['prompt']}\",\n    })\n```\n\n## Read Together\n\n- If `schedule`, `task`, and `runtime task` still feel like the same object, reread [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) -- it draws the boundary between planning records, execution records, and schedule records.\n- If you want to see how one trigger eventually returns to the mainline, pair this chapter with [`s00b-one-request-lifecycle.md`](./s00b-one-request-lifecycle.md).\n- If future triggers start to feel like a whole second execution system, reset with [`data-structures.md`](./data-structures.md) and separate schedule records from runtime records.\n\n## What Changed\n\n| Mechanism | Main question |\n|---|---|\n| Background tasks (s13) | \"How does slow work continue without blocking?\" |\n| Scheduling (s14) | \"When should future work begin?\" |\n\n| Component | Before (s13) | After (s14) |\n|---|---|---|\n| Tools | 6 (base + background) | 8 (+ schedule_create, schedule_list, schedule_delete) |\n| Time awareness | None | Cron-based future triggers |\n| Persistence | Background tasks in memory | Durable schedules survive restarts |\n| Trigger model | User-initiated only | User-initiated + time-triggered |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s14_cron_scheduler.py\n```\n\n1. Create a repeating schedule: `Schedule \"echo hello\" to run every 2 minutes`\n2. Create a one-shot reminder: `Remind me in 1 minute to check the build`\n3. Create a delayed follow-up: `In 5 minutes, run the test suite and report results`\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Define schedule records that store future intent as durable data\n- Run a background time checker that matches cron expressions to the current clock\n- Distinguish durable jobs (persist to disk) from session-only jobs (in-memory)\n- Feed scheduled triggers back into the main loop through the same notification queue used by background tasks\n- Prevent double-firing with `last_fired_at` tracking\n\n## Stage 3 Complete\n\nYou have finished Stage 3: the execution and scheduling layer. 
Looking back at the three chapters together:\n\n- **s12** gave the agent a task graph with dependencies and persistence -- it can plan structured work that survives restarts.\n- **s13** added background execution -- slow work runs in parallel instead of blocking the loop.\n- **s14** added time-based triggers -- the agent can schedule future work without the user having to remember.\n\nTogether, these three chapters transform the agent from something that only reacts to what you type right now into something that can plan ahead, work in parallel, and act on its own schedule. In Stage 4 (s15-s18), you will use this foundation to coordinate multiple agents working as a team.\n\n## Key Takeaway\n\n> A scheduler stores future intent as a record, checks it against the clock in a background loop, and feeds triggered work back into the same agent system -- no second loop needed.\n"
+ },
+ {
+ "version": "s15",
+ "slug": "s15-agent-teams",
+ "locale": "en",
+ "title": "s15: Agent Teams",
+ "kind": "chapter",
+ "filename": "s15-agent-teams.md",
+ "content": "# s15: Agent Teams\n\n`s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > [ s15 ] > s16 > s17 > s18 > s19`\n\n## What You'll Learn\n- How persistent teammates differ from disposable subagents\n- How JSONL-based inboxes give agents a durable communication channel\n- How the team lifecycle moves through spawn, working, idle, and shutdown\n- How file-based coordination lets multiple agent loops run side by side\n\nSometimes one agent is not enough. A complex project -- say, building a feature that involves frontend, backend, and tests -- needs multiple workers running in parallel, each with its own identity and memory. In this chapter you will build a team system where agents persist beyond a single prompt, communicate through file-based mailboxes, and coordinate without sharing a single conversation thread.\n\n## The Problem\n\nSubagents from s04 are disposable: you spawn one, it works, it returns a summary, and it dies. It has no identity and no memory between invocations. Background tasks from s13 can keep work running in the background, but they are not persistent teammates making their own LLM-guided decisions.\n\nReal teamwork needs three things: (1) persistent agents that outlive a single prompt, (2) identity and lifecycle management so you know who is doing what, and (3) a communication channel between agents so they can exchange information without the lead manually relaying every message.\n\n## The Solution\n\nThe harness maintains a team roster in a shared config file and gives each teammate an append-only JSONL inbox. When one agent sends a message to another, it simply appends a JSON line to the recipient's inbox file. The recipient drains that file before every LLM call.\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## How It Works\n\n**Step 1.** `TeammateManager` maintains `config.json` with the team roster. It tracks every teammate's name, role, and current status.\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n**Step 2.** `spawn()` creates a teammate entry in the roster and starts its agent loop in a separate thread. From this point on, the teammate runs independently -- it has its own conversation history, its own tool calls, and its own LLM interactions.\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n**Step 3.** `MessageBus` provides append-only JSONL inboxes. `send()` appends a single JSON line to the recipient's file; `read_inbox()` reads all accumulated messages and then empties the file (\"drains\" it). 
The storage format is intentionally simple -- the teaching focus here is the mailbox boundary, not storage cleverness.\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n**Step 4.** Each teammate checks its inbox before every LLM call. Any received messages get injected into the conversation context so the model can see and respond to them.\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox} \"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## Read Together\n\n- If you still treat a teammate like s04's disposable subagent, revisit [`entity-map.md`](./entity-map.md) to see how they differ.\n- If you plan to continue into s16-s18, keep [`team-task-lane-model.md`](./team-task-lane-model.md) open -- it separates teammate, protocol request, task, runtime slot, and worktree lane into distinct concepts.\n- If you are unsure how a long-lived teammate differs from a live runtime slot, pair this chapter with [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md).\n\n## How It Plugs Into The Earlier System\n\nThis chapter is not just \"more model calls.\" It adds durable executors on top of work structures you already built in s12-s14.\n\n```text\nlead identifies work that needs a long-lived worker\n ->\nspawn teammate\n ->\nwrite roster entry in .team/config.json\n ->\nsend inbox message / task hint\n ->\nteammate drains inbox before its next loop\n ->\nteammate runs its own agent loop and tools\n ->\nresult returns through team messages or task updates\n```\n\nKeep the boundary straight:\n\n- s12-s14 gave you tasks, runtime slots, and schedules\n- s15 adds durable named workers\n- s15 is still mostly lead-assigned work\n- structured protocols arrive in s16\n- autonomous claiming arrives in s17\n\n## Teammate vs Subagent vs Runtime Slot\n\n| Mechanism | Think of it as | Lifecycle | Main boundary |\n|---|---|---|---|\n| subagent | a disposable helper | spawn -> work -> summary -> gone | isolates one exploratory branch |\n| runtime slot | a live execution slot | exists while background work is running | tracks long-running execution, not identity |\n| teammate | a durable worker | can go idle, resume, and keep receiving work | has a name, inbox, and independent loop |\n\n## What Changed From s14\n\n| Component | Before (s14) | After (s15) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> 
working -> idle |\n| Communication | None | message + broadcast |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s15_agent_teams.py\n```\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. Type `/team` to see the team roster with statuses\n5. Type `/inbox` to manually check the lead's inbox\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Spawn persistent teammates that each run their own independent agent loop\n- Send messages between agents through durable JSONL inboxes\n- Track teammate status through a shared config file\n- Coordinate multiple agents without funneling everything through a single conversation\n\n## What's Next\n\nYour teammates can now communicate freely, but they lack coordination rules. What happens when you need to shut a teammate down cleanly, or review a risky plan before it executes? In s16, you will add structured protocols -- request-response handshakes that bring order to multi-agent negotiation.\n\n## Key Takeaway\n\n> Teammates persist beyond one prompt, each with identity, lifecycle, and a durable mailbox -- coordination is no longer limited to a single parent loop.\n"
+ },
+ {
+ "version": "s16",
+ "slug": "s16-team-protocols",
"locale": "en",
- "title": "s12: Worktree + Task Isolation",
- "content": "# s12: Worktree + Task Isolation\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"Each works in its own directory, no interference\"* -- tasks manage goals, worktrees manage directories, bound by ID.\n\n## Problem\n\nBy s11, agents can claim and complete tasks autonomously. But every task runs in one shared directory. Two agents refactoring different modules at the same time will collide: agent A edits `config.py`, agent B edits `config.py`, unstaged changes mix, and neither can roll back cleanly.\n\nThe task board tracks *what to do* but has no opinion about *where to do it*. The fix: give each task its own git worktree directory. Tasks manage goals, worktrees manage execution context. Bind them by task ID.\n\n## Solution\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## How It Works\n\n1. **Create a task.** Persist the goal first.\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **Create a worktree and bind to the task.** Passing `task_id` auto-advances the task to `in_progress`.\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\nThe binding writes state to both sides:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **Run commands in the worktree.** `cwd` points to the isolated directory.\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **Close out.** Two choices:\n - `worktree_keep(name)` -- preserve the directory for later.\n - `worktree_remove(name, complete_task=True)` -- remove directory, complete the bound task, emit event. One call handles teardown + completion.\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **Event stream.** Every lifecycle step emits to `.worktrees/events.jsonl`:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\nEvents emitted: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`.\n\nAfter a crash, state reconstructs from `.tasks/` + `.worktrees/index.json` on disk. 
Conversation memory is volatile; file state is durable.\n\n## What Changed From s11\n\n| Component | Before (s11) | After (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| Coordination | Task board (owner/status) | Task board + explicit worktree binding |\n| Execution scope | Shared directory | Task-scoped isolated directory |\n| Recoverability | Task status only | Task status + worktree index |\n| Teardown | Task completion | Task completion + explicit keep/remove |\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n"
+ "title": "s16: Team Protocols",
+ "kind": "chapter",
+ "filename": "s16-team-protocols.md",
+ "content": "# s16: Team Protocols\n\n`s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > [ s16 ] > s17 > s18 > s19`\n\n## What You'll Learn\n- How a request-response pattern with a tracking ID structures multi-agent negotiation\n- How the shutdown protocol lets a lead gracefully stop a teammate\n- How plan approval gates risky work behind a review step\n- How one reusable FSM (a simple status tracker with defined transitions) covers both protocols\n\nIn s15 your teammates can send messages freely, but that freedom comes with chaos. One agent tells another \"please stop,\" and the other ignores it. A teammate starts a risky database migration without asking first. The problem is not communication itself -- you solved that with inboxes -- but the lack of coordination rules. In this chapter you will add structured protocols: a standardized message wrapper with a tracking ID that turns loose messages into reliable handshakes.\n\n## The Problem\n\nTwo coordination gaps become obvious once your team grows past toy examples:\n\n**Shutdown.** Killing a teammate's thread leaves files half-written and the config roster stale. You need a handshake: the lead requests shutdown, and the teammate approves (finishes current work and exits cleanly) or rejects (keeps working because it has unfinished obligations).\n\n**Plan approval.** When the lead says \"refactor the auth module,\" the teammate starts immediately. But for high-risk changes, the lead should review the plan before any code gets written.\n\nBoth scenarios share an identical structure: one side sends a request carrying a unique ID, the other side responds referencing that same ID. That single pattern is enough to build any coordination protocol you need.\n\n## The Solution\n\nBoth shutdown and plan approval follow one shape: send a request with a `request_id`, receive a response referencing that same `request_id`, and track the outcome through a simple status machine (`pending -> approved` or `pending -> rejected`).\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## How It Works\n\n**Step 1.** The lead initiates shutdown by generating a unique `request_id` and sending the request through the teammate's inbox. The request is tracked in a dictionary so the lead can check its status later.\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n**Step 2.** The teammate receives the request in its inbox and responds with approve or reject. 
The response carries the same `request_id` so the lead can match it to the original request -- this is the correlation that makes the protocol reliable.\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n**Step 3.** Plan approval follows the identical pattern but in the opposite direction. The teammate submits a plan (generating a `request_id`), and the lead reviews it (referencing the same `request_id` to approve or reject).\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\nIn this teaching demo, one FSM shape covers both protocols. A production system might treat different protocol families differently, but the teaching version intentionally keeps one reusable template so you can see the shared structure clearly.\n\n## Read Together\n\n- If plain messages and protocol requests are starting to blur together, revisit [`glossary.md`](./glossary.md) and [`entity-map.md`](./entity-map.md) to see how they differ.\n- If you plan to continue into s17 and s18, read [`team-task-lane-model.md`](./team-task-lane-model.md) first so autonomy and worktree lanes do not collapse into one idea.\n- If you want to trace how a protocol request returns to the main system, pair this chapter with [`s00b-one-request-lifecycle.md`](./s00b-one-request-lifecycle.md).\n\n## How It Plugs Into The Team System\n\nThe real upgrade in s16 is not \"two new message types.\" It is a durable coordination path:\n\n```text\nrequester starts a protocol action\n ->\nwrite RequestRecord\n ->\nsend ProtocolEnvelope through inbox\n ->\nreceiver drains inbox on its next loop\n ->\nupdate request status by request_id\n ->\nsend structured response\n ->\nrequester continues based on approved / rejected\n```\n\nThat is the missing layer between \"agents can chat\" and \"agents can coordinate reliably.\"\n\n## Message vs Protocol vs Request vs Task\n\n| Object | What question it answers | Typical fields |\n|---|---|---|\n| `MessageEnvelope` | who said what to whom | `from`, `to`, `content` |\n| `ProtocolEnvelope` | is this a structured request / response | `type`, `request_id`, `payload` |\n| `RequestRecord` | where is this coordination flow now | `kind`, `status`, `from`, `to` |\n| `TaskRecord` | what actual work item is being advanced | `subject`, `status`, `blockedBy`, `owner` |\n\nDo not collapse them:\n\n- a protocol request is not the task itself\n- the request store is not the task board\n- protocols track coordination flow\n- tasks track work progression\n\n## What Changed From s15\n\n| Component | Before (s15) | After (s16) |\n|----------------|------------------|------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan)|\n| Shutdown | Natural exit only| Request-response handshake |\n| Plan gating | None | Submit/review with approval |\n| Correlation | None | request_id per request |\n| FSM | None | pending -> approved/rejected |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s16_team_protocols.py\n```\n\n1. `Spawn alice as a coder. 
Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. Type `/team` to monitor statuses\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Build request-response protocols that use a unique ID for correlation\n- Implement graceful shutdown through a two-step handshake\n- Gate risky work behind a plan approval step\n- Reuse a single FSM pattern (`pending -> approved/rejected`) for any new protocol you invent\n\n## What's Next\n\nYour team now has structure and rules, but the lead still has to babysit every teammate -- assigning tasks one by one, nudging idle workers. In s17, you will make teammates autonomous: they scan the task board themselves, claim unclaimed work, and resume after context compression without losing their identity.\n\n## Key Takeaway\n\n> A protocol request is a structured message with a tracking ID, and the response must reference that same ID -- that single pattern is enough to build any coordination handshake.\n"
+ },
+ {
+ "version": "s17",
+ "slug": "s17-autonomous-agents",
+ "locale": "en",
+ "title": "s17: Autonomous Agents",
+ "kind": "chapter",
+ "filename": "s17-autonomous-agents.md",
+ "content": "# s17: Autonomous Agents\n\n`s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > [ s17 ] > s18 > s19`\n\n## What You'll Learn\n- How idle polling lets a teammate find new work without being told\n- How auto-claim turns the task board into a self-service work queue\n- How identity re-injection restores a teammate's sense of self after context compression\n- How a timeout-based shutdown prevents idle agents from running forever\n\nManual assignment does not scale. With ten unclaimed tasks on the board, the lead has to pick one, find an idle teammate, craft a prompt, and hand it off -- ten times. The lead becomes a bottleneck, spending more time dispatching than thinking. In this chapter you will remove that bottleneck by making teammates autonomous: they scan the task board themselves, claim unclaimed work, and shut down gracefully when there is nothing left to do.\n\n## The Problem\n\nIn s15-s16, teammates only work when explicitly told to. The lead must spawn each one with a specific prompt. If ten tasks sit unclaimed on the board, the lead assigns each one manually. This creates a coordination bottleneck that gets worse as the team grows.\n\nTrue autonomy means teammates scan the task board themselves, claim unclaimed tasks, work on them, then look for more -- all without the lead lifting a finger.\n\nOne subtlety makes this harder than it sounds: after context compression (which you built in s06), an agent's conversation history gets truncated. The agent might forget who it is. Identity re-injection fixes this by restoring the agent's name and role when its context gets too short.\n\n## The Solution\n\nEach teammate alternates between two phases: WORK (calling the LLM and executing tools) and IDLE (polling for new messages or unclaimed tasks). If the idle phase times out with nothing to do, the teammate shuts itself down.\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## How It Works\n\n**Step 1.** The teammate loop has two phases: WORK and IDLE. During the work phase, the teammate calls the LLM repeatedly and executes tools. When the LLM stops calling tools (or the teammate explicitly calls the `idle` tool), it transitions to the idle phase.\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n**Step 2.** The idle phase polls for two things in a loop: inbox messages and unclaimed tasks. It checks every 5 seconds for up to 60 seconds. If a message arrives, the teammate wakes up. 
If an unclaimed task appears on the board, the teammate claims it and gets back to work. If neither happens within the timeout window, the teammate shuts itself down.\n\n```python\ndef _idle_poll(self, name, messages):\n    for _ in range(IDLE_TIMEOUT // POLL_INTERVAL):  # 60s / 5s = 12\n        time.sleep(POLL_INTERVAL)\n        inbox = BUS.read_inbox(name)\n        if inbox != \"[]\":\n            messages.append({\"role\": \"user\",\n                             \"content\": f\"{inbox} \"})\n            return True\n        unclaimed = scan_unclaimed_tasks()\n        if unclaimed:\n            claim_task(unclaimed[0][\"id\"], name)\n            messages.append({\"role\": \"user\",\n                             \"content\": f\"Task #{unclaimed[0]['id']}: \"\n                                        f\"{unclaimed[0]['subject']} \"})\n            return True\n    return False  # timeout -> shutdown\n```\n\n**Step 3.** Task board scanning finds pending, unowned, unblocked tasks. The scan reads task files from disk and filters for tasks that are available to claim -- no owner, no blocking dependencies, and still in `pending` status.\n\n```python\ndef scan_unclaimed_tasks() -> list:\n    unclaimed = []\n    for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n        task = json.loads(f.read_text())\n        if (task.get(\"status\") == \"pending\"\n                and not task.get(\"owner\")\n                and not task.get(\"blockedBy\")):\n            unclaimed.append(task)\n    return unclaimed\n```\n\n**Step 4.** Identity re-injection handles a subtle problem. After context compression (s06), the conversation history might shrink to just a few messages -- and the agent forgets who it is. When the message list is suspiciously short (3 or fewer messages), the harness inserts an identity block at the beginning so the agent knows its name, role, and team.\n\n```python\nif len(messages) <= 3:\n    messages.insert(0, {\"role\": \"user\",\n                        \"content\": f\"You are '{name}', role: {role}, \"\n                                   f\"team: {team_name}. Continue your work. \"})\n    messages.insert(1, {\"role\": \"assistant\",\n                        \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## Read Together\n\n- If teammate, task, and runtime slot are starting to blur into one layer, revisit [`team-task-lane-model.md`](./team-task-lane-model.md) to separate them clearly.\n- If auto-claim makes you wonder where the live execution slot actually lives, keep [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) nearby.\n- If you are starting to forget the core difference between a persistent teammate and a one-shot subagent, revisit [`entity-map.md`](./entity-map.md).\n\n## What Changed From s16\n\n| Component | Before (s16) | After (s17) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s17_autonomous_agents.py\n```\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. Type `/tasks` to see the task board with owners\n5. 
Type `/team` to monitor who is working vs idle\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Build teammates that find and claim work from a shared task board without lead intervention\n- Implement an idle polling loop that balances responsiveness with resource efficiency\n- Restore agent identity after context compression so long-running teammates stay coherent\n- Use timeout-based shutdown to prevent abandoned agents from running indefinitely\n\n## What's Next\n\nYour teammates now organize themselves, but they all share the same working directory. When two agents edit the same file at the same time, things break. In s18, you will give each teammate its own isolated worktree -- a separate copy of the codebase where it can work without stepping on anyone else's changes.\n\n## Key Takeaway\n\n> Autonomous teammates scan the task board, claim unclaimed work, and shut down when idle -- removing the lead as a coordination bottleneck.\n"
+ },
+ {
+ "version": "s18",
+ "slug": "s18-worktree-task-isolation",
+ "locale": "en",
+ "title": "s18: Worktree + Task Isolation",
+ "kind": "chapter",
+ "filename": "s18-worktree-task-isolation.md",
+ "content": "# s18: Worktree + Task Isolation\n\n`s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > [ s18 ] > s19`\n\n## What You'll Learn\n- How git worktrees (isolated copies of your project directory, managed by git) prevent file conflicts between parallel agents\n- How to bind a task to a dedicated worktree so that \"what to do\" and \"where to do it\" stay cleanly separated\n- How lifecycle events give you an observable record of every create, keep, and remove action\n- How parallel execution lanes let multiple agents work on different tasks without ever stepping on each other's files\n\nWhen two agents both need to edit the same codebase at the same time, you have a problem. Everything you have built so far -- task boards, autonomous agents, team protocols -- assumes that agents work in a single shared directory. That works fine until it does not. This chapter gives every task its own directory, so parallel work stays parallel.\n\n## The Problem\n\nBy s17, your agents can claim tasks, coordinate through team protocols, and complete work autonomously. But all of them run in the same project directory. Imagine agent A is refactoring the authentication module, and agent B is building a new login page. Both need to touch `config.py`. Agent A stages its changes, agent B stages different changes to the same file, and now you have a tangled mess of unstaged edits that neither agent can roll back cleanly.\n\nThe task board tracks *what to do* but has no opinion about *where to do it*. You need a way to give each task its own isolated working directory, so that file-level operations never collide. The fix is straightforward: pair each task with a git worktree -- a separate checkout of the same repository on its own branch. Tasks manage goals; worktrees manage execution context. Bind them by task ID.\n\n## Read Together\n\n- If task, runtime slot, and worktree lane are blurring together in your head, [`team-task-lane-model.md`](./team-task-lane-model.md) separates them clearly.\n- If you want to confirm which fields belong on task records versus worktree records, [`data-structures.md`](./data-structures.md) has the full schema.\n- If you want to see why this chapter comes after tasks and teams in the overall curriculum, [`s00e-reference-module-map.md`](./s00e-reference-module-map.md) has the ordering rationale.\n\n## The Solution\n\nThe system splits into two planes: a control plane (`.tasks/`) that tracks goals, and an execution plane (`.worktrees/`) that manages isolated directories. Each task points to its worktree by name, and each worktree points back to its task by ID.\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## How It Works\n\n**Step 1.** Create a task. 
The goal is recorded first, before any directory exists.\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n**Step 2.** Create a worktree and bind it to the task. Passing `task_id` automatically advances the task to `in_progress` -- you do not need to update the status separately.\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\nThe binding writes state to both sides so you can traverse the relationship from either direction:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n**Step 3.** Run commands in the worktree. The key detail: `cwd` points to the isolated directory, not your main project root. Every file operation happens in a sandbox that cannot collide with other worktrees.\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n**Step 4.** Close out the worktree. You have two choices, depending on whether the work is done:\n\n- `worktree_keep(name)` -- preserve the directory for later (useful when a task is paused or needs review).\n- `worktree_remove(name, complete_task=True)` -- remove the directory, mark the bound task as completed, and emit an event. One call handles teardown and completion together.\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n**Step 5.** Observe the event stream. Every lifecycle step emits a structured event to `.worktrees/events.jsonl`, giving you a complete audit trail of what happened and when:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\nEvents emitted: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`.\n\nIn the teaching version, `.tasks/` plus `.worktrees/index.json` are enough to reconstruct the visible control-plane state after a crash. The important lesson is not every production edge case. The important lesson is that goal state and execution-lane state must both stay legible on disk.\n\n## What Changed From s17\n\n| Component | Before (s17) | After (s18) |\n|--------------------|----------------------------|----------------------------------------------|\n| Coordination | Task board (owner/status) | Task board + explicit worktree binding |\n| Execution scope | Shared directory | Task-scoped isolated directory |\n| Recoverability | Task status only | Task status + worktree index |\n| Teardown | Task completion | Task completion + explicit keep/remove |\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s18_worktree_task_isolation.py\n```\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. 
`Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Create isolated git worktrees so that parallel agents never produce file conflicts\n- Bind tasks to worktrees with a two-way reference (task points to worktree name, worktree points to task ID)\n- Choose between keeping and removing a worktree at closeout, with automatic task status updates\n- Read the event stream in `events.jsonl` to understand the full lifecycle of every worktree\n\n## What's Next\n\nYou now have agents that can work in complete isolation, each in its own directory with its own branch. But every capability they use -- bash, read, write, edit -- is hard-coded into your Python harness. In s19, you will learn how external programs can provide new capabilities through MCP (Model Context Protocol), so your agent can grow without changing its core code.\n\n## Key Takeaway\n\n> Tasks answer *what work is being done*; worktrees answer *where that work runs*; keeping them separate makes parallel systems far easier to reason about and recover from.\n"
+ },
+ {
+ "version": "s19",
+ "slug": "s19-mcp-plugin",
+ "locale": "en",
+ "title": "s19: MCP & Plugin",
+ "kind": "chapter",
+ "filename": "s19-mcp-plugin.md",
+ "content": "# s19: MCP & Plugin\n\n`s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > [ s19 ]`\n\n## What You'll Learn\n- How MCP (Model Context Protocol -- a standard way for the agent to talk to external capability servers) lets your agent gain new tools without changing its core code\n- How tool name normalization with a `mcp__{server}__{tool}` prefix keeps external tools from colliding with native ones\n- How a unified router dispatches tool calls to local handlers or remote servers through the same path\n- How plugin manifests let external capability servers be discovered and launched automatically\n\nUp to this point, every tool your agent uses -- bash, read, write, edit, tasks, worktrees -- lives inside your Python harness. You wrote each one by hand. That works well for a teaching codebase, but a real agent needs to talk to databases, browsers, cloud services, and tools that do not exist yet. Hard-coding every possible capability is not sustainable. This chapter shows how external programs can join your agent through the same tool-routing plane you already built.\n\n## The Problem\n\nYour agent is powerful, but its capabilities are frozen at build time. If you want it to query a Postgres database, you write a new Python handler. If you want it to control a browser, you write another handler. Every new capability means changing the core harness, re-testing the tool router, and redeploying. Meanwhile, other teams are building specialized servers that already know how to talk to these systems. You need a standard protocol so those external servers can expose their tools to your agent, and your agent can call them as naturally as it calls its own native tools -- without rewriting the core loop every time.\n\n## The Solution\n\nMCP gives your agent a standard way to connect to external capability servers over stdio. The agent starts a server process, asks what tools it provides, normalizes their names with a prefix, and routes calls to that server -- all through the same tool pipeline that handles native tools.\n\n```text\nLLM\n |\n | asks to call a tool\n v\nAgent tool router\n |\n +-- native tool -> local Python handler\n |\n +-- MCP tool -> external MCP server\n |\n v\n return result\n```\n\n## Read Together\n\n- If you want to understand how MCP fits into the broader capability surface beyond just tools (resources, prompts, plugin discovery), [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md) covers the full platform boundary.\n- If you want to confirm that external capabilities still return through the same execution surface as native tools, pair this chapter with [`s02b-tool-execution-runtime.md`](./s02b-tool-execution-runtime.md).\n- If query control and external capability routing are drifting apart in your mental model, [`s00a-query-control-plane.md`](./s00a-query-control-plane.md) ties them together.\n\n## How It Works\n\nThere are three essential pieces. Once you understand them, MCP stops being mysterious.\n\n**Step 1.** Build an `MCPClient` that manages the connection to one external server. 
It starts the server process over stdio, sends a handshake, and caches the list of available tools.\n\n```python\nclass MCPClient:\n def __init__(self, server_name, command, args=None, env=None):\n self.server_name = server_name\n self.command = command\n self.args = args or []\n self.process = None\n self._tools = []\n\n def connect(self):\n self.process = subprocess.Popen(\n [self.command] + self.args,\n stdin=subprocess.PIPE, stdout=subprocess.PIPE,\n stderr=subprocess.PIPE, text=True,\n )\n self._send({\"method\": \"initialize\", \"params\": {\n \"protocolVersion\": \"2024-11-05\",\n \"capabilities\": {},\n \"clientInfo\": {\"name\": \"teaching-agent\", \"version\": \"1.0\"},\n }})\n response = self._recv()\n if response and \"result\" in response:\n self._send({\"method\": \"notifications/initialized\"})\n return True\n return False\n\n def list_tools(self):\n self._send({\"method\": \"tools/list\", \"params\": {}})\n response = self._recv()\n if response and \"result\" in response:\n self._tools = response[\"result\"].get(\"tools\", [])\n return self._tools\n\n def call_tool(self, tool_name, arguments):\n self._send({\"method\": \"tools/call\", \"params\": {\n \"name\": tool_name, \"arguments\": arguments,\n }})\n response = self._recv()\n if response and \"result\" in response:\n content = response[\"result\"].get(\"content\", [])\n return \"\\n\".join(c.get(\"text\", str(c)) for c in content)\n return \"MCP Error: no response\"\n```\n\n**Step 2.** Normalize external tool names with a prefix so they never collide with native tools. The convention is simple: `mcp__{server}__{tool}`.\n\n```text\nmcp__postgres__query\nmcp__browser__open_tab\n```\n\nThis prefix serves double duty: it prevents name collisions, and it tells the router exactly which server should handle the call.\n\n```python\ndef get_agent_tools(self):\n agent_tools = []\n for tool in self._tools:\n prefixed_name = f\"mcp__{self.server_name}__{tool['name']}\"\n agent_tools.append({\n \"name\": prefixed_name,\n \"description\": tool.get(\"description\", \"\"),\n \"input_schema\": tool.get(\"inputSchema\", {\n \"type\": \"object\", \"properties\": {}\n }),\n })\n return agent_tools\n```\n\n**Step 3.** Build one unified router. The router does not care whether a tool is native or external beyond the dispatch decision. If the name starts with `mcp__`, route to the MCP server; otherwise, call the local handler. This keeps the agent loop untouched -- it just sees a flat list of tools.\n\n```python\nif tool_name.startswith(\"mcp__\"):\n return mcp_router.call(tool_name, arguments)\nelse:\n return native_handler(arguments)\n```\n\n**Step 4.** Add plugin discovery. If MCP answers \"how does the agent talk to an external capability server,\" plugins answer \"how are those servers discovered and configured?\" A minimal plugin is a manifest file that tells the harness which servers to launch:\n\n```json\n{\n \"name\": \"my-db-tools\",\n \"version\": \"1.0.0\",\n \"mcpServers\": {\n \"postgres\": {\n \"command\": \"npx\",\n \"args\": [\"-y\", \"@modelcontextprotocol/server-postgres\"]\n }\n }\n}\n```\n\nThis lives in `.claude-plugin/plugin.json`. The `PluginLoader` scans for these manifests, extracts the server configs, and hands them to the `MCPToolRouter` for connection.\n\n**Step 5.** Enforce the safety boundary. This is the most important rule of the entire chapter: external tools must still pass through the same permission gate as native tools. 
If MCP tools bypass permission checks, you have created a security backdoor at the edge of your system.\n\n```python\ndecision = permission_gate.check(block.name, block.input or {})\n# Same check for \"bash\", \"read_file\", and \"mcp__postgres__query\"\n```\n\n## How It Plugs Into The Full Harness\n\nMCP gets confusing when it is treated like a separate universe. The cleaner model is:\n\n```text\nstartup\n ->\nplugin loader finds manifests\n ->\nserver configs are extracted\n ->\nMCP clients connect and list tools\n ->\nexternal tools are normalized into the same tool pool\n\nruntime\n ->\nLLM emits tool_use\n ->\nshared permission gate\n ->\nnative route or MCP route\n ->\nresult normalization\n ->\ntool_result returns to the same loop\n```\n\nDifferent entry point, same control plane and execution plane.\n\n## Plugin vs Server vs Tool\n\n| Layer | What it is | What it is for |\n|---|---|---|\n| plugin manifest | a config declaration | tells the harness which servers to discover and launch |\n| MCP server | an external process / connection | exposes a set of capabilities |\n| MCP tool | one callable capability from that server | the concrete thing the model invokes |\n\nShortest memory aid:\n\n- plugin = discovery\n- server = connection\n- tool = invocation\n\n## Key Data Structures\n\n### Server config\n\n```python\n{\n \"command\": \"npx\",\n \"args\": [\"-y\", \"...\"],\n \"env\": {}\n}\n```\n\n### Normalized external tool definition\n\n```python\n{\n \"name\": \"mcp__postgres__query\",\n \"description\": \"Run a SQL query\",\n \"input_schema\": {...}\n}\n```\n\n### Client registry\n\n```python\nclients = {\n \"postgres\": mcp_client_instance\n}\n```\n\n## What Changed From s18\n\n| Component | Before (s18) | After (s19) |\n|--------------------|-----------------------------------|--------------------------------------------------|\n| Tool sources | All native (local Python) | Native + external MCP servers |\n| Tool naming | Flat names (`bash`, `read_file`) | Prefixed for externals (`mcp__postgres__query`) |\n| Routing | Single handler map | Unified router: native dispatch + MCP dispatch |\n| Capability growth | Edit harness code for each tool | Add a plugin manifest or connect a server |\n| Permission scope | Native tools only | Native + external tools through same gate |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s19_mcp_plugin.py\n```\n\n1. Watch how external tools are discovered from plugin manifests at startup.\n2. Type `/tools` to see native and MCP tools listed side by side in one flat pool.\n3. Type `/mcp` to see which MCP servers are connected and how many tools each provides.\n4. Ask the agent to use a tool and notice how results return through the same loop as local tools.\n\n## What You've Mastered\n\nAt this point, you can:\n\n- Connect to external capability servers using the MCP stdio protocol\n- Normalize external tool names with a `mcp__{server}__{tool}` prefix to prevent collisions\n- Route tool calls through a unified dispatcher that handles both native and MCP tools\n- Discover and launch MCP servers automatically through plugin manifests\n- Enforce the same permission checks on external tools as on native ones\n\n## The Full Picture\n\nYou have now walked through the complete design backbone of a production coding agent, from s01 to s19.\n\nYou started with a bare agent loop that calls an LLM and appends tool results. You added tool use, then a persistent task list, then subagents, skill loading, and context compaction. 
You built a permission system, a hook system, and a memory system. You constructed the system prompt pipeline, added error recovery, and gave agents a full task board with background execution and cron scheduling. You organized agents into teams with coordination protocols, made them autonomous, gave each task its own isolated worktree, and finally opened the door to external capabilities through MCP.\n\nEach chapter added exactly one idea to the system. None of them required you to throw away what came before. The agent you have now is not a toy -- it is a working model of the same architectural decisions that shape real production agents.\n\nIf you want to test your understanding, try rebuilding the complete system from scratch. Start with the agent loop. Add tools. Add tasks. Keep going until you reach MCP. If you can do that without looking back at the chapters, you understand the design. And if you get stuck somewhere in the middle, the chapter that covers that idea will be waiting for you.\n\n## Key Takeaway\n\n> External capabilities should enter the same tool pipeline as native ones -- same naming, same routing, same permissions -- so the agent loop never needs to know the difference.\n"
+ },
+ {
+ "version": null,
+ "slug": "s19a-mcp-capability-layers",
+ "locale": "en",
+ "title": "s19a: MCP Capability Layers",
+ "kind": "bridge",
+ "filename": "s19a-mcp-capability-layers.md",
+ "content": "# s19a: MCP Capability Layers\n\n> **Deep Dive** -- Best read alongside s19. It shows that MCP is more than just external tools.\n\n### When to Read This\n\nAfter reading s19's tools-first approach, when you're ready to see the full MCP capability stack.\n\n---\n\n> `s19` should still keep a tools-first mainline.\n> This bridge note adds the second mental model:\n>\n> **MCP is not only external tool access. It is a stack of capability layers.**\n\n## How to Read This with the Mainline\n\nIf you want to study MCP without drifting away from the teaching goal:\n\n- read [`s19-mcp-plugin.md`](./s19-mcp-plugin.md) first and keep the tools-first path clear\n- then you might find it helpful to revisit [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md) to see how external capability routes back into the unified tool bus\n- if state records begin to blur, you might find it helpful to revisit [`data-structures.md`](./data-structures.md)\n- if concept boundaries blur, you might find it helpful to revisit [`glossary.md`](./glossary.md) and [`entity-map.md`](./entity-map.md)\n\n## Why This Deserves a Separate Bridge Note\n\nFor a teaching repo, keeping the mainline focused on external tools first is correct.\n\nThat is the easiest entry:\n\n- connect an external server\n- receive tool definitions\n- call a tool\n- bring the result back into the agent\n\nBut if you want the system shape to approach real high-completion behavior, you quickly meet deeper questions:\n\n- is the server connected through stdio, HTTP, SSE, or WebSocket\n- why are some servers `connected`, while others are `pending` or `needs-auth`\n- where do resources and prompts fit relative to tools\n- why does elicitation become a special kind of interaction\n- where should OAuth or other auth flows be placed conceptually\n\nWithout a capability-layer map, MCP starts to feel scattered.\n\n## Terms First\n\n### What capability layers means\n\nA capability layer is simply:\n\n> one responsibility slice in a larger system\n\nThe point is to avoid mixing every MCP concern into one bag.\n\n### What transport means\n\nTransport is the connection channel between your agent and an MCP server:\n\n- stdio (standard input/output, good for local processes)\n- HTTP\n- SSE (Server-Sent Events, a one-way streaming protocol over HTTP)\n- WebSocket\n\n### What elicitation means\n\nThis is one of the less familiar terms.\n\nA simple teaching definition is:\n\n> an interaction where the MCP server asks the user for more input before it can continue\n\nSo the system is no longer only:\n\n> agent calls tool -> tool returns result\n\nThe server can also say:\n\n> I need more information before I can finish\n\nThis turns a simple call-and-return into a multi-step conversation between the agent and the server.\n\n## The Minimum Mental Model\n\nA clear six-layer picture:\n\n```text\n1. Config Layer\n what the server configuration looks like\n\n2. Transport Layer\n how the server connection is carried\n\n3. Connection State Layer\n connected / pending / failed / needs-auth\n\n4. Capability Layer\n tools / resources / prompts / elicitation\n\n5. Auth Layer\n whether authentication is required and what state it is in\n\n6. 
Router Integration Layer\n how MCP routes back into tool routing, permissions, and notifications\n```\n\nThe key lesson is:\n\n**tools are one layer, not the whole MCP story**\n\n## Why the Mainline Should Still Stay Tools-First\n\nThis matters a lot for teaching.\n\nEven though MCP contains multiple layers, the chapter mainline should still teach:\n\n### Step 1: external tools first\n\nBecause that connects most naturally to everything you already learned:\n\n- local tools\n- external tools\n- one shared router\n\n### Step 2: show that more capability layers exist\n\nFor example:\n\n- resources\n- prompts\n- elicitation\n- auth\n\n### Step 3: decide which advanced layers the repo should actually implement\n\nThat matches the teaching goal:\n\n**build the similar system first, then add the heavier platform layers**\n\n## Core Records\n\n### 1. `ScopedMcpServerConfig`\n\nEven a minimal teaching version should expose this idea:\n\n```python\nconfig = {\n \"name\": \"postgres\",\n \"type\": \"stdio\",\n \"command\": \"npx\",\n \"args\": [\"-y\", \"...\"],\n \"scope\": \"project\",\n}\n```\n\n`scope` matters because server configuration may come from different places (global user settings, project-level settings, or even per-workspace overrides).\n\n### 2. MCP connection state\n\n```python\nserver_state = {\n \"name\": \"postgres\",\n \"status\": \"connected\", # pending / failed / needs-auth / disabled\n \"config\": {...},\n}\n```\n\n### 3. `MCPToolSpec`\n\n```python\ntool = {\n \"name\": \"mcp__postgres__query\",\n \"description\": \"...\",\n \"input_schema\": {...},\n}\n```\n\n### 4. `ElicitationRequest`\n\n```python\nrequest = {\n \"server_name\": \"some-server\",\n \"message\": \"Please provide additional input\",\n \"requested_schema\": {...},\n}\n```\n\nThe teaching point is not that you need to implement elicitation immediately.\n\nThe point is:\n\n**MCP is not guaranteed to stay a one-way tool invocation forever**\n\n## The Cleaner Platform Picture\n\n```text\nMCP Config\n |\n v\nTransport\n |\n v\nConnection State\n |\n +-- connected\n +-- pending\n +-- needs-auth\n +-- failed\n |\n v\nCapabilities\n +-- tools\n +-- resources\n +-- prompts\n +-- elicitation\n |\n v\nRouter / Permission / Notification Integration\n```\n\n## Why Auth Should Not Dominate the Chapter Mainline\n\nAuth is a real layer in the full platform.\n\nBut if the mainline falls into OAuth or vendor-specific auth flow details too early, beginners lose the actual system shape.\n\nA better teaching order is:\n\n- first explain that an auth layer exists\n- then explain that `connected` and `needs-auth` are different connection states\n- only later, in advanced platform work, expand the full auth state machine\n\nThat keeps the repo honest without derailing your learning path.\n\n## How This Relates to `s19` and `s02a`\n\n- the `s19` chapter keeps teaching the tools-first external capability path\n- this note supplies the broader platform map\n- `s02a` explains how MCP capability eventually reconnects to the unified tool control plane\n\nTogether, they teach the actual idea:\n\n**MCP is an external capability platform, and tools are only the first face of it that enters the mainline**\n\n## Common Beginner Mistakes\n\n### 1. Treating MCP as only an external tool catalog\n\nThat makes resources, prompts, auth, and elicitation feel surprising later.\n\n### 2. Diving into transport or OAuth details too early\n\nThat breaks the teaching mainline.\n\n### 3. 
Letting MCP tools bypass permission checks\n\nThat opens a dangerous side door in the system boundary.\n\n### 4. Mixing server config, connection state, and exposed capabilities into one blob\n\nThose layers should stay conceptually separate.\n\n## Key Takeaway\n\n**MCP is a six-layer capability platform. Tools are the first layer you build, but resources, prompts, elicitation, auth, and router integration are all part of the full picture.**\n"
+ },
+ {
+ "version": null,
+ "slug": "teaching-scope",
+ "locale": "en",
+ "title": "Teaching Scope",
+ "kind": "bridge",
+ "filename": "teaching-scope.md",
+ "content": "# Teaching Scope\n\nThis document explains what you will learn in this repo, what is deliberately left out, and how each chapter stays aligned with your mental model as it grows.\n\n## The Goal Of This Repo\n\nThis is not a line-by-line commentary on some upstream production codebase.\n\nThe real goal is:\n\n**teach you how to build a high-completion coding-agent harness from scratch.**\n\nThat implies three obligations:\n\n1. you can actually rebuild it\n2. you keep the mainline clear instead of drowning in side detail\n3. you do not absorb mechanisms that do not really exist\n\n## What Every Chapter Should Cover\n\nEvery mainline chapter should make these things explicit:\n\n- what problem the mechanism solves\n- which module or layer it belongs to\n- what state it owns\n- what data structures it introduces\n- how it plugs back into the loop\n- what changes in the runtime flow after it appears\n\nIf you finish a chapter and still cannot say where the mechanism lives or what state it owns, the chapter is not done yet.\n\n## What We Deliberately Keep Simple\n\nThese topics are not forbidden, but they should not dominate your learning path:\n\n- packaging, build, and release flow\n- cross-platform compatibility glue\n- telemetry and enterprise policy wiring\n- historical compatibility branches\n- product-specific naming accidents\n- line-by-line upstream code matching\n\nThose belong in appendices, maintainer notes, or later productization notes, not at the center of the beginner path.\n\n## What \"High Fidelity\" Really Means Here\n\nHigh fidelity in a teaching repo does not mean reproducing every edge detail 1:1.\n\nIt means staying close to the true system backbone:\n\n- core runtime model\n- module boundaries\n- key records\n- state transitions\n- cooperation between major subsystems\n\nIn short:\n\n**be highly faithful to the trunk, and deliberate about teaching simplifications at the edges.**\n\n## Who This Is For\n\nYou do not need to be an expert in agent platforms.\n\nA better assumption about you:\n\n- basic Python is familiar\n- functions, classes, lists, and dictionaries are familiar\n- agent systems may be completely new\n\nThat means the chapters should:\n\n- explain new concepts before using them\n- keep one concept complete in one main place\n- move from \"what it is\" to \"why it exists\" to \"how to build it\"\n\n## Recommended Chapter Structure\n\nMainline chapters should roughly follow this order:\n\n1. what problem appears without this mechanism\n2. first explain the new terms\n3. give the smallest useful mental model\n4. show the core records / data structures\n5. show the smallest correct implementation\n6. show how it plugs into the main loop\n7. show common beginner mistakes\n8. show what a higher-completion version would add later\n\n## Terminology Guideline\n\nIf a chapter introduces a term from these categories, it should explain it:\n\n- design pattern\n- data structure\n- concurrency term\n- protocol / networking term\n- uncommon engineering vocabulary\n\nExamples:\n\n- state machine\n- scheduler\n- queue\n- worktree\n- DAG\n- protocol envelope\n\nDo not drop the name without the explanation.\n\n## Minimal Correct Version Principle\n\nReal mechanisms are often complex, but teaching works best when it does not start with every branch at once.\n\nPrefer this sequence:\n\n1. show the smallest correct version\n2. explain what core problem it already solves\n3. 
show what later iterations would add\n\nExamples:\n\n- permission system: first `deny -> mode -> allow -> ask`\n- error recovery: first three major recovery branches\n- task system: first task records, dependencies, and unlocks\n- team protocols: first request / response plus `request_id`\n\n## Checklist For Rewriting A Chapter\n\n- Does the first screen explain why the mechanism exists?\n- Are new terms explained before they are used?\n- Is there a small mental model or flow picture?\n- Are key records listed explicitly?\n- Is the plug-in point back into the loop explained?\n- Are core mechanisms separated from peripheral product detail?\n- Are the easiest confusion points called out?\n- Does the chapter avoid inventing mechanisms not supported by the repo?\n\n## How To Use Reverse-Engineered Source Material\n\nReverse-engineered source should be used as:\n\n**maintainer calibration material**\n\nUse it to:\n\n- verify the mainline mechanism is described correctly\n- verify important boundaries and records are not missing\n- verify the teaching implementation did not drift into fiction\n\nIt should never become a prerequisite for understanding the teaching docs.\n\n## Key Takeaway\n\n**The quality of a teaching repo is decided less by how many details it mentions and more by whether the important details are fully explained and the unimportant details are safely omitted.**\n"
+ },
+ {
+ "version": null,
+ "slug": "team-task-lane-model",
+ "locale": "en",
+ "title": "Team Task Lane Model",
+ "kind": "bridge",
+ "filename": "team-task-lane-model.md",
+ "content": "# Team Task Lane Model\n\n> **Deep Dive** -- Best read at the start of Stage 4 (s15-s18). It separates five concepts that look similar but live on different layers.\n\n### When to Read This\n\nBefore you start the team chapters. Keep it open as a reference during s15-s18.\n\n---\n\n> By the time you reach `s15-s18`, the easiest thing to blur is not a function name.\n>\n> It is this:\n>\n> **Who is working, who is coordinating, what records the goal, and what provides the execution lane.**\n\n## What This Bridge Doc Fixes\n\nAcross `s15-s18`, you will encounter these words that can easily blur into one vague idea:\n\n- teammate\n- protocol request\n- task\n- runtime task\n- worktree\n\nThey all relate to work getting done, but they do **not** live on the same layer.\n\nIf you do not separate them, the later chapters start to feel tangled:\n\n- Is a teammate the same thing as a task?\n- What is the difference between `request_id` and `task_id`?\n- Is a worktree just another runtime task?\n- Why can a task be complete while a worktree is still kept?\n\nThis document exists to separate those layers cleanly.\n\n## Recommended Reading Order\n\n1. Read [`s15-agent-teams.md`](./s15-agent-teams.md) for long-lived teammates.\n2. Read [`s16-team-protocols.md`](./s16-team-protocols.md) for tracked request-response coordination.\n3. Read [`s17-autonomous-agents.md`](./s17-autonomous-agents.md) for self-claiming teammates.\n4. Read [`s18-worktree-task-isolation.md`](./s18-worktree-task-isolation.md) for isolated execution lanes.\n\nIf the vocabulary starts to blur, you might find it helpful to revisit:\n\n- [`entity-map.md`](./entity-map.md)\n- [`data-structures.md`](./data-structures.md)\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)\n\n## The Core Separation\n\n```text\nteammate\n = who participates over time\n\nprotocol request\n = one tracked coordination request inside the team\n\ntask\n = what should be done\n\nruntime task / execution slot\n = what is actively running right now\n\nworktree\n = where the work executes without colliding with other lanes\n```\n\nThe most common confusion is between the last three:\n\n- `task`\n- `runtime task`\n- `worktree`\n\nAsk three separate questions every time:\n\n- Is this the goal?\n- Is this the running execution unit?\n- Is this the isolated execution directory?\n\n## The Smallest Clean Diagram\n\n```text\nTeam Layer\n teammate: alice (frontend)\n\nProtocol Layer\n request_id=req_01\n kind=plan_approval\n status=pending\n\nWork Graph Layer\n task_id=12\n subject=\"Implement login page\"\n owner=\"alice\"\n status=\"in_progress\"\n\nRuntime Layer\n runtime_id=rt_01\n type=in_process_teammate\n status=running\n\nExecution Lane Layer\n worktree=login-page\n path=.worktrees/login-page\n status=active\n```\n\nOnly one of those records the work goal itself:\n\n> `task_id=12`\n\nThe others support coordination, execution, or isolation around that goal.\n\n## 1. Teammate: Who Is Collaborating\n\nIntroduced in `s15`.\n\nThis layer answers:\n\n- what the long-lived worker is called\n- what role it has\n- whether it is `working`, `idle`, or `shutdown`\n- whether it has its own inbox\n\nExample:\n\n```python\nmember = {\n \"name\": \"alice\",\n \"role\": \"frontend\",\n \"status\": \"idle\",\n}\n```\n\nThe point is not \"another agent instance.\"\n\nThe point is:\n\n> a persistent identity that can repeatedly receive work.\n\n## 2. 
Protocol Request: What Is Being Coordinated\n\nIntroduced in `s16`.\n\nThis layer answers:\n\n- who asked whom\n- what kind of request this is\n- whether it is still pending or already resolved\n\nExample:\n\n```python\nrequest = {\n \"request_id\": \"a1b2c3d4\",\n \"kind\": \"plan_approval\",\n \"from\": \"alice\",\n \"to\": \"lead\",\n \"status\": \"pending\",\n}\n```\n\nThis is not ordinary chat.\n\nIt is:\n\n> a coordination record whose state can continue to evolve.\n\n## 3. Task: What Should Be Done\n\nThis is the durable work-graph task from `s12`, and it is what `s17` teammates claim.\n\nIt answers:\n\n- what the goal is\n- who owns it\n- what blocks it\n- what progress state it is in\n\nExample:\n\n```python\ntask = {\n \"id\": 12,\n \"subject\": \"Implement login page\",\n \"status\": \"in_progress\",\n \"owner\": \"alice\",\n \"blockedBy\": [],\n}\n```\n\nKeyword:\n\n**goal**\n\nNot directory. Not protocol. Not process.\n\n## 4. Runtime Task / Execution Slot: What Is Running\n\nThis layer was already clarified in the `s13a` bridge doc, but it matters even more in `s15-s18`.\n\nExamples:\n\n- a background shell command\n- a long-lived teammate currently working\n- a monitor process watching an external state\n\nThese are best understood as:\n\n> active execution slots\n\nExample:\n\n```python\nruntime = {\n \"id\": \"rt_01\",\n \"type\": \"in_process_teammate\",\n \"status\": \"running\",\n \"work_graph_task_id\": 12,\n}\n```\n\nImportant boundary:\n\n- one work-graph task may spawn multiple runtime tasks\n- a runtime task is an execution instance, not the durable goal itself\n\n## 5. Worktree: Where the Work Happens\n\nIntroduced in `s18`.\n\nThis layer answers:\n\n- which isolated directory is used\n- which task it is bound to\n- whether that lane is `active`, `kept`, or `removed`\n\nExample:\n\n```python\nworktree = {\n \"name\": \"login-page\",\n \"path\": \".worktrees/login-page\",\n \"task_id\": 12,\n \"status\": \"active\",\n}\n```\n\nKeyword:\n\n**execution boundary**\n\nIt is not the task goal itself. It is the isolated lane where that goal is executed.\n\n## How The Layers Connect\n\n```text\nteammate\n coordinates through protocol requests\n claims a task\n runs as an execution slot\n works inside a worktree lane\n```\n\nIn a more concrete sentence:\n\n> `alice` claims `task #12` and progresses it inside the `login-page` worktree lane.\n\nThat sentence is much cleaner than saying:\n\n> \"alice is doing the login-page worktree task\"\n\nbecause the shorter sentence incorrectly merges:\n\n- the teammate\n- the task\n- the worktree\n\n## Common Mistakes\n\n### 1. Treating teammate and task as the same object\n\nThe teammate executes. The task expresses the goal.\n\n### 2. Treating `request_id` and `task_id` as interchangeable\n\nOne tracks coordination. The other tracks work goals.\n\n### 3. Treating the runtime slot as the durable task\n\nThe running execution may end while the durable task still exists.\n\n### 4. Treating the worktree as the task itself\n\nThe worktree is only the execution lane.\n\n### 5. Saying \"the system works in parallel\" without naming the layers\n\nGood teaching does not stop at \"there are many agents.\"\n\nIt can say clearly:\n\n> teammates provide long-lived collaboration, requests track coordination, tasks record goals, runtime slots carry execution, and worktrees isolate the execution directory.\n\n## What You Should Be Able to Say After Reading This\n\n1. `s17` autonomy claims `s12` work-graph tasks, not `s13` runtime slots.\n2. 
`s18` worktrees bind execution lanes to tasks; they do not turn tasks into directories.\n3. A teammate can be idle while the task still exists and while the worktree is still kept.\n4. A protocol request tracks a coordination exchange, not a work goal.\n\n## Key Takeaway\n\n**Five things that sound alike -- teammate, protocol request, task, runtime slot, worktree -- live on five separate layers. Naming which layer you mean is how you keep the team chapters from collapsing into confusion.**\n"
+ },
+ {
+ "version": null,
+ "slug": "data-structures",
+ "locale": "zh",
+ "title": "Core Data Structures (核心数据结构总表)",
+ "kind": "bridge",
+ "filename": "data-structures.md",
+ "content": "# Core Data Structures (核心数据结构总表)\n\n> 学习 agent,最容易迷路的地方不是功能太多,而是不知道“状态到底放在哪”。这份文档把主线章节和桥接章节里反复出现的关键数据结构集中列出来,方便你把整套系统看成一张图。\n\n## 推荐联读\n\n建议把这份总表当成“状态地图”来用:\n\n- 先不懂词,就回 [`glossary.md`](./glossary.md)。\n- 先不懂边界,就回 [`entity-map.md`](./entity-map.md)。\n- 如果卡在 `TaskRecord` 和 `RuntimeTaskState`,继续看 [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)。\n- 如果卡在 MCP 为什么还有 resource / prompt / elicitation,继续看 [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md)。\n\n## 先记住两个总原则\n\n### 原则 1:区分“内容状态”和“流程状态”\n\n- `messages`、`tool_result`、memory 正文,属于内容状态。\n- `turn_count`、`transition`、`pending_classifier_check`,属于流程状态。\n\n很多初学者会把这两类状态混在一起。 \n一混,后面就很难看懂为什么一个结构完整的系统会需要控制平面。\n\n### 原则 2:区分“持久状态”和“运行时状态”\n\n- task、memory、schedule 这类状态,通常会落盘,跨会话存在。\n- runtime task、当前 permission decision、当前 MCP connection 这类状态,通常只在系统运行时活着。\n\n## 1. 查询与对话控制状态\n\n### Message\n\n作用:保存当前对话和工具往返历史。\n\n最小形状:\n\n```python\nmessage = {\n \"role\": \"user\" | \"assistant\",\n \"content\": \"...\",\n}\n```\n\n支持工具调用后,`content` 常常不再只是字符串,而会变成块列表,其中可能包含:\n\n- text block\n- `tool_use`\n- `tool_result`\n\n相关章节:\n\n- `s01`\n- `s02`\n- `s06`\n- `s10`\n\n### NormalizedMessage\n\n作用:把不同来源的消息整理成统一、稳定、可送给模型 API 的消息格式。\n\n最小形状:\n\n```python\nmessage = {\n \"role\": \"user\" | \"assistant\",\n \"content\": [\n {\"type\": \"text\", \"text\": \"...\"},\n ],\n}\n```\n\n它和普通 `Message` 的区别是:\n\n- `Message` 偏“系统内部记录”\n- `NormalizedMessage` 偏“准备发给模型之前的统一输入”\n\n相关章节:\n\n- `s10`\n- [`s10a-message-prompt-pipeline.md`](./s10a-message-prompt-pipeline.md)\n\n### CompactSummary\n\n作用:上下文太长时,用摘要替代旧消息原文。\n\n最小形状:\n\n```python\nsummary = {\n \"task_overview\": \"...\",\n \"current_state\": \"...\",\n \"key_decisions\": [\"...\"],\n \"next_steps\": [\"...\"],\n}\n```\n\n相关章节:\n\n- `s06`\n- `s11`\n\n### SystemPromptBlock\n\n作用:把 system prompt 从一整段大字符串,拆成若干可管理片段。\n\n最小形状:\n\n```python\nblock = {\n \"text\": \"...\",\n \"cache_scope\": None,\n}\n```\n\n你可以把它理解成:\n\n- `text`:这一段提示词正文\n- `cache_scope`:这一段是否可以复用缓存\n\n相关章节:\n\n- `s10`\n- [`s10a-message-prompt-pipeline.md`](./s10a-message-prompt-pipeline.md)\n\n### PromptParts\n\n作用:在真正拼成 system prompt 之前,先把各部分拆开管理。\n\n最小形状:\n\n```python\nparts = {\n \"core\": \"...\",\n \"tools\": \"...\",\n \"skills\": \"...\",\n \"memory\": \"...\",\n \"claude_md\": \"...\",\n \"dynamic\": \"...\",\n}\n```\n\n相关章节:\n\n- `s10`\n\n### QueryParams\n\n作用:进入查询主循环时,外部一次性传进来的输入集合。\n\n最小形状:\n\n```python\nparams = {\n \"messages\": [...],\n \"system_prompt\": \"...\",\n \"user_context\": {...},\n \"system_context\": {...},\n \"tool_use_context\": {...},\n \"fallback_model\": None,\n \"max_output_tokens_override\": None,\n \"max_turns\": None,\n}\n```\n\n它的重要点在于:\n\n- 这是“本次 query 的入口输入”\n- 它和循环内部不断变化的状态,不是同一层\n\n相关章节:\n\n- [`s00a-query-control-plane.md`](./s00a-query-control-plane.md)\n\n### QueryState\n\n作用:保存一条 query 在多轮循环之间不断变化的流程状态。\n\n最小形状:\n\n```python\nstate = {\n \"messages\": [...],\n \"tool_use_context\": {...},\n \"turn_count\": 1,\n \"max_output_tokens_recovery_count\": 0,\n \"has_attempted_reactive_compact\": False,\n \"max_output_tokens_override\": None,\n \"pending_tool_use_summary\": None,\n \"stop_hook_active\": False,\n \"transition\": None,\n}\n```\n\n这类字段的共同特点是:\n\n- 它们不是对话内容\n- 它们是“这一轮该怎么继续”的控制状态\n\n相关章节:\n\n- [`s00a-query-control-plane.md`](./s00a-query-control-plane.md)\n- `s11`\n\n### TransitionReason\n\n作用:记录“上一轮为什么继续了,而不是结束”。\n\n最小形状:\n\n```python\ntransition = {\n \"reason\": \"next_turn\",\n}\n```\n\n在更完整的 query 状态里,这个 `reason` 常见会有这些类型:\n\n- `next_turn`\n- 
`reactive_compact_retry`\n- `token_budget_continuation`\n- `max_output_tokens_recovery`\n- `stop_hook_continuation`\n\n它的价值不是炫技,而是让:\n\n- 日志更清楚\n- 测试更清楚\n- 恢复链路更清楚\n\n相关章节:\n\n- [`s00a-query-control-plane.md`](./s00a-query-control-plane.md)\n- `s11`\n\n## 2. 工具、权限与 hook 执行状态\n\n### ToolSpec\n\n作用:告诉模型“有哪些工具、每个工具要什么输入”。\n\n最小形状:\n\n```python\ntool = {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {...},\n}\n```\n\n相关章节:\n\n- `s02`\n- `s19`\n\n### ToolDispatchMap\n\n作用:把工具名映射到真实执行函数。\n\n最小形状:\n\n```python\nhandlers = {\n \"read_file\": run_read,\n \"write_file\": run_write,\n \"bash\": run_bash,\n}\n```\n\n相关章节:\n\n- `s02`\n\n### ToolUseContext\n\n作用:把工具运行时需要的共享环境打成一个总线。\n\n最小形状:\n\n```python\ntool_use_context = {\n \"tools\": handlers,\n \"permission_context\": {...},\n \"mcp_clients\": [],\n \"messages\": [...],\n \"app_state\": {...},\n \"cwd\": \"...\",\n \"read_file_state\": {...},\n \"notifications\": [],\n}\n```\n\n这层很关键。 \n因为在更完整的工具执行环境里,工具拿到的不只是 `tool_input`,还包括:\n\n- 当前权限环境\n- 当前消息\n- 当前 app state\n- 当前 MCP client\n- 当前文件读取缓存\n\n相关章节:\n\n- [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md)\n- `s07`\n- `s19`\n\n### PermissionRule\n\n作用:描述某类工具调用命中后该怎么处理。\n\n最小形状:\n\n```python\nrule = {\n \"tool_name\": \"bash\",\n \"rule_content\": \"rm -rf *\",\n \"behavior\": \"deny\",\n}\n```\n\n相关章节:\n\n- `s07`\n\n### PermissionRuleSource\n\n作用:标记一条权限规则是从哪里来的。\n\n最小形状:\n\n```python\nsource = (\n \"userSettings\"\n | \"projectSettings\"\n | \"localSettings\"\n | \"flagSettings\"\n | \"policySettings\"\n | \"cliArg\"\n | \"command\"\n | \"session\"\n)\n```\n\n这个结构的意义是:\n\n- 你不只知道“有什么规则”\n- 还知道“这条规则是谁加进来的”\n\n相关章节:\n\n- `s07`\n\n### PermissionDecision\n\n作用:表示一次工具调用当前该允许、拒绝还是提问。\n\n最小形状:\n\n```python\ndecision = {\n \"behavior\": \"allow\" | \"deny\" | \"ask\",\n \"reason\": \"matched deny rule\",\n}\n```\n\n在更完整的权限流里,`ask` 结果还可能带:\n\n- 修改后的输入\n- 建议写回哪些规则更新\n- 一个后台自动分类检查\n\n相关章节:\n\n- `s07`\n\n### PermissionUpdate\n\n作用:描述“这次权限确认之后,要把什么改回配置里”。\n\n最小形状:\n\n```python\nupdate = {\n \"type\": \"addRules\" | \"removeRules\" | \"setMode\" | \"addDirectories\",\n \"destination\": \"userSettings\" | \"projectSettings\" | \"localSettings\" | \"session\",\n \"rules\": [],\n}\n```\n\n它解决的是一个很容易被漏掉的问题:\n\n用户这次点了“允许”,到底只是这一次放行,还是要写回会话、项目,甚至用户级配置。\n\n相关章节:\n\n- `s07`\n\n### HookContext\n\n作用:把某个 hook 事件发生时的上下文打包给外部脚本。\n\n最小形状:\n\n```python\ncontext = {\n \"event\": \"PreToolUse\",\n \"tool_name\": \"bash\",\n \"tool_input\": {...},\n \"tool_result\": \"...\",\n}\n```\n\n相关章节:\n\n- `s08`\n\n### RecoveryState\n\n作用:记录恢复流程已经尝试到哪里。\n\n最小形状:\n\n```python\nstate = {\n \"continuation_attempts\": 0,\n \"compact_attempts\": 0,\n \"transport_attempts\": 0,\n}\n```\n\n相关章节:\n\n- `s11`\n\n## 3. 
持久化工作状态\n\n### TodoItem\n\n作用:当前会话里的轻量计划项。\n\n最小形状:\n\n```python\ntodo = {\n \"content\": \"Read parser.py\",\n \"status\": \"pending\" | \"completed\",\n}\n```\n\n相关章节:\n\n- `s03`\n\n### MemoryEntry\n\n作用:保存跨会话仍然有价值的信息。\n\n最小形状:\n\n```python\nmemory = {\n \"name\": \"prefer_tabs\",\n \"description\": \"User prefers tabs for indentation\",\n \"type\": \"user\" | \"feedback\" | \"project\" | \"reference\",\n \"scope\": \"private\" | \"team\",\n \"body\": \"...\",\n}\n```\n\n这里最重要的不是字段多,而是边界清楚:\n\n- 只存不容易从当前项目状态重新推出来的东西\n- 记忆可能会过时,要验证\n\n相关章节:\n\n- `s09`\n\n### TaskRecord\n\n作用:磁盘上的工作图任务节点。\n\n最小形状:\n\n```python\ntask = {\n \"id\": 12,\n \"subject\": \"Implement auth module\",\n \"description\": \"\",\n \"status\": \"pending\",\n \"blockedBy\": [],\n \"blocks\": [],\n \"owner\": \"\",\n \"worktree\": \"\",\n}\n```\n\n重点字段:\n\n- `blockedBy`:谁挡着我\n- `blocks`:我挡着谁\n- `owner`:谁认领了\n- `worktree`:在哪个隔离目录里做\n\n相关章节:\n\n- `s12`\n- `s17`\n- `s18`\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)\n\n### ScheduleRecord\n\n作用:记录未来要触发的调度任务。\n\n最小形状:\n\n```python\nschedule = {\n \"id\": \"job_001\",\n \"cron\": \"0 9 * * 1\",\n \"prompt\": \"Generate weekly report\",\n \"recurring\": True,\n \"durable\": True,\n \"created_at\": 1710000000.0,\n \"last_fired_at\": None,\n}\n```\n\n相关章节:\n\n- `s14`\n\n## 4. 运行时执行状态\n\n### RuntimeTaskState\n\n作用:表示系统里一个“正在运行的执行单元”。\n\n最小形状:\n\n```python\nruntime_task = {\n \"id\": \"b8k2m1qz\",\n \"type\": \"local_bash\",\n \"status\": \"running\",\n \"description\": \"Run pytest\",\n \"start_time\": 1710000000.0,\n \"end_time\": None,\n \"output_file\": \".task_outputs/b8k2m1qz.txt\",\n \"notified\": False,\n}\n```\n\n这和 `TaskRecord` 不是一回事:\n\n- `TaskRecord` 管工作目标\n- `RuntimeTaskState` 管当前执行槽位\n\n相关章节:\n\n- `s13`\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)\n\n### TeamMember\n\n作用:记录一个持久队友是谁、在做什么。\n\n最小形状:\n\n```python\nmember = {\n \"name\": \"alice\",\n \"role\": \"coder\",\n \"status\": \"idle\",\n}\n```\n\n相关章节:\n\n- `s15`\n- `s17`\n\n### MessageEnvelope\n\n作用:队友之间传递结构化消息。\n\n最小形状:\n\n```python\nmessage = {\n \"type\": \"message\" | \"shutdown_request\" | \"plan_approval\",\n \"from\": \"lead\",\n \"to\": \"alice\",\n \"request_id\": \"req_001\",\n \"content\": \"...\",\n \"payload\": {},\n \"timestamp\": 1710000000.0,\n}\n```\n\n相关章节:\n\n- `s15`\n- `s16`\n\n### RequestRecord\n\n作用:追踪一个协议请求当前走到哪里。\n\n最小形状:\n\n```python\nrequest = {\n \"request_id\": \"req_001\",\n \"kind\": \"shutdown\" | \"plan_review\",\n \"status\": \"pending\" | \"approved\" | \"rejected\" | \"expired\",\n \"from\": \"lead\",\n \"to\": \"alice\",\n}\n```\n\n相关章节:\n\n- `s16`\n\n### WorktreeRecord\n\n作用:记录一个任务绑定的隔离工作目录。\n\n最小形状:\n\n```python\nworktree = {\n \"name\": \"auth-refactor\",\n \"path\": \".worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\",\n}\n```\n\n相关章节:\n\n- `s18`\n\n### WorktreeEvent\n\n作用:记录 worktree 生命周期事件,便于恢复和排查。\n\n最小形状:\n\n```python\nevent = {\n \"event\": \"worktree.create.after\",\n \"task_id\": 12,\n \"worktree\": \"auth-refactor\",\n \"ts\": 1710000000.0,\n}\n```\n\n相关章节:\n\n- `s18`\n\n## 5. 
外部平台与 MCP 状态\n\n### ScopedMcpServerConfig\n\n作用:描述一个 MCP server 应该如何连接,以及它的配置来自哪个作用域。\n\n最小形状:\n\n```python\nconfig = {\n \"name\": \"postgres\",\n \"type\": \"stdio\",\n \"command\": \"npx\",\n \"args\": [\"-y\", \"...\"],\n \"scope\": \"project\",\n}\n```\n\n这个 `scope` 很重要,因为 server 配置可能来自:\n\n- 本地\n- 用户\n- 项目\n- 动态注入\n- 插件或托管来源\n\n相关章节:\n\n- `s19`\n- [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md)\n- [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md)\n\n### MCPServerConnectionState\n\n作用:表示一个 MCP server 当前连到了哪一步。\n\n最小形状:\n\n```python\nserver_state = {\n \"name\": \"postgres\",\n \"status\": \"connected\", # pending / failed / needs-auth / disabled\n \"config\": {...},\n}\n```\n\n这层特别重要,因为“有没有接上”不是布尔值,而是多种状态:\n\n- `connected`\n- `pending`\n- `failed`\n- `needs-auth`\n- `disabled`\n\n相关章节:\n\n- `s19`\n- [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md)\n\n### MCPToolSpec\n\n作用:把外部 MCP 工具转换成 agent 内部统一工具定义。\n\n最小形状:\n\n```python\nmcp_tool = {\n \"name\": \"mcp__postgres__query\",\n \"description\": \"Run a SQL query\",\n \"input_schema\": {...},\n}\n```\n\n相关章节:\n\n- `s19`\n\n### ElicitationRequest\n\n作用:表示 MCP server 反过来向用户请求额外输入。\n\n最小形状:\n\n```python\nrequest = {\n \"server_name\": \"some-server\",\n \"message\": \"Please provide additional input\",\n \"requested_schema\": {...},\n}\n```\n\n它提醒你一件事:\n\n- MCP 不只是“模型主动调工具”\n- 外部 server 也可能反过来请求补充输入\n\n相关章节:\n\n- [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md)\n\n## 最后用一句话把它们串起来\n\n如果你只想记一条总线索,可以记这个:\n\n```text\nmessages / prompt / query state\n  管本轮输入和继续理由\n\ntools / permissions / hooks\n  管动作怎么安全执行\n\nmemory / task / schedule\n  管跨轮、跨会话的持久工作\n\nruntime task / team / worktree\n  管当前执行车道\n\nmcp\n  管系统怎样向外接能力\n```\n\n这份总表最好配合 [`s00-architecture-overview.md`](./s00-architecture-overview.md) 和 [`entity-map.md`](./entity-map.md) 一起看。\n\n## 教学边界\n\n这份总表只负责做两件事:\n\n- 帮你确认一个状态到底属于哪一层\n- 帮你确认这个状态大概长什么样\n\n它不负责穷举真实系统里的每一个字段、每一条兼容分支、每一种产品化补丁。\n\n如果你已经知道某个状态归谁管、什么时候创建、什么时候销毁,再回到对应章节看执行路径,理解会顺很多。\n"
+ },
+ {
+ "version": null,
+ "slug": "entity-map",
+ "locale": "zh",
+ "title": "Entity Map (系统实体边界图)",
+ "kind": "bridge",
+ "filename": "entity-map.md",
+ "content": "# Entity Map (系统实体边界图)\n\n> 这份文档不是某一章的正文,而是一张“别再混词”的地图。 \n> 到了仓库后半程,真正让读者困惑的往往不是代码,而是:\n>\n> **同一个系统里,为什么会同时出现这么多看起来很像、但其实不是一回事的实体。**\n\n## 这张图和另外几份桥接文档怎么分工\n\n- 这份图先回答:一个词到底属于哪一层。\n- [`glossary.md`](./glossary.md) 先回答:这个词到底是什么意思。\n- [`data-structures.md`](./data-structures.md) 再回答:这个词落到代码里时,状态长什么样。\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) 专门补“工作图任务”和“运行时任务”的分层。\n- [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md) 专门补 MCP 平台层不是只有 tools。\n\n## 先给一个总图\n\n```text\n对话层\n - message\n - prompt block\n - reminder\n\n动作层\n - tool call\n - tool result\n - hook event\n\n工作层\n - work-graph task\n - runtime task\n - protocol request\n\n执行层\n - subagent\n - teammate\n - worktree lane\n\n平台层\n - mcp server\n - mcp capability\n - memory record\n```\n\n## 最容易混淆的 8 对概念\n\n### 1. Message vs Prompt Block\n\n| 实体 | 它是什么 | 它不是什么 | 常见位置 |\n|---|---|---|---|\n| `Message` | 对话历史中的一条消息 | 不是长期系统规则 | `messages[]` |\n| `Prompt Block` | system prompt 内的一段稳定说明 | 不是某一轮刚发生的事件 | prompt builder |\n\n简单记法:\n\n- message 更像“对话内容”\n- prompt block 更像“系统说明”\n\n### 2. Todo / Plan vs Task\n\n| 实体 | 它是什么 | 它不是什么 |\n|---|---|---|\n| `todo / plan` | 当前轮或当前阶段的过程性安排 | 不是长期持久化工作图 |\n| `task` | 持久化的工作节点 | 不是某一轮的临时思路 |\n\n### 3. Work-Graph Task vs Runtime Task\n\n| 实体 | 它是什么 | 它不是什么 |\n|---|---|---|\n| `work-graph task` | 任务板上的工作节点 | 不是系统里活着的执行单元 |\n| `runtime task` | 当前正在执行的后台/agent/monitor 槽位 | 不是依赖图节点 |\n\n这对概念是整个仓库后半程最关键的区分之一。\n\n### 4. Subagent vs Teammate\n\n| 实体 | 它是什么 | 它不是什么 |\n|---|---|---|\n| `subagent` | 一次性委派执行者 | 不是长期在线成员 |\n| `teammate` | 持久存在、可重复接活的队友 | 不是一次性摘要工具 |\n\n### 5. Protocol Request vs Normal Message\n\n| 实体 | 它是什么 | 它不是什么 |\n|---|---|---|\n| `normal message` | 自由文本沟通 | 不是可追踪的审批流程 |\n| `protocol request` | 带 request_id 的结构化请求 | 不是随便说一句话 |\n\n### 6. Worktree vs Task\n\n| 实体 | 它是什么 | 它不是什么 |\n|---|---|---|\n| `task` | 说明要做什么 | 不是目录 |\n| `worktree` | 说明在哪做 | 不是工作目标 |\n\n### 7. Memory vs CLAUDE.md\n\n| 实体 | 它是什么 | 它不是什么 |\n|---|---|---|\n| `memory` | 跨会话仍有价值、但不易从当前代码直接推出来的信息 | 不是项目规则文件 |\n| `CLAUDE.md` | 长期规则、约束和说明 | 不是用户偏好或项目动态背景 |\n\n### 8. MCP Server vs MCP Tool\n\n| 实体 | 它是什么 | 它不是什么 |\n|---|---|---|\n| `MCP server` | 外部能力提供者 | 不是单个工具定义 |\n| `MCP tool` | 某个 server 暴露出来的一项具体能力 | 不是完整平台连接本身 |\n\n## 一张“是什么 / 存在哪里”的速查表\n\n| 实体 | 主要作用 | 典型存放位置 |\n|---|---|---|\n| `Message` | 当前对话历史 | `messages[]` |\n| `PromptParts` | system prompt 的组装片段 | prompt builder |\n| `PermissionRule` | 工具执行前的决策规则 | settings / session state |\n| `HookEvent` | 某个时机触发的扩展点 | hook config |\n| `MemoryEntry` | 跨会话有价值信息 | `.memory/` |\n| `TaskRecord` | 持久化工作节点 | `.tasks/` |\n| `RuntimeTaskState` | 正在执行的任务槽位 | runtime task manager |\n| `TeamMember` | 持久队友 | `.team/config.json` |\n| `MessageEnvelope` | 队友间结构化消息 | `.team/inbox/*.jsonl` |\n| `RequestRecord` | 审批/关机等协议状态 | request tracker |\n| `WorktreeRecord` | 隔离工作目录记录 | `.worktrees/index.json` |\n| `MCPServerConfig` | 外部 server 配置 | plugin / settings |\n\n## 后半程推荐怎么记\n\n如果你到了 `s15` 以后开始觉得名词多,可以只记这条线:\n\n```text\nmessage / prompt\n 管输入\n\ntool / permission / hook\n 管动作\n\ntask / runtime task / protocol\n 管工作推进\n\nsubagent / teammate / worktree\n 管执行者和执行车道\n\nmcp / memory / claude.md\n 管平台外延和长期上下文\n```\n\n## 初学者最容易心智打结的地方\n\n### 1. 把“任务”这个词用在所有层\n\n这是最常见的混乱来源。\n\n所以建议你在写正文时,尽量直接写全:\n\n- 工作图任务\n- 运行时任务\n- 后台任务\n- 协议请求\n\n不要都叫“任务”。\n\n### 2. 把队友和子 agent 混成一类\n\n如果生命周期不同,就不是同一类实体。\n\n### 3. 把 worktree 当成 task 的别名\n\n一个是“做什么”,一个是“在哪做”。\n\n### 4. 
把 memory 当成通用笔记本\n\n它不是。它只保存很特定的一类长期信息。\n\n## 这份图应该怎么用\n\n最好的用法不是读一遍背下来,而是:\n\n- 每次你发现两个词开始混\n- 先来这张图里确认它们是不是一个层级\n- 再回去读对应章节\n\n如果你确认“不在一个层级”,下一步最好立刻去找它们对应的数据结构,而不是继续凭感觉读正文。\n\n## 教学边界\n\n这张图只解决“实体边界”这一个问题。\n\n它不负责展开每个实体的全部字段,也不负责把所有产品化分支一起讲完。\n\n你可以把它当成一张分层地图:\n\n- 先确认词属于哪一层\n- 再去对应章节看机制\n- 最后去 [`data-structures.md`](./data-structures.md) 看状态形状\n\n## 一句话记住\n\n**一个结构完整的系统最怕的不是功能多,而是实体边界不清;边界一清,很多复杂度会自动塌下来。**\n"
+ },
+ {
+ "version": null,
+ "slug": "glossary",
+ "locale": "zh",
+ "title": "Glossary (术语表)",
+ "kind": "bridge",
+ "filename": "glossary.md",
+ "content": "# Glossary (术语表)\n\n> 这份术语表只收录本仓库主线里最重要、最容易让初学者卡住的词。 \n> 如果某个词你看着眼熟但说不清它到底是什么,先回这里。\n\n## 推荐联读\n\n如果你不是单纯查词,而是已经开始分不清“这些词分别活在哪一层”,建议按这个顺序一起看:\n\n- 先看 [`entity-map.md`](./entity-map.md):搞清每个实体属于哪一层。\n- 再看 [`data-structures.md`](./data-structures.md):搞清这些词真正落成什么状态结构。\n- 如果你卡在“任务”这个词上,再看 [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)。\n- 如果你卡在 MCP 不只等于 tools,再看 [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md)。\n\n## Agent\n\n在这套仓库里,`agent` 指的是: \n**一个能根据输入做判断,并且会调用工具去完成任务的模型。**\n\n你可以简单理解成:\n\n- 模型负责思考\n- harness 负责给模型工作环境\n\n## Harness\n\n`harness` 可以理解成“给 agent 准备好的工作台”。\n\n它包括:\n\n- 工具\n- 文件系统\n- 权限\n- 提示词\n- 记忆\n- 任务系统\n\n模型本身不是 harness。 \nharness 也不是模型。\n\n## Agent Loop\n\n`agent loop` 是系统反复执行的一条主循环:\n\n1. 把当前上下文发给模型\n2. 看模型是要直接回答,还是要调工具\n3. 如果调工具,就执行工具\n4. 把工具结果写回上下文\n5. 再继续下一轮\n\n没有这条循环,就没有 agent 系统。\n\n## Message / Messages\n\n`message` 是一条消息。 \n`messages` 是消息列表。\n\n它通常包含:\n\n- 用户消息\n- assistant 消息\n- tool_result 消息\n\n这份列表就是 agent 最主要的工作记忆。\n\n## Tool\n\n`tool` 是模型可以调用的一种动作。\n\n例如:\n\n- 读文件\n- 写文件\n- 改文件\n- 跑 shell 命令\n- 搜索文本\n\n模型并不直接执行系统命令。 \n模型只是说“我要调哪个工具、传什么参数”,真正执行的是你的代码。\n\n## Tool Schema\n\n`tool schema` 是工具的输入说明。\n\n它告诉模型:\n\n- 这个工具叫什么\n- 这个工具做什么\n- 需要哪些参数\n- 参数是什么类型\n\n可以把它想成“工具使用说明书”。\n\n## Dispatch Map\n\n`dispatch map` 是一张映射表:\n\n```python\n{\n \"read_file\": read_file_handler,\n \"write_file\": write_file_handler,\n \"bash\": bash_handler,\n}\n```\n\n意思是:\n\n- 模型说要调用 `read_file`\n- 代码就去表里找到 `read_file_handler`\n- 然后执行它\n\n## Stop Reason\n\n`stop_reason` 是模型这一轮为什么停下来的原因。\n\n常见的有:\n\n- `end_turn`:模型说完了\n- `tool_use`:模型要调用工具\n- `max_tokens`:模型输出被截断了\n\n它决定主循环下一步怎么走。\n\n## Context\n\n`context` 是模型当前能看到的信息总和。\n\n包括:\n\n- `messages`\n- system prompt\n- 动态补充信息\n- tool_result\n\n上下文不是永久记忆。 \n上下文是“这一轮工作时当前摆在桌上的东西”。\n\n## Compact / Compaction\n\n`compact` 指压缩上下文。\n\n因为对话越长,模型能看到的历史就越多,成本和混乱也会一起增加。\n\n压缩的目标不是“删除有用信息”,而是:\n\n- 保留真正关键的内容\n- 去掉重复和噪声\n- 给后面的轮次腾空间\n\n## Subagent\n\n`subagent` 是从当前 agent 派生出来的一个子任务执行者。\n\n它最重要的价值是:\n\n**把一个大任务放到独立上下文里处理,避免污染父上下文。**\n\n## Fork\n\n`fork` 在本仓库语境里,指一种子 agent 启动方式:\n\n- 不是从空白上下文开始\n- 而是先继承父 agent 的已有上下文\n\n这适合“子任务必须理解当前讨论背景”的场景。\n\n## Permission\n\n`permission` 就是“这个工具调用能不能执行”。\n\n一个好的权限系统通常要回答三件事:\n\n- 应不应该直接拒绝\n- 能不能自动允许\n- 剩下的是不是要问用户\n\n## Permission Mode\n\n`permission mode` 是权限系统的工作模式。\n\n例如:\n\n- `default`:默认询问\n- `plan`:只允许读,不允许写\n- `auto`:简单安全的操作自动过,危险操作再问\n\n## Hook\n\n`hook` 是一个插入点。\n\n意思是: \n在不改主循环代码的前提下,在某个时机额外执行一段逻辑。\n\n例如:\n\n- 工具调用前先检查一下\n- 工具调用后追加一条审计信息\n\n## Memory\n\n`memory` 是跨会话保存的信息。\n\n但不是所有东西都该存 memory。\n\n适合存 memory 的,通常是:\n\n- 用户长期偏好\n- 多次出现的重要反馈\n- 未来别的会话仍然有价值的信息\n\n## System Prompt\n\n`system prompt` 是系统级说明。\n\n它告诉模型:\n\n- 你是谁\n- 你能做什么\n- 你有哪些规则\n- 你应该如何协作\n\n它比普通用户消息更稳定。\n\n## System Reminder\n\n`system reminder` 是每一轮临时追加的动态提醒。\n\n例如:\n\n- 当前目录\n- 当前日期\n- 某个本轮才需要的额外上下文\n\n它和稳定的 system prompt 不是一回事。\n\n## Task\n\n`task` 是持久化任务系统里的一个任务节点。\n\n一个 task 通常不只是一句待办事项,还会带:\n\n- 状态\n- 描述\n- 依赖关系\n- owner\n\n## Dependency Graph\n\n`dependency graph` 指任务之间的依赖关系图。\n\n最简单的理解:\n\n- A 做完,B 才能开始\n- C 和 D 可以并行\n- E 要等 C 和 D 都完成\n\n这类结构能帮助 agent 判断:\n\n- 现在能做什么\n- 什么被卡住了\n- 什么能同时做\n\n## Worktree\n\n`worktree` 是 Git 提供的一个机制:\n\n同一个仓库,可以在多个不同目录里同时展开多个工作副本。\n\n它的价值是:\n\n- 并行做多个任务\n- 不互相污染文件改动\n- 便于多 agent 并行工作\n\n## MCP\n\n`MCP` 是 Model Context Protocol。\n\n你可以先把它理解成一套统一接口,让 agent 能接入外部工具。\n\n它解决的核心问题是:\n\n- 工具不必都写死在主程序里\n- 可以通过统一协议接入外部能力\n\n如果你已经知道“能接外部工具”,但开始分不清 server、connection、tool、resource、prompt 这些层,继续看:\n\n- [`data-structures.md`](./data-structures.md)\n- 
[`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md)\n\n## Runtime Task\n\n`runtime task` 指的是:\n\n> 系统当前正在运行、等待完成、或者刚刚结束的一条执行单元。\n\n例如:\n\n- 一个后台 `pytest`\n- 一个正在工作的 teammate\n- 一个正在运行的 monitor\n\n它和 `task` 不一样。\n\n- `task` 更像工作目标\n- `runtime task` 更像执行槽位\n\n如果你总把这两个词混掉,不要只在正文里来回翻,直接去看:\n\n- [`entity-map.md`](./entity-map.md)\n- [`data-structures.md`](./data-structures.md)\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)\n\n## Teammate\n\n`teammate` 是长期存在的队友 agent。\n\n它和 `subagent` 的区别是:\n\n- `subagent`:一次性委派,干完就结束\n- `teammate`:长期存在,可以反复接任务\n\n如果你发现自己开始把这两个词混用,说明你需要回看:\n\n- `s04`\n- `s15`\n- `entity-map.md`\n\n## Protocol\n\n`protocol` 就是一套提前约好的协作规则。\n\n它回答的是:\n\n- 消息应该长什么样\n- 收到以后要怎么处理\n- 批准、拒绝、超时这些状态怎么记录\n\n在团队章节里,它最常见的形状是:\n\n```text\nrequest\n ->\nresponse\n ->\nstatus update\n```\n\n## Envelope\n\n`envelope` 本意是“信封”。\n\n在程序里,它表示:\n\n> 把正文和一些元信息一起包起来的一条结构化记录。\n\n例如一条协议消息里,正文之外还会附带:\n\n- `from`\n- `to`\n- `request_id`\n- `timestamp`\n\n这整包东西,就可以叫一个 `envelope`。\n\n## State Machine\n\n`state machine` 不是很玄的高级理论。\n\n你可以先把它理解成:\n\n> 一张“状态可以怎么变化”的规则表。\n\n例如:\n\n```text\npending -> approved\npending -> rejected\npending -> expired\n```\n\n这就是一个最小状态机。\n\n## Router\n\n`router` 可以简单理解成“分发器”。\n\n它的任务是:\n\n- 看请求属于哪一类\n- 把它送去正确的处理路径\n\n例如工具层里:\n\n- 本地工具走本地 handler\n- `mcp__...` 工具走 MCP client\n\n## Control Plane\n\n`control plane` 可以理解成“负责协调和控制的一层”。\n\n它通常不直接产出最终业务结果, \n而是负责决定:\n\n- 谁来执行\n- 在什么环境里执行\n- 有没有权限\n- 执行后要不要通知别的模块\n\n这个词第一次看到容易怕。 \n但在本仓库里,你只需要把它先记成:\n\n> 不直接干活,负责协调怎么干活的一层。\n\n## Capability\n\n`capability` 就是“能力项”。\n\n例如在 MCP 里,能力不只可能是工具,还可能包括:\n\n- tools\n- resources\n- prompts\n- elicitation\n\n所以 `capability` 比 `tool` 更宽。\n\n## Resource\n\n`resource` 可以理解成:\n\n> 一个可读取、可引用、但不一定是“执行动作”的外部内容入口。\n\n例如:\n\n- 一份文档\n- 一个只读配置\n- 一块可被模型读取的数据内容\n\n它和 `tool` 的区别是:\n\n- `tool` 更像动作\n- `resource` 更像可读取内容\n\n## Elicitation\n\n`elicitation` 可以先理解成:\n\n> 外部系统反过来向用户要补充输入。\n\n也就是说,不再只是 agent 主动调用外部能力。 \n外部能力也可能说:\n\n“我还缺一点信息,请你补一下。”\n\n## 最容易混的几对词\n\n如果你是初学者,下面这几对词最值得一起记。\n\n| 词对 | 最简单的区分方法 |\n|---|---|\n| `message` vs `system prompt` | 一个更像对话内容,一个更像系统说明 |\n| `todo` vs `task` | 一个更像临时步骤,一个更像持久化工作节点 |\n| `task` vs `runtime task` | 一个管目标,一个管执行 |\n| `subagent` vs `teammate` | 一个一次性,一个长期存在 |\n| `tool` vs `resource` | 一个更像动作,一个更像内容 |\n| `permission` vs `hook` | 一个决定能不能做,一个决定要不要额外插入行为 |\n\n---\n\n如果读文档时又遇到新词卡住,优先回这里,不要硬顶着往后读。\n"
+ },
+ {
+ "version": null,
+ "slug": "s00-architecture-overview",
+ "locale": "zh",
+ "title": "s00: Architecture Overview (架构总览)",
+ "kind": "bridge",
+ "filename": "s00-architecture-overview.md",
+ "content": "# s00: Architecture Overview (架构总览)\n\n> 这一章是全仓库的地图。 \n> 如果你只想先知道“整个系统到底由哪些模块组成、为什么是这个学习顺序”,先读这一章。\n\n## 先说结论\n\n这套仓库的主线是合理的。\n\n它最重要的优点,不是“章节数量多”,而是它把学习过程拆成了四个阶段:\n\n1. 先做出一个真的能工作的 agent。\n2. 再补安全、扩展、记忆和恢复。\n3. 再把临时清单升级成持久化任务系统。\n4. 最后再进入多 agent、隔离执行和外部工具平台。\n\n这个顺序符合初学者的心智。\n\n因为一个新手最需要的,不是先知道所有高级细节,而是先建立一条稳定的主线:\n\n`用户输入 -> 模型思考 -> 调工具 -> 拿结果 -> 继续思考 -> 完成`\n\n只要这条主线还没真正理解,后面的权限、hook、memory、MCP 都会变成一堆零散名词。\n\n## 这套仓库到底要还原什么\n\n本仓库的目标不是逐行复制任何一个生产仓库。\n\n本仓库真正要还原的是:\n\n- 主要模块有哪些\n- 模块之间怎么协作\n- 每个模块的核心职责是什么\n- 关键状态存在哪里\n- 一条请求在系统里是怎么流动的\n\n也就是说,我们追求的是:\n\n**设计主脉络高保真,而不是所有外围实现细节 1:1。**\n\n这很重要。\n\n如果你是为了自己从 0 到 1 做一个类似系统,那么你真正需要掌握的是:\n\n- 核心循环\n- 工具机制\n- 规划与任务\n- 上下文管理\n- 权限与扩展点\n- 持久化\n- 多 agent 协作\n- 工作隔离\n- 外部工具接入\n\n而不是打包、跨平台兼容、历史兼容分支或产品化胶水代码。\n\n## 三条阅读原则\n\n### 1. 先学最小版本,再学结构更完整的版本\n\n比如子 agent。\n\n最小版本只需要:\n\n- 父 agent 发一个子任务\n- 子 agent 用自己的 `messages`\n- 子 agent 返回一个摘要\n\n这已经能解决 80% 的核心问题:上下文隔离。\n\n等这个最小版本你真的能写出来,再去补更完整的能力,比如:\n\n- 继承父上下文的 fork 模式\n- 独立权限\n- 背景运行\n- worktree 隔离\n\n### 2. 每个新名词都必须先解释\n\n本仓库会经常用到一些词:\n\n- `state machine`\n- `dispatch map`\n- `dependency graph`\n- `frontmatter`\n- `worktree`\n- `MCP`\n\n如果你对这些词不熟,不要硬扛。 \n应该立刻去看术语表:[`glossary.md`](./glossary.md)\n\n如果你想先知道“这套仓库到底教什么、不教什么”,建议配合看:\n\n- [`teaching-scope.md`](./teaching-scope.md)\n\n如果你想先把最关键的数据结构建立成整体地图,可以配合看:\n\n- [`data-structures.md`](./data-structures.md)\n\n如果你已经知道章节顺序没问题,但一打开本地 `agents/*.py` 就会重新乱掉,建议再配合看:\n\n- [`s00f-code-reading-order.md`](./s00f-code-reading-order.md)\n\n### 3. 不把复杂外围细节伪装成“核心机制”\n\n好的教学,不是把一切都讲进去。\n\n好的教学,是把真正关键的东西讲完整,把不关键但很复杂的东西先拿掉。\n\n所以本仓库会刻意省略一些不属于主干的内容,比如:\n\n- 打包与发布\n- 企业策略接线\n- 遥测\n- 多客户端表层集成\n- 历史兼容层\n\n## 建议配套阅读的文档\n\n除了主线章节,我建议把下面两份文档当作全程辅助地图:\n\n| 文档 | 用途 |\n|---|---|\n| [`teaching-scope.md`](./teaching-scope.md) | 帮你分清哪些内容属于教学主线,哪些只是维护者侧补充 |\n| [`data-structures.md`](./data-structures.md) | 帮你集中理解整个系统的关键状态和数据结构 |\n| [`s00f-code-reading-order.md`](./s00f-code-reading-order.md) | 帮你把“章节顺序”和“本地代码阅读顺序”对齐,避免重新乱翻源码 |\n\n如果你已经读到中后半程,想把“章节之间缺的那一层”补上,再加看下面这些桥接文档:\n\n| 文档 | 它补的是什么 |\n|---|---|\n| [`s00d-chapter-order-rationale.md`](./s00d-chapter-order-rationale.md) | 为什么这套课要按现在这个顺序讲,哪些重排会把读者心智讲乱 |\n| [`s00e-reference-module-map.md`](./s00e-reference-module-map.md) | 参考仓库里真正重要的模块簇,和当前课程章节是怎样一一对应的 |\n| [`s00a-query-control-plane.md`](./s00a-query-control-plane.md) | 为什么一个更完整的系统不能只靠 `messages[] + while True` |\n| [`s00b-one-request-lifecycle.md`](./s00b-one-request-lifecycle.md) | 一条请求如何从用户输入一路流过 query、tools、permissions、tasks、teams、MCP 再回到主循环 |\n| [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md) | 为什么工具层不只是 `tool_name -> handler` |\n| [`s10a-message-prompt-pipeline.md`](./s10a-message-prompt-pipeline.md) | 为什么 system prompt 不是模型完整输入的全部 |\n| [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) | 为什么任务板里的 task 和正在运行的 task 不是一回事 |\n| [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md) | 为什么 MCP 正文先讲 tools-first,但平台层还要再补一张地图 |\n| [`entity-map.md`](./entity-map.md) | 帮你把 message、task、runtime task、subagent、teammate、worktree、MCP server 这些实体彻底分开 |\n\n## 四阶段学习路径\n\n### 阶段 1:核心单 agent (`s01-s06`)\n\n目标:先做出一个能干活的 agent。\n\n| 章节 | 学什么 | 解决什么问题 |\n|---|---|---|\n| `s01` | Agent Loop | 没有循环,就没有 agent |\n| `s02` | Tool Use | 让模型从“会说”变成“会做” |\n| `s03` | Todo / Planning | 防止大任务乱撞 |\n| `s04` | Subagent | 防止上下文被大任务污染 |\n| `s05` | Skills | 按需拿知识,不把所有知识塞进提示词 |\n| `s06` | Context Compact | 防止上下文无限膨胀 |\n\n这一阶段结束后,你已经有了一个真正可运行的 coding agent 雏形。\n\n### 阶段 2:生产加固 (`s07-s11`)\n\n目标:让 agent 不只是能跑,而是更安全、更稳、更可扩展。\n\n| 章节 | 学什么 | 解决什么问题 |\n|---|---|---|\n| `s07` | 
Permission System | 危险操作先过权限关 |\n| `s08` | Hook System | 不改主循环也能扩展行为 |\n| `s09` | Memory System | 让真正有价值的信息跨会话存在 |\n| `s10` | System Prompt | 把系统说明、工具、约束组装成稳定输入 |\n| `s11` | Error Recovery | 出错后能恢复,而不是直接崩溃 |\n\n### 阶段 3:任务管理 (`s12-s14`)\n\n目标:把“聊天中的清单”升级成“磁盘上的任务图”。\n\n| 章节 | 学什么 | 解决什么问题 |\n|---|---|---|\n| `s12` | Task System | 大任务要有持久结构 |\n| `s13` | Background Tasks | 慢操作不应该卡住前台思考 |\n| `s14` | Cron Scheduler | 让系统能在未来自动做事 |\n\n### 阶段 4:多 agent 与外部系统 (`s15-s19`)\n\n目标:从单 agent 升级成真正的平台。\n\n| 章节 | 学什么 | 解决什么问题 |\n|---|---|---|\n| `s15` | Agent Teams | 让多个 agent 协作 |\n| `s16` | Team Protocols | 让协作有统一规则 |\n| `s17` | Autonomous Agents | 让 agent 自己找活、认领任务 |\n| `s18` | Worktree Isolation | 并行工作时互不踩目录 |\n| `s19` | MCP & Plugin | 接入外部工具与外部能力 |\n\n## 章节速查表:每章到底新增了哪一层状态\n\n很多读者读到中途会开始觉得:\n\n- 这一章到底是在加工具,还是在加状态\n- 这个机制是“输入层”的,还是“执行层”的\n- 学完这一章以后,我手里到底多了一个什么东西\n\n所以这里给一张全局速查表。 \n读每章以前,先看这一行;读完以后,再回来检查自己是不是真的吃透了这一行。\n\n| 章节 | 新增的核心结构 | 它接在系统哪一层 | 学完你应该会什么 |\n|---|---|---|---|\n| `s01` | `messages` / `LoopState` | 主循环 | 手写一个最小 agent 闭环 |\n| `s02` | `ToolSpec` / `ToolDispatchMap` | 工具层 | 把模型意图路由成真实动作 |\n| `s03` | `TodoItem` / `PlanState` | 过程规划层 | 让 agent 按步骤推进,而不是乱撞 |\n| `s04` | `SubagentContext` | 执行隔离层 | 把探索性工作丢进干净子上下文 |\n| `s05` | `SkillRegistry` / `SkillContent` | 知识注入层 | 只在需要时加载额外知识 |\n| `s06` | `CompactSummary` / `PersistedOutput` | 上下文管理层 | 控制上下文大小又不丢主线 |\n| `s07` | `PermissionRule` / `PermissionDecision` | 安全控制层 | 让危险动作先经过决策管道 |\n| `s08` | `HookEvent` / `HookResult` | 扩展控制层 | 不改主循环也能插入扩展逻辑 |\n| `s09` | `MemoryEntry` / `MemoryStore` | 持久上下文层 | 只把真正跨会话有价值的信息留下 |\n| `s10` | `PromptParts` / `SystemPromptBlock` | 输入组装层 | 把模型输入拆成可管理的管道 |\n| `s11` | `RecoveryState` / `TransitionReason` | 恢复控制层 | 出错后知道为什么继续、怎么继续 |\n| `s12` | `TaskRecord` / `TaskStatus` | 工作图层 | 把临时清单升级成持久化任务图 |\n| `s13` | `RuntimeTaskState` / `Notification` | 运行时执行层 | 让慢任务后台运行、稍后回送结果 |\n| `s14` | `ScheduleRecord` / `CronTrigger` | 定时触发层 | 让时间本身成为工作触发器 |\n| `s15` | `TeamMember` / `MessageEnvelope` | 多 agent 基础层 | 让队友长期存在、反复接活 |\n| `s16` | `ProtocolEnvelope` / `RequestRecord` | 协作协议层 | 让团队从自由聊天升级成结构化协作 |\n| `s17` | `ClaimPolicy` / `AutonomyState` | 自治调度层 | 让 agent 空闲时自己找活、恢复工作 |\n| `s18` | `WorktreeRecord` / `TaskBinding` | 隔离执行层 | 给并行任务分配独立工作目录 |\n| `s19` | `MCPServerConfig` / `CapabilityRoute` | 外部能力层 | 把外部能力并入系统主控制面 |\n\n## 整个系统的大图\n\n先看最重要的一张图:\n\n```text\nUser\n |\n v\nmessages[]\n |\n v\n+-------------------------+\n| Agent Loop (s01) |\n| |\n| 1. 组装输入 |\n| 2. 调模型 |\n| 3. 看 stop_reason |\n| 4. 如果要调工具就执行 |\n| 5. 把结果写回 messages |\n| 6. 继续下一轮 |\n+-------------------------+\n |\n +------------------------------+\n | |\n v v\nTool Pipeline Context / State\n(s02, s07, s08) (s03, s06, s09, s10, s11)\n | |\n v v\nTasks / Teams / Worktree / MCP (s12-s19)\n```\n\n你可以把它理解成三层:\n\n### 第一层:主循环\n\n这是系统心脏。\n\n它只做一件事: \n**不停地推动“思考 -> 行动 -> 观察 -> 再思考”的循环。**\n\n### 第二层:横切机制\n\n这些机制不是替代主循环,而是“包在主循环周围”:\n\n- 权限\n- hooks\n- memory\n- prompt 组装\n- 错误恢复\n- 上下文压缩\n\n它们的作用,是让主循环更安全、更稳定、更聪明。\n\n### 第三层:更大的工作平台\n\n这些机制把单 agent 升级成更完整的系统:\n\n- 任务图\n- 后台任务\n- 多 agent 团队\n- worktree 隔离\n- MCP 外部工具\n\n## 你真正需要掌握的关键状态\n\n理解 agent,最重要的不是背很多功能名,而是知道**状态放在哪里**。\n\n下面是这个仓库里最关键的几类状态:\n\n### 1. 对话状态:`messages`\n\n这是 agent 当前上下文的主体。\n\n它保存:\n\n- 用户说了什么\n- 模型回复了什么\n- 调用了哪些工具\n- 工具返回了什么\n\n你可以把它想成 agent 的“工作记忆”。\n\n### 2. 工具注册表:`tools` / `handlers`\n\n这是一张“工具名 -> Python 函数”的映射表。\n\n这类结构常被叫做 `dispatch map`。\n\n意思很简单:\n\n- 模型说“我要调用 `read_file`”\n- 代码就去表里找 `read_file` 对应的函数\n- 找到以后执行\n\n### 3. 
计划与任务状态:`todo` / `tasks`\n\n这部分保存:\n\n- 当前有哪些事要做\n- 哪些已经完成\n- 哪些被别的任务阻塞\n- 哪些可以并行\n\n### 4. 权限与策略状态\n\n这部分保存:\n\n- 当前权限模式是什么\n- 允许规则有哪些\n- 拒绝规则有哪些\n- 最近是否连续被拒绝\n\n### 5. 持久化状态\n\n这部分保存那些“不该跟着一次对话一起消失”的东西:\n\n- memory 文件\n- task 文件\n- transcript\n- background task 输出\n- worktree 绑定信息\n\n## 如果你想做出结构完整的版本,至少要有哪些数据结构\n\n如果你的目标是自己写一个结构完整、接近真实主脉络的类似系统,最低限度要把下面这些数据结构设计清楚:\n\n```python\nclass AppState:\n messages: list\n tools: dict\n tool_schemas: list\n\n todo: object | None\n tasks: object | None\n\n permissions: object | None\n hooks: object | None\n memories: object | None\n prompt_builder: object | None\n\n compact_state: dict\n recovery_state: dict\n\n background: object | None\n cron: object | None\n\n teammates: object | None\n worktree_session: dict | None\n mcp_clients: dict\n```\n\n这不是要求你一开始就把这些全写完。\n\n这张表的作用只是告诉你:\n\n**一个像样的 agent 系统,不只是 `messages + tools`。**\n\n它最终会长成一个带很多子模块的状态系统。\n\n## 一条请求是怎么流动的\n\n```text\n1. 用户发来任务\n2. 系统组装 prompt 和上下文\n3. 模型返回普通文本,或者返回 tool_use\n4. 如果返回 tool_use:\n - 先过 permission\n - 再过 hook\n - 然后执行工具\n - 把 tool_result 写回 messages\n5. 主循环继续\n6. 如果任务太大:\n - 可能写入 todo / tasks\n - 可能派生 subagent\n - 可能触发 compact\n - 可能走 background / team / worktree / MCP\n7. 直到模型结束这一轮\n```\n\n这条流是全仓库最重要的主脉络。\n\n你在后面所有章节里看到的机制,本质上都只是插在这条流的不同位置。\n\n## 读者最容易混淆的几组概念\n\n### `Todo` 和 `Task` 不是一回事\n\n- `Todo`:轻量、临时、偏会话内\n- `Task`:持久化、带状态、带依赖关系\n\n### `Memory` 和 `Context` 不是一回事\n\n- `Context`:这一轮工作临时需要的信息\n- `Memory`:未来别的会话也可能仍然有价值的信息\n\n### `Subagent` 和 `Teammate` 不是一回事\n\n- `Subagent`:通常是当前 agent 派生出来的一次性帮手\n- `Teammate`:更偏向长期存在于团队中的协作角色\n\n### `Prompt` 和 `System Reminder` 不是一回事\n\n- `System Prompt`:较稳定的系统级输入\n- `System Reminder`:每轮动态变化的补充上下文\n\n## 这套仓库刻意省略了什么\n\n为了让初学者能顺着学下去,本仓库不会把下面这些内容塞进主线:\n\n- 产品级启动流程里的全部外围初始化\n- 真实商业产品中的账号、策略、遥测、灰度等逻辑\n- 只服务于兼容性和历史负担的复杂分支\n- 某些非常复杂但教学收益很低的边角机制\n\n这不是因为这些东西“不存在”。\n\n而是因为对一个从 0 到 1 造类似系统的读者来说,主干先于枝叶。\n\n## 这一章之后怎么读\n\n推荐顺序:\n\n1. 先读 `s01` 和 `s02`\n2. 然后读 `s03` 到 `s06`\n3. 进入 `s07` 到 `s10`\n4. 接着补 `s11`\n5. 最后再读 `s12` 到 `s19`\n\n如果你在某一章觉得名词开始打结,回来看这一章和术语表就够了。\n\n---\n\n**一句话记住全仓库:**\n\n先做出能工作的最小循环,再一层一层给它补上规划、隔离、安全、记忆、任务、协作和外部能力。\n"
+ },
+ {
+ "version": null,
+ "slug": "s00a-query-control-plane",
+ "locale": "zh",
+ "title": "s00a: Query Control Plane (查询控制平面)",
+ "kind": "bridge",
+ "filename": "s00a-query-control-plane.md",
+ "content": "# s00a: Query Control Plane (查询控制平面)\n\n> 这不是新的主线章节,而是一份桥接文档。 \n> 它用来回答一个问题:\n>\n> **为什么一个结构更完整的 agent,不会只靠 `messages[]` 和一个 `while True` 就够了?**\n\n## 这一篇为什么要存在\n\n主线里的 `s01` 会先教你做出一个最小可运行循环:\n\n```text\n用户输入\n ->\n模型回复\n ->\n如果要调工具就执行\n ->\n把结果喂回去\n ->\n继续下一轮\n```\n\n这条主线是对的,而且必须先学这个。\n\n但当系统开始长功能以后,真正支撑一个完整 harness 的,不再只是“循环”本身,而是:\n\n**一层专门负责管理查询过程的控制平面。**\n\n这一层在真实系统里通常会统一处理:\n\n- 当前对话消息\n- 当前轮次\n- 为什么继续下一轮\n- 是否正在恢复错误\n- 是否已经压缩过上下文\n- 是否需要切换输出预算\n- hook 是否暂时影响了结束条件\n\n如果不把这层讲出来,读者虽然能做出一个能跑的 demo,但很难自己把系统推到接近 95%-99% 的完成度。\n\n## 先解释几个名词\n\n### 什么是 query\n\n这里的 `query` 不是“数据库查询”。\n\n这里说的 query,更接近:\n\n> 系统为了完成用户当前这一次请求,而运行的一整段主循环过程。\n\n也就是说:\n\n- 用户说一句话\n- 系统可能要经过很多轮模型调用和工具调用\n- 最后才结束这一次请求\n\n这整段过程,就可以看成一条 query。\n\n### 什么是控制平面\n\n`控制平面` 这个词第一次看会有点抽象。\n\n它的意思其实很简单:\n\n> 不是直接做业务动作,而是负责协调、调度、决定流程怎么往下走的一层。\n\n在这里:\n\n- 模型回复内容,算“业务内容”\n- 工具执行结果,算“业务动作”\n- 决定“要不要继续下一轮、为什么继续、现在属于哪种继续”,这层就是控制平面\n\n### 什么是 transition\n\n`transition` 可以翻成“转移原因”。\n\n它回答的是:\n\n> 上一轮为什么没有结束,而是继续下一轮了?\n\n例如:\n\n- 因为工具刚执行完\n- 因为输出被截断,要续写\n- 因为刚做完压缩,要重试\n- 因为 hook 要求继续\n- 因为预算还允许继续\n\n## 最小心智模型\n\n先把 query 控制平面想成 3 层:\n\n```text\n1. 输入层\n - messages\n - system prompt\n - user/system context\n\n2. 控制层\n - 当前状态 state\n - 当前轮 turn\n - 当前继续原因 transition\n - 恢复/压缩/预算等标记\n\n3. 执行层\n - 调模型\n - 执行工具\n - 写回消息\n```\n\n它的工作不是“替代主循环”,而是:\n\n**让主循环从一个小 demo,升级成一个能管理很多分支和状态的系统。**\n\n## 为什么只靠 `messages[]` 不够\n\n很多初学者第一次实现 agent 时,会把所有状态都堆进 `messages[]`。\n\n这在最小 demo 里没问题。\n\n但一旦系统长出下面这些能力,就不够了:\n\n- 你要知道自己是不是已经做过一次 reactive compact\n- 你要知道输出被截断已经续写了几次\n- 你要知道这次继续是因为工具,还是因为错误恢复\n- 你要知道当前轮是否启用了特殊输出预算\n\n这些信息不是“对话内容”,而是“流程控制状态”。\n\n所以它们不该都硬塞进 `messages[]` 里。\n\n## 关键数据结构\n\n### 1. QueryParams\n\n这是进入 query 引擎时的外部输入。\n\n最小形状可以这样理解:\n\n```python\nparams = {\n \"messages\": [...],\n \"system_prompt\": \"...\",\n \"user_context\": {...},\n \"system_context\": {...},\n \"tool_use_context\": {...},\n \"fallback_model\": None,\n \"max_output_tokens_override\": None,\n \"max_turns\": None,\n}\n```\n\n它的作用是:\n\n- 带进来这次查询一开始已知的输入\n- 这些值大多不在每轮里随便乱改\n\n### 2. QueryState\n\n这才是跨迭代真正会变化的部分。\n\n最小教学版建议你把它显式做成一个结构:\n\n```python\nstate = {\n \"messages\": [...],\n \"tool_use_context\": {...},\n \"continuation_count\": 0,\n \"has_attempted_compact\": False,\n \"max_output_tokens_override\": None,\n \"stop_hook_active\": False,\n \"turn_count\": 1,\n \"transition\": None,\n}\n```\n\n它的价值在于:\n\n- 把“会变的流程状态”集中放在一起\n- 让每个 continue site 修改的是同一份 state,而不是散落在很多局部变量里\n\n### 3. 
TransitionReason\n\n建议你单独定义一组继续原因:\n\n```python\nTRANSITIONS = (\n \"tool_result_continuation\",\n \"max_tokens_recovery\",\n \"compact_retry\",\n \"transport_retry\",\n \"stop_hook_continuation\",\n \"budget_continuation\",\n)\n```\n\n这不是为了炫技。\n\n它的作用很实在:\n\n- 日志更清楚\n- 调试更清楚\n- 测试更清楚\n- 教学更清楚\n\n## 最小实现\n\n### 第一步:把外部输入和内部状态分开\n\n```python\ndef query(params):\n state = {\n \"messages\": params[\"messages\"],\n \"tool_use_context\": params[\"tool_use_context\"],\n \"continuation_count\": 0,\n \"has_attempted_compact\": False,\n \"max_output_tokens_override\": params.get(\"max_output_tokens_override\"),\n \"turn_count\": 1,\n \"transition\": None,\n }\n```\n\n### 第二步:每一轮先读 state,再决定如何执行\n\n```python\nwhile True:\n messages = state[\"messages\"]\n transition = state[\"transition\"]\n turn_count = state[\"turn_count\"]\n\n response = call_model(...)\n ...\n```\n\n### 第三步:所有“继续下一轮”的地方都写回 state\n\n```python\nif response.stop_reason == \"tool_use\":\n state[\"messages\"] = append_tool_results(...)\n state[\"transition\"] = \"tool_result_continuation\"\n state[\"turn_count\"] += 1\n continue\n\nif response.stop_reason == \"max_tokens\":\n state[\"messages\"].append({\"role\": \"user\", \"content\": CONTINUE_MESSAGE})\n state[\"continuation_count\"] += 1\n state[\"transition\"] = \"max_tokens_recovery\"\n continue\n```\n\n这一点非常关键。\n\n**不要只做 `continue`,要知道自己为什么 continue。**\n\n## 一张真正清楚的心智图\n\n```text\nparams\n |\n v\ninit state\n |\n v\nquery loop\n |\n +-- normal assistant end --------------> terminal\n |\n +-- tool_use --------------------------> write tool_result -> transition=tool_result_continuation\n |\n +-- max_tokens ------------------------> inject continue -> transition=max_tokens_recovery\n |\n +-- prompt too long -------------------> compact -> transition=compact_retry\n |\n +-- transport error -------------------> backoff -> transition=transport_retry\n |\n +-- stop hook asks to continue --------> transition=stop_hook_continuation\n```\n\n## 它和 `s01`、`s11` 的关系\n\n- `s01` 负责建立“最小主循环”\n- `s11` 负责建立“错误恢复分支”\n- 这一篇负责把两者再往上抽象一层,解释为什么一个更完整的系统会出现一个 query control plane\n\n所以这篇不是替代主线,而是把主线补完整。\n\n## 初学者最容易犯的错\n\n### 1. 把所有控制状态都塞进消息里\n\n这样日志和调试都会很难看,也会让消息层和控制层混在一起。\n\n### 2. `continue` 了,但没有记录为什么继续\n\n短期看起来没问题,系统一复杂就会变成黑盒。\n\n### 3. 每个分支都直接改很多局部变量\n\n这样后面你很难看出“哪些状态是跨轮共享的”。\n\n### 4. 把 query loop 讲成“只是一个 while True”\n\n这对最小 demo 是真话,对一个正在长出控制面的 harness 就不是完整真话了。\n\n## 教学边界\n\n这篇最重要的,不是把所有控制状态一次列满,而是先让你守住三件事:\n\n- query loop 不只是 `while True`,而是一条带着共享状态往前推进的控制面\n- 每次 `continue` 都应该有明确原因,而不是黑盒跳转\n- 消息层、工具回写、压缩恢复、重试恢复,最终都要回到同一份 query 状态上\n\n更细的 `transition taxonomy`、预算跟踪、prefetch 等扩展,可以放到你把这条最小控制面真正手搓稳定以后再补。\n\n## 一句话记住\n\n**更完整的 query loop 不只是“循环”,而是“拿着一份跨轮状态不断推进的查询控制平面”。**\n"
+ },
+ {
+ "version": null,
+ "slug": "s00b-one-request-lifecycle",
+ "locale": "zh",
+ "title": "s00b: One Request Lifecycle (一次请求的完整生命周期)",
+ "kind": "bridge",
+ "filename": "s00b-one-request-lifecycle.md",
+ "content": "# s00b: One Request Lifecycle (一次请求的完整生命周期)\n\n> 这是一份桥接文档。 \n> 它不替代主线章节,而是把整套系统串成一条真正连续的执行链。\n>\n> 它要回答的问题是:\n>\n> **用户的一句话,进入系统以后,到底是怎样一路流动、分发、执行、再回到主循环里的?**\n\n## 为什么必须补这一篇\n\n很多读者在按顺序看教程时,会逐章理解:\n\n- `s01` 讲循环\n- `s02` 讲工具\n- `s03` 讲规划\n- `s07` 讲权限\n- `s09` 讲 memory\n- `s12-s19` 讲任务、多 agent、MCP\n\n每章单看都能懂。\n\n但一旦开始自己实现,就会很容易卡住:\n\n- 这些模块到底谁先谁后?\n- 一条请求进来时,先走 prompt,还是先走 memory?\n- 工具执行前,权限和 hook 在哪一层?\n- task、runtime task、teammate、worktree、MCP 到底是在一次请求里的哪个阶段介入?\n\n所以你需要一张“纵向流程图”。\n\n## 先给一条最重要的总图\n\n```text\n用户请求\n |\n v\nQuery State 初始化\n |\n v\n组装 system prompt / messages / reminders\n |\n v\n调用模型\n |\n +-- 普通回答 -------------------------------> 结束本次请求\n |\n +-- tool_use\n |\n v\n Tool Router\n |\n +-- 权限判断\n +-- Hook 拦截/注入\n +-- 本地工具 / MCP / agent / task / team\n |\n v\n 执行结果\n |\n +-- 可能写入 task / runtime task / memory / worktree 状态\n |\n v\n tool_result 写回 messages\n |\n v\n Query State 更新\n |\n v\n 下一轮继续\n```\n\n你可以把整条链先理解成三层:\n\n1. `Query Loop`\n2. `Tool Control Plane`\n3. `Platform State`\n\n## 第 1 段:用户请求进入查询控制平面\n\n当用户说:\n\n```text\n修复 tests/test_auth.py 的失败,并告诉我原因\n```\n\n系统最先做的,不是立刻跑工具,而是先为这次请求建立一份查询状态。\n\n最小可以理解成:\n\n```python\nquery_state = {\n \"messages\": [{\"role\": \"user\", \"content\": user_text}],\n \"turn_count\": 1,\n \"transition\": None,\n \"tool_use_context\": {...},\n}\n```\n\n这里的重点是:\n\n**这次请求不是“单次 API 调用”,而是一段可能包含很多轮的查询过程。**\n\n如果你对这一层还不够熟,先回看:\n\n- [`s01-the-agent-loop.md`](./s01-the-agent-loop.md)\n- [`s00a-query-control-plane.md`](./s00a-query-control-plane.md)\n\n## 第 2 段:组装本轮真正送给模型的输入\n\n主循环不会直接把原始 `messages` 裸发出去。\n\n在更完整的系统里,它通常会先组装:\n\n- system prompt blocks\n- 规范化后的 messages\n- memory section\n- 当前轮 reminder\n- 工具清单\n\n也就是说,真正发给模型的通常是:\n\n```text\nsystem prompt\n+ normalized messages\n+ tools\n+ optional reminders / attachments\n```\n\n这里涉及的章节是:\n\n- `s09` memory\n- `s10` system prompt\n- `s10a` message & prompt pipeline\n\n这一段的核心心智是:\n\n**system prompt 不是全部输入,它只是输入管道中的一段。**\n\n## 第 3 段:模型产出两类东西\n\n模型这一轮的输出,最关键地分成两种:\n\n### 第一种:普通回复\n\n如果模型直接给出结论或说明,本次请求可能就结束了。\n\n### 第二种:动作意图\n\n也就是工具调用。\n\n例如:\n\n```text\nread_file(\"tests/test_auth.py\")\nbash(\"pytest tests/test_auth.py -q\")\ntodo([...])\nload_skill(\"code-review\")\ntask_create(...)\nmcp__postgres__query(...)\n```\n\n这时候系统真正收到的,不只是“文本”,而是:\n\n> 模型想让真实世界发生某些动作。\n\n## 第 4 段:工具路由层接管动作意图\n\n一旦出现 `tool_use`,系统就进入工具控制平面。\n\n这一层至少要回答:\n\n1. 这是什么工具?\n2. 它应该路由到哪类能力来源?\n3. 执行前要不要先过权限?\n4. hook 有没有要拦截或补充?\n5. 
它执行时能访问哪些共享状态?\n\n最小图可以这样看:\n\n```text\ntool_use\n |\n v\nTool Router\n |\n +-- native tool handler\n +-- MCP client\n +-- agent/team/task handler\n```\n\n如果你对这一层不够清楚,回看:\n\n- [`s02-tool-use.md`](./s02-tool-use.md)\n- [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md)\n\n## 第 5 段:权限系统决定“能不能执行”\n\n不是所有动作意图都应该直接变成真实执行。\n\n例如:\n\n- 写文件\n- 跑 bash\n- 改工作目录\n- 调外部服务\n\n这时会先进入权限判断:\n\n```text\ndeny rules\n -> mode\n -> allow rules\n -> ask user\n```\n\n权限系统处理的是:\n\n> 这次动作是否允许发生。\n\n相关章节:\n\n- [`s07-permission-system.md`](./s07-permission-system.md)\n\n## 第 6 段:Hook 可以在边上做扩展\n\n通过权限检查以后,系统还可能在工具执行前后跑 hook。\n\n你可以把 hook 理解成:\n\n> 不改主循环主干,也能插入自定义行为的扩展点。\n\n例如:\n\n- 执行前记录日志\n- 执行后做额外检查\n- 根据结果注入额外提醒\n\n相关章节:\n\n- [`s08-hook-system.md`](./s08-hook-system.md)\n\n## 第 7 段:真正执行动作,并影响不同层的状态\n\n这是很多人最容易低估的一段。\n\n工具执行结果,不只是“一段文本输出”。\n\n它还可能修改系统别的状态层。\n\n### 例子 1:规划状态\n\n如果工具是 `todo`,它会更新的是当前会话计划。\n\n相关章节:\n\n- [`s03-todo-write.md`](./s03-todo-write.md)\n\n### 例子 2:持久任务图\n\n如果工具是 `task_create` / `task_update`,它会修改磁盘上的任务板。\n\n相关章节:\n\n- [`s12-task-system.md`](./s12-task-system.md)\n\n### 例子 3:运行时任务\n\n如果工具启动了后台 bash、后台 agent 或监控任务,它会创建 runtime task。\n\n相关章节:\n\n- [`s13-background-tasks.md`](./s13-background-tasks.md)\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)\n\n### 例子 4:多 agent / teammate\n\n如果工具是 `delegate`、`spawn_agent` 一类,它会在平台层生成新的执行单元。\n\n相关章节:\n\n- [`s15-agent-teams.md`](./s15-agent-teams.md)\n- [`s16-team-protocols.md`](./s16-team-protocols.md)\n- [`s17-autonomous-agents.md`](./s17-autonomous-agents.md)\n\n### 例子 5:worktree\n\n如果系统要为某个任务提供隔离工作目录,这会影响文件系统级执行环境。\n\n相关章节:\n\n- [`s18-worktree-task-isolation.md`](./s18-worktree-task-isolation.md)\n\n### 例子 6:MCP\n\n如果调用的是外部 MCP 能力,那么执行主体可能根本不在本地 handler,而在外部能力端。\n\n相关章节:\n\n- [`s19-mcp-plugin.md`](./s19-mcp-plugin.md)\n- [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md)\n\n## 第 8 段:执行结果被包装回消息流\n\n不管执行落在哪一层,最后都要回到同一个位置:\n\n```text\ntool_result -> messages\n```\n\n这是整个系统最核心的闭环。\n\n因为无论工具背后多复杂,模型下一轮真正能继续工作的依据,仍然是:\n\n> 系统把执行结果重新写回了它可见的消息流。\n\n这也是为什么 `s01` 永远是根。\n\n## 第 9 段:主循环根据结果决定下一轮是否继续\n\n当 `tool_result` 写回以后,查询状态也会一起更新:\n\n- `messages` 变了\n- `turn_count` 增加了\n- `transition` 被记录成某种续行原因\n\n这时系统就进入下一轮。\n\n如果中间发生下面这些情况,控制平面还会继续介入:\n\n- 上下文太长,需要压缩\n- 输出被截断,需要续写\n- 请求失败,需要恢复\n\n相关章节:\n\n- [`s06-context-compact.md`](./s06-context-compact.md)\n- [`s11-error-recovery.md`](./s11-error-recovery.md)\n\n## 第 10 段:哪些信息不会跟着一次请求一起结束\n\n这也是非常容易混的地方。\n\n一次请求结束后,并不是所有状态都随之消失。\n\n### 会跟着当前请求结束的\n\n- 当前轮 messages 中的临时推进过程\n- 会话内 todo 状态\n- 当前轮 reminder\n\n### 可能跨请求继续存在的\n\n- memory\n- 持久任务图\n- runtime task 输出\n- worktree\n- MCP 连接状态\n\n所以你要逐渐学会区分:\n\n```text\nquery-scope state\nsession-scope state\nproject-scope state\nplatform-scope state\n```\n\n## 用一个完整例子串一次\n\n还是用这个请求:\n\n```text\n修复 tests/test_auth.py 的失败,并告诉我原因\n```\n\n系统可能会这样流动:\n\n1. 用户请求进入 `QueryState`\n2. system prompt + memory + tools 被组装好\n3. 模型先调用 `todo`,写出三步计划\n4. 模型调用 `read_file(\"tests/test_auth.py\")`\n5. 工具路由到本地文件读取 handler\n6. 读取结果包装成 `tool_result` 写回消息流\n7. 下一轮模型调用 `bash(\"pytest tests/test_auth.py -q\")`\n8. 权限系统判断这条命令是否可执行\n9. 执行测试,输出太长则先落盘并留预览\n10. 失败日志回到消息流\n11. 模型再读实现文件并修改代码\n12. 修改后再跑测试\n13. 如果对话变长,`s06` 触发压缩\n14. 如果任务被拆给子 agent,`s15-s17` 介入\n15. 最后模型输出结论,本次请求结束\n\n你会发现:\n\n**整套系统再复杂,也始终没有脱离“输入 -> 动作意图 -> 执行 -> 结果写回 -> 下一轮”这条主骨架。**\n\n## 读这篇时最该记住的三件事\n\n### 1. 所有模块都不是平铺摆在那里的\n\n它们是在一次请求的不同阶段依次介入的。\n\n### 2. 真正的闭环只有一个\n\n那就是:\n\n```text\ntool_result 回到 messages\n```\n\n### 3. 
很多高级机制,本质上只是围绕这条闭环加的保护层\n\n例如:\n\n- 权限是执行前保护层\n- hook 是扩展层\n- compact 是上下文预算保护层\n- recovery 是出错后的恢复层\n- task/team/worktree/MCP 是更大的平台能力层\n\n## 一句话记住\n\n**一次请求的完整生命周期,本质上就是:系统围绕同一条主循环,把不同模块按阶段接进来,最终持续把真实执行结果送回模型继续推理。**\n"
+ },
+ {
+ "version": null,
+ "slug": "s00c-query-transition-model",
+ "locale": "zh",
+ "title": "s00c: Query Transition Model (查询转移模型)",
+ "kind": "bridge",
+ "filename": "s00c-query-transition-model.md",
+ "content": "# s00c: Query Transition Model (查询转移模型)\n\n> 这篇桥接文档专门解决一个问题:\n>\n> **为什么一个只会 `continue` 的 agent,不足以支撑完整系统,而必须显式知道“为什么继续到下一轮”?**\n\n## 这一篇为什么要存在\n\n主线里:\n\n- `s01` 先教你最小循环\n- `s06` 开始教上下文压缩\n- `s11` 开始教错误恢复\n\n这些都对。 \n但如果你只分别学这几章,脑子里很容易还是停留在一种过于粗糙的理解:\n\n> “反正 `continue` 了就继续呗。”\n\n这在最小 demo 里能跑。 \n但当系统开始长出恢复、压缩和外部控制以后,这样理解会很快失灵。\n\n因为系统继续下一轮的原因其实很多,而且这些原因不是一回事:\n\n- 工具刚执行完,要把结果喂回模型\n- 输出被截断了,要续写\n- 上下文刚压缩完,要重试\n- 运输层刚超时了,要退避后重试\n- stop hook 要求当前 turn 先不要结束\n- token budget 还允许继续推进\n\n如果你不把这些“继续原因”从一开始拆开,后面会出现三个大问题:\n\n- 日志看不清\n- 测试不好写\n- 教学心智会越来越模糊\n\n## 先解释几个名词\n\n### 什么叫 transition\n\n这里的 `transition`,你可以先把它理解成:\n\n> 上一轮为什么转移到了下一轮。\n\n它不是“消息内容”,而是“流程原因”。\n\n### 什么叫 continuation\n\ncontinuation 就是:\n\n> 这条 query 当前还没有结束,要继续推进。\n\n但 continuation 不止一种。\n\n### 什么叫 query boundary\n\nquery boundary 就是一轮和下一轮之间的边界。\n\n每次跨过这个边界,系统最好都知道:\n\n- 这次为什么继续\n- 这次继续前有没有修改状态\n- 这次继续后应该怎么读主循环\n\n## 最小心智模型\n\n先不要把 query 想成一条线。\n\n更接近真实情况的理解是:\n\n```text\n一条 query\n = 一组“继续原因”串起来的状态转移\n```\n\n例如:\n\n```text\n用户输入\n ->\n模型产生 tool_use\n ->\n工具执行完\n ->\ntool_result_continuation\n ->\n模型输出过长\n ->\nmax_tokens_recovery\n ->\n压缩后继续\n ->\ncompact_retry\n ->\n最终结束\n```\n\n这样看,你会更容易理解:\n\n**系统不是单纯在 while loop 里转圈,而是在一串显式的转移原因里推进。**\n\n## 关键数据结构\n\n### 1. QueryState 里的 `transition`\n\n最小版建议就把这类字段显式放进状态里:\n\n```python\nstate = {\n \"messages\": [...],\n \"turn_count\": 3,\n \"has_attempted_compact\": False,\n \"continuation_count\": 1,\n \"transition\": None,\n}\n```\n\n这里的 `transition` 不是可有可无。\n\n它的意义是:\n\n- 当前这轮为什么会出现\n- 下一轮日志应该怎么解释\n- 测试时应该断言哪条路径被走到\n\n### 2. TransitionReason\n\n教学版最小可以先这样分:\n\n```python\nTRANSITIONS = (\n \"tool_result_continuation\",\n \"max_tokens_recovery\",\n \"compact_retry\",\n \"transport_retry\",\n \"stop_hook_continuation\",\n \"budget_continuation\",\n)\n```\n\n这几种原因的本质不一样:\n\n- `tool_result_continuation`\n 是正常主线继续\n- `max_tokens_recovery`\n 是输出被截断后的恢复继续\n- `compact_retry`\n 是上下文处理后的恢复继续\n- `transport_retry`\n 是基础设施抖动后的恢复继续\n- `stop_hook_continuation`\n 是外部控制逻辑阻止本轮结束\n- `budget_continuation`\n 是系统主动利用预算继续推进\n\n### 3. 
Continuation Budget\n\n更完整的 query 状态不只会说“继续”,还会限制:\n\n- 最多续写几次\n- 最多压缩后重试几次\n- 某类恢复是不是已经尝试过\n\n例如:\n\n```python\nstate = {\n \"max_output_tokens_recovery_count\": 2,\n \"has_attempted_reactive_compact\": True,\n}\n```\n\n这些字段的本质都是:\n\n> continuation 不是无限制的。\n\n## 最小实现\n\n### 第一步:把 continue site 显式化\n\n很多初学者写主循环时,所有继续逻辑都长这样:\n\n```python\ncontinue\n```\n\n教学版应该往前走一步:\n\n```python\nstate[\"transition\"] = \"tool_result_continuation\"\ncontinue\n```\n\n### 第二步:不同继续原因,配不同状态修改\n\n```python\nif response.stop_reason == \"tool_use\":\n state[\"messages\"] = append_tool_results(...)\n state[\"turn_count\"] += 1\n state[\"transition\"] = \"tool_result_continuation\"\n continue\n\nif response.stop_reason == \"max_tokens\":\n state[\"messages\"].append({\n \"role\": \"user\",\n \"content\": CONTINUE_MESSAGE,\n })\n state[\"max_output_tokens_recovery_count\"] += 1\n state[\"transition\"] = \"max_tokens_recovery\"\n continue\n```\n\n重点不是“多写一行”。\n\n重点是:\n\n**每次继续之前,你都要知道自己做了什么状态更新,以及为什么继续。**\n\n### 第三步:把恢复继续和正常继续分开\n\n```python\nif should_retry_transport(error):\n time.sleep(backoff(...))\n state[\"transition\"] = \"transport_retry\"\n continue\n\nif should_recompact(error):\n state[\"messages\"] = compact_messages(state[\"messages\"])\n state[\"transition\"] = \"compact_retry\"\n continue\n```\n\n这时候你就开始得到一条非常清楚的控制链:\n\n```text\n继续\n 不再是一个动作\n 而是一类带原因的转移\n```\n\n## 一张真正应该建立的图\n\n```text\nquery loop\n |\n +-- tool executed --------------------> transition = tool_result_continuation\n |\n +-- output truncated -----------------> transition = max_tokens_recovery\n |\n +-- compact just happened -----------> transition = compact_retry\n |\n +-- network / transport retry -------> transition = transport_retry\n |\n +-- stop hook blocked termination ---> transition = stop_hook_continuation\n |\n +-- budget says keep going ----------> transition = budget_continuation\n```\n\n## 它和逆向仓库主脉络为什么对得上\n\n如果你去看更完整系统的查询入口,会发现它真正难的地方从来不是:\n\n- 再调一次模型\n\n而是:\n\n- 什么时候该继续\n- 继续前改哪份状态\n- 继续属于哪一种路径\n\n所以这篇桥接文档讲的,不是额外装饰,而是完整 query engine 的主骨架之一。\n\n## 它和主线章节怎么接\n\n- `s01` 让你先把 loop 跑起来\n- `s06` 让你知道为什么上下文管理会介入继续路径\n- `s11` 让你知道为什么恢复路径不是一种\n- 这篇则把“继续原因”统一抬成显式状态\n\n所以你可以把它理解成:\n\n> 给前后几章之间补上一条“为什么继续”的统一主线。\n\n## 初学者最容易犯的错\n\n### 1. 只有 `continue`,没有 `transition`\n\n这样日志和测试都会越来越难看。\n\n### 2. 把所有继续都当成一种\n\n这样会把:\n\n- 正常主线继续\n- 错误恢复继续\n- 压缩后重试\n\n全部混成一锅。\n\n### 3. 没有 continuation budget\n\n没有预算,系统就会在某些坏路径里无限试下去。\n\n### 4. 把 `transition` 写进消息文本,而不是流程状态\n\n消息是给模型看的。 \n`transition` 是给系统自己看的。\n\n### 5. 压缩、恢复、hook 都发生了,却没有统一的查询状态\n\n这会导致控制逻辑散落在很多局部变量里,越长越乱。\n\n## 教学边界\n\n这篇最重要的,不是一次枚举完所有 transition 名字,而是先让你守住三件事:\n\n- `continue` 最好总能对应一个显式的 `transition reason`\n- 正常继续、恢复继续、压缩后重试,不应该被混成同一种路径\n- continuation 需要预算和状态,而不是无限重来\n\n只要这三点成立,你就已经能把 `s01 / s06 / s11` 真正串成一条完整主线。 \n更细的 transition taxonomy、预算策略和日志分类,可以放到你把最小 query 状态机写稳以后再补。\n\n## 读完这一篇你应该能说清楚\n\n至少能完整说出这句话:\n\n> 一条 query 不是简单 while loop,而是一串显式 continuation reason 驱动的状态转移。\n\n如果这句话你已经能稳定说清,那么你再回头看 `s11`、`s19`,心智会顺很多。\n"
+ },
+ {
+ "version": null,
+ "slug": "s00d-chapter-order-rationale",
+ "locale": "zh",
+ "title": "s00d: Chapter Order Rationale (为什么是这个章节顺序)",
+ "kind": "bridge",
+ "filename": "s00d-chapter-order-rationale.md",
+ "content": "# s00d: Chapter Order Rationale (为什么是这个章节顺序)\n\n> 这份文档不讲某一个机制本身。 \n> 它专门回答一个更基础的问题:\n>\n> **为什么这套仓库要按现在这个顺序教,而不是按源码目录顺序、功能热闹程度,或者“哪里复杂先讲哪里”。**\n\n## 先说结论\n\n当前这套 `s01 -> s19` 的主线顺序,整体上是合理的。\n\n它最大的优点不是“覆盖面广”,而是:\n\n- 先建立最小闭环\n- 再补横切控制面\n- 再补持久化工作层\n- 最后才扩成多 agent 平台和外部能力总线\n\n这个顺序适合教学,因为它遵守的不是“源码文件先后”,而是:\n\n**机制依赖顺序。**\n\n也就是:\n\n- 后一章需要建立在前一章已经清楚的心智之上\n- 同一层的新概念尽量一起讲完\n- 不把高阶平台能力提前压给还没建立主闭环的读者\n\n如果要把这套课程改到更接近满分,一个很重要的标准不是“加更多内容”,而是:\n\n**让读者始终知道这一章为什么现在学,而不是上一章或下一章。**\n\n这份文档就是干这件事的。\n\n## 这份顺序到底按什么排\n\n不是按这些排:\n\n- 不是按逆向源码里文件顺序排\n- 不是按实现难度排\n- 不是按功能看起来酷不酷排\n- 不是按产品里出现得早不早排\n\n它真正按的是四条依赖线:\n\n1. `主闭环依赖`\n2. `控制面依赖`\n3. `工作状态依赖`\n4. `平台边界依赖`\n\n你可以先把整套课粗暴地看成下面这条线:\n\n```text\n先让 agent 能跑\n -> 再让它不乱跑\n -> 再让它能长期跑\n -> 最后让它能分工跑、隔离跑、接外部能力跑\n```\n\n这才是当前章节顺序最核心的逻辑。\n\n## 一张总图:章节之间真正的依赖关系\n\n```text\ns00 总览与地图\n |\n v\ns01 主循环\n ->\ns02 工具执行\n ->\ns03 会话计划\n ->\ns04 子任务隔离\n ->\ns05 按需知识注入\n ->\ns06 上下文压缩\n\ns06 之后,单 agent 主骨架成立\n |\n v\ns07 权限闸门\n ->\ns08 生命周期 Hook\n ->\ns09 跨会话记忆\n ->\ns10 Prompt / 输入装配\n ->\ns11 恢复与续行\n\ns11 之后,单 agent 的高完成度控制面成立\n |\n v\ns12 持久任务图\n ->\ns13 运行时后台槽位\n ->\ns14 时间触发器\n\ns14 之后,工作系统从“聊天过程”升级成“可持续运行时”\n |\n v\ns15 持久队友\n ->\ns16 协议化协作\n ->\ns17 自治认领\n ->\ns18 worktree 执行车道\n ->\ns19 外部能力总线\n```\n\n如果你记不住所有章节,只记住每段结束后的“系统里多了什么”:\n\n- `s06` 结束:你有了能工作的单 agent\n- `s11` 结束:你有了更稳、更可控的单 agent\n- `s14` 结束:你有了能长期推进工作的运行时\n- `s19` 结束:你有了接近完整的平台边界\n\n## 为什么 `s01-s06` 必须先成一整段\n\n### `s01` 必须最先\n\n因为它定义的是:\n\n- 这套系统的最小入口\n- 每一轮到底怎么推进\n- 工具结果为什么能再次进入模型\n\n如果连这一条都没建立,后面所有内容都会变成“往空气里挂功能”。\n\n### `s02` 必须紧跟 `s01`\n\n因为没有工具,agent 只是会说,不是真的会做。\n\n开发者第一次真正感受到“harness 在做什么”,往往就是在 `s02`:\n\n- 模型产出 `tool_use`\n- 系统找到 handler\n- 执行工具\n- 回写 `tool_result`\n\n这是整个仓库第一条真正的“行动回路”。\n\n### `s03` 放在 `s04` 前面是对的\n\n很多人会直觉上想先讲 subagent,因为它更“高级”。\n\n但教学上不该这样排。\n\n原因很简单:\n\n- `s03` 先解决“当前 agent 自己怎么不乱撞”\n- `s04` 再解决“哪些工作要交给别的执行者”\n\n如果主 agent 连本地计划都没有,就提前进入子 agent,读者只会觉得:\n\n- 为什么要委派\n- 委派和待办到底是什么关系\n- 哪些是主流程,哪些是探索性流程\n\n都不清楚。\n\n所以:\n\n**先有本地计划,再有上下文隔离委派。**\n\n### `s05` 放在 `s06` 前面是对的\n\n这两个章节很多人会低估。\n\n实际上它们解决的是同一类问题的前后两半:\n\n- `s05` 解决:知识不要一开始全塞进来\n- `s06` 解决:已经塞进来的上下文怎么控制体积\n\n如果先讲压缩,再讲技能加载,读者容易误会成:\n\n- 上下文膨胀主要靠“事后压缩”解决\n\n但更合理的心智应该是:\n\n1. 先减少不必要进入上下文的东西\n2. 再处理已经进入上下文、且必须继续保留的东西\n\n所以 `s05 -> s06` 的顺序很合理。\n\n## 为什么 `s07-s11` 应该成一整段“控制面加固”\n\n这五章看起来分散,实际上它们共同在回答同一个问题:\n\n**主循环已经能跑了,但要怎样才能跑得稳、跑得可控、跑得更像一个完整系统。**\n\n### `s07` 权限必须早于 `s08` Hook\n\n因为权限是在问:\n\n- 这件事能不能做\n- 这件事做到哪一步要停\n- 这件事要不要先问用户\n\nHook 是在问:\n\n- 系统这个时刻要不要额外做点什么\n\n如果先讲 Hook,再讲权限,读者很容易误会:\n\n- 安全判断也只是某个 hook\n\n但实际上不是。\n\n更清楚的教学顺序应该是:\n\n1. 先建立“执行前必须先过闸门”的概念\n2. 
再建立“主循环周围可以挂扩展点”的概念\n\n也就是:\n\n**先 gate,再 extend。**\n\n### `s09` 记忆放在 `s10` Prompt 前面是对的\n\n这是整套课程里很关键的一条顺序。\n\n很多人容易反过来讲,先讲 prompt,再讲 memory。\n\n但对开发者心智更友好的顺序其实是现在这样:\n\n- `s09` 先讲“长期信息从哪里来、哪些值得留下”\n- `s10` 再讲“这些来源最终怎样被组装进模型输入”\n\n也就是说:\n\n- `memory` 先回答“内容源是什么”\n- `prompt pipeline` 再回答“这些内容源怎么装配”\n\n如果反过来,读者会在 `s10` 里不断追问:\n\n- 为什么这里会有 memory block\n- 这块内容到底是谁准备的\n- 它和 messages、CLAUDE.md、skills 的边界在哪里\n\n所以这一条顺序不要乱换。\n\n### `s11` 放在这一段结尾很合理\n\n因为恢复与续行不是单独一层业务功能,而是:\n\n- 对前面所有输入、执行、状态、权限、压缩分支的总回收\n\n它天然适合做“控制面阶段的收口章”。\n\n只有当读者已经知道:\n\n- 一轮输入怎么组装\n- 执行时会走哪些分支\n- 发生什么状态变化\n\n他才真正看得懂恢复系统在恢复什么。\n\n## 为什么 `s12-s14` 必须先讲“任务图”,再讲“后台运行”,最后讲“定时触发”\n\n这是后半程最容易排错的一段。\n\n### `s12` 必须先于 `s13`\n\n因为 `s12` 解决的是:\n\n- 事情本身是什么\n- 依赖关系是什么\n- 哪个工作节点已完成、未完成、阻塞中\n\n而 `s13` 解决的是:\n\n- 某个执行单元现在是不是正在后台跑\n- 跑到什么状态\n- 结果怎么回流\n\n也就是:\n\n- `task` 是工作目标\n- `runtime task` 是执行槽位\n\n如果没有 `s12` 先铺开 durable work graph,读者到了 `s13` 会把后台任务误当成任务系统本体。\n\n这会直接导致后面:\n\n- cron 概念混乱\n- teammate 认领概念混乱\n- worktree lane 概念混乱\n\n所以这里一定要守住:\n\n**先有目标,再有执行体。**\n\n### `s14` 必须紧跟 `s13`\n\n因为 cron 本质上不是又一种任务。\n\n它只是回答:\n\n**如果现在不是用户当场触发,而是由时间触发一次执行,该怎么接到现有运行时里。**\n\n也就是说:\n\n- 没有 runtime slot,cron 没地方发车\n- 没有 task graph,cron 不知道在触发什么工作\n\n所以最合理顺序一定是:\n\n`task graph -> runtime slot -> schedule trigger`\n\n## 为什么 `s15-s19` 要按“队友 -> 协议 -> 自治 -> 隔离车道 -> 外部能力”排\n\n这一段如果顺序乱了,读者最容易开始觉得:\n\n- 队友、协议、任务、worktree、MCP 全都像“高级功能堆叠”\n\n但其实它们之间有很强的前后依赖。\n\n### `s15` 先定义“谁在系统里长期存在”\n\n这一章先把对象立起来:\n\n- 队友是谁\n- 他们有没有身份\n- 他们是不是可以持续存在\n\n如果连 actor 都还没清楚,协议对象就无从谈起。\n\n### `s16` 再定义“这些 actor 之间按什么规则说话”\n\n协议层不应该早于 actor 层。\n\n因为协议不是凭空存在的。\n\n它一定是服务于:\n\n- 请求谁\n- 谁审批\n- 谁响应\n- 如何回执\n\n所以:\n\n**先有队友,再有协议。**\n\n### `s17` 再进入“队友自己找活”\n\n自治不是“又多一种 agent 功能”。\n\n自治其实是建立在前两章之上的:\n\n- 前提 1:队友是长期存在的\n- 前提 2:队友之间有可追踪的协作规则\n\n只有这两个前提都建立了,自治认领才不会讲成一团雾。\n\n### `s18` 为什么在 `s19` 前面\n\n因为在平台层里,worktree 是执行隔离边界,MCP 是能力边界。\n\n对开发者自己手搓系统来说,更应先搞清:\n\n- 多个执行者如何不互相踩目录\n- 一个任务与一个执行车道如何绑定\n\n这些是“本地多执行者平台”先要解决的问题。\n\n把这个问题讲完后,再去讲:\n\n- 外部 server\n- 外部 tool\n- capability route\n\n开发者才不会把“MCP 很强”误解成“本地平台边界可以先不管”。\n\n### `s19` 放最后是对的\n\n因为它本质上是平台边界的最外层。\n\n它关心的是:\n\n- 本地系统之外的能力如何并入\n- 外部 server 和本地 tool 如何统一纳入 capability bus\n\n这个东西只有在前面这些边界都已经清楚后,读者才真的能吸收:\n\n- 本地 actor\n- 本地 work lane\n- 本地 task / runtime state\n- 外部 capability provider\n\n分别是什么。\n\n## 五种最容易让课程变差的“错误重排”\n\n### 错误 1:把 `s04` 提到 `s03` 前面\n\n坏处:\n\n- 读者先学会“把活丢出去”\n- 却还没学会“本地怎么规划”\n\n最后 subagent 只会变成“遇事就开新 agent”的逃避按钮。\n\n### 错误 2:把 `s10` 提到 `s09` 前面\n\n坏处:\n\n- 输入装配先讲了\n- 但输入源的边界还没立住\n\n结果 prompt pipeline 会看起来像一堆神秘字符串拼接。\n\n### 错误 3:把 `s13` 提到 `s12` 前面\n\n坏处:\n\n- 读者会把后台执行槽位误认成工作任务本体\n- 后面 cron、自治认领、worktree 都会越来越混\n\n### 错误 4:把 `s17` 提到 `s15` 或 `s16` 前面\n\n坏处:\n\n- 还没定义持久队友\n- 也还没定义结构化协作规则\n- 就先讲自治认领\n\n最后“自治”会被理解成模糊的自动轮询魔法。\n\n### 错误 5:把 `s19` 提到 `s18` 前面\n\n坏处:\n\n- 读者会先被外部能力系统吸引注意力\n- 却还没真正看清本地多执行者平台怎么稳定成立\n\n这会让整个课程后半程“看起来很大”,但“落到自己实现时没有抓手”。\n\n## 如果你自己手搓,可以在哪些地方先停\n\n这套课不是说一定要一次把 `s01-s19` 全做完。\n\n更稳的实现节奏是:\n\n### 里程碑 A:先做到 `s06`\n\n你已经有:\n\n- 主循环\n- 工具\n- 计划\n- 子任务隔离\n- 技能按需注入\n- 上下文压缩\n\n这已经足够做出一个“能用的单 agent 原型”。\n\n### 里程碑 B:再做到 `s11`\n\n你多了:\n\n- 权限\n- Hook\n- Memory\n- Prompt pipeline\n- 错误恢复\n\n到这里,单 agent 系统已经接近“高完成度教学实现”。\n\n### 里程碑 C:做到 `s14`\n\n你多了:\n\n- durable task\n- background runtime slot\n- cron trigger\n\n到这里,系统开始脱离“只会跟着当前会话走”的状态。\n\n### 里程碑 D:做到 `s19`\n\n这时再进入:\n\n- persistent teammate\n- protocol\n- autonomy\n- worktree lane\n- MCP / plugin\n\n这时你手里才是接近完整的平台结构。\n\n## 维护者在重排章节前该问自己什么\n\n如果你准备改顺序,先问下面这些问题:\n\n1. 这一章依赖的前置概念,前面有没有已经讲清?\n2. 这次重排会不会让两个同名但不同层的概念更容易混?\n3. 
这一章新增的是“目标状态”“运行状态”“执行者”还是“外部能力”?\n4. 如果把它提前,读者会不会只记住名词,反而抓不到最小实现?\n5. 这次重排是在服务开发者实现路径,还是只是在模仿某个源码目录顺序?\n6. 读者按当前章节学完以后,本地代码到底该按什么顺序打开,这条代码阅读顺序有没有一起讲清?\n\n如果第 5 个问题的答案偏向后者,那大概率不该改。\n\n## 一句话记住\n\n**好的章节顺序,不是把所有机制排成一列,而是让每一章都像前一章自然长出来的下一层。**\n"
+ },
+ {
+ "version": null,
+ "slug": "s00e-reference-module-map",
+ "locale": "zh",
+ "title": "s00e: 参考仓库模块映射图",
+ "kind": "bridge",
+ "filename": "s00e-reference-module-map.md",
+ "content": "# s00e: 参考仓库模块映射图\n\n> 这是一份给维护者和认真学习者用的校准文档。 \n> 它不是让读者逐行读逆向源码。\n>\n> 它只回答一个很关键的问题:\n>\n> **如果把参考仓库里真正重要的模块簇,和当前教学仓库的章节顺序对照起来看,现在这套课程顺序到底合不合理?**\n\n## 先说结论\n\n合理。\n\n当前这套 `s01 -> s19` 的顺序,整体上是对的,而且比“按源码目录顺序讲”更接近真实系统的设计主干。\n\n原因很简单:\n\n- 参考仓库里目录很多\n- 但真正决定系统骨架的,是少数几簇控制、状态、任务、团队、隔离执行和外部能力模块\n- 这些高信号模块,和当前教学仓库的四阶段主线基本是对齐的\n\n所以正确动作不是把教程改成“跟着源码树走”。\n\n正确动作是:\n\n- 保留现在这条按依赖关系展开的主线\n- 把它和参考仓库的映射关系讲明白\n- 继续把低价值的产品外围细节挡在主线外\n\n## 这份对照是怎么做的\n\n这次对照主要看的是参考仓库里真正决定系统骨架的部分,例如:\n\n- `Tool.ts`\n- `state/AppStateStore.ts`\n- `coordinator/coordinatorMode.ts`\n- `memdir/*`\n- `services/SessionMemory/*`\n- `services/toolUseSummary/*`\n- `constants/prompts.ts`\n- `tasks/*`\n- `tools/TodoWriteTool/*`\n- `tools/AgentTool/*`\n- `tools/ScheduleCronTool/*`\n- `tools/EnterWorktreeTool/*`\n- `tools/ExitWorktreeTool/*`\n- `tools/MCPTool/*`\n- `services/mcp/*`\n- `plugins/*`\n- `hooks/toolPermission/*`\n\n这些已经足够判断“设计主脉络”。\n\n没有必要为了教学,再把每个命令目录、兼容分支、UI 细节和产品接线全部拖进正文。\n\n## 真正的映射关系\n\n| 参考仓库模块簇 | 典型例子 | 对应教学章节 | 为什么这样放是对的 |\n|---|---|---|---|\n| 查询主循环 + 控制状态 | `Tool.ts`、`AppStateStore.ts`、query / coordinator 状态 | `s00`、`s00a`、`s00b`、`s01`、`s11` | 真实系统绝不只是 `messages[] + while True`。教学上先讲最小循环,再补控制平面,是对的。 |\n| 工具路由与执行面 | `Tool.ts`、原生 tools、tool context、执行辅助逻辑 | `s02`、`s02a`、`s02b` | 参考仓库明确把 tools 做成统一执行面,不只是玩具版分发表。当前拆法是合理的。 |\n| 会话规划 | `TodoWriteTool` | `s03` | 这是“当前会话怎么不乱撞”的小结构,应该早于持久任务图。 |\n| 一次性委派 | `AgentTool` 的最小子集 | `s04` | 参考仓库的 agent 体系很大,但教学仓库先教“新上下文 + 子任务 + 摘要返回”这个最小正确版本,是对的。 |\n| 技能发现与按需加载 | `DiscoverSkillsTool`、`skills/*`、相关 prompt 片段 | `s05` | 技能不是花哨外挂,而是知识注入层,所以应早于 prompt 复杂化和上下文压力。 |\n| 上下文压力与压缩 | `services/toolUseSummary/*`、`services/contextCollapse/*`、compact 逻辑 | `s06` | 参考仓库明确存在显式压缩机制,把这一层放在平台化能力之前完全正确。 |\n| 权限闸门 | `types/permissions.ts`、`hooks/toolPermission/*`、审批处理器 | `s07` | 执行安全是明确闸门,不是“某个 hook 顺手干的事”,所以必须早于 hook。 |\n| Hook 与侧边扩展 | `types/hooks.ts`、hook runner、生命周期接线 | `s08` | 参考仓库把扩展点和权限分开。教学顺序保持“先 gate,再 extend”是对的。 |\n| 持久记忆选择 | `memdir/*`、`services/SessionMemory/*`、记忆提取与筛选 | `s09` | 参考仓库把 memory 处理成“跨会话、选择性装配”的层,不是通用笔记本。 |\n| Prompt 组装 | `constants/prompts.ts`、prompt sections、memory prompt 注入 | `s10`、`s10a` | 参考仓库明显把输入拆成多个 section。教学版把 prompt 讲成流水线,而不是一段大字符串,是正确的。 |\n| 恢复与续行 | query transition、retry 分支、compact retry、token recovery | `s11`、`s00c` | 真实系统里“为什么继续下一轮”是显式存在的,所以恢复应当晚于 loop / tools / compact / permissions / memory / prompt。 |\n| 持久工作图 | 任务记录、任务板、依赖解锁 | `s12` | 当前教程把“持久任务目标”和“会话内待办”分开,是对的。 |\n| 活着的运行时任务 | `tasks/types.ts`、`LocalShellTask`、`LocalAgentTask`、`RemoteAgentTask`、`MonitorMcpTask` | `s13`、`s13a` | 参考仓库里 runtime task 是明确的联合类型,这强烈证明 `TaskRecord` 和 `RuntimeTaskState` 必须分开教。 |\n| 定时触发 | `ScheduleCronTool/*`、`useScheduledTasks` | `s14` | 调度是建在 runtime work 之上的新启动条件,放在 `s13` 后非常合理。 |\n| 持久队友 | `InProcessTeammateTask`、team tools、agent registry | `s15` | 参考仓库清楚地从一次性 subagent 继续长成长期 actor。把 teammate 放到后段是对的。 |\n| 结构化团队协作 | send-message 流、request tracking、coordinator mode | `s16` | 协议必须建立在“已有持久 actor”之上,所以不能提前。 |\n| 自治认领与恢复 | coordinator mode、任务认领、异步 worker 生命周期、resume 逻辑 | `s17` | 参考仓库里的 autonomy 不是魔法,而是建立在 actor、任务和协议之上的。 |\n| Worktree 执行车道 | `EnterWorktreeTool`、`ExitWorktreeTool`、agent worktree 辅助逻辑 | `s18` | 参考仓库把 worktree 当作执行边界 + 收尾状态来处理。当前放在 tasks / teams 后是正确的。 |\n| 外部能力总线 | `MCPTool`、`services/mcp/*`、`plugins/*`、MCP resources / prompts / tools | `s19`、`s19a` | 参考仓库把 MCP / plugin 放在平台最外层边界。把它放最后是合理的。 |\n\n## 这份对照最能证明的 5 件事\n\n### 1. `s03` 应该继续放在 `s12` 前面\n\n参考仓库里同时存在:\n\n- 小范围的会话计划\n- 大范围的持久任务 / 运行时系统\n\n它们不是一回事。\n\n所以教学顺序应当继续保持:\n\n`会话内计划 -> 持久任务图`\n\n### 2. 
`s09` 应该继续放在 `s10` 前面\n\n参考仓库里的输入装配,明确把 memory 当成输入来源之一。\n\n也就是说:\n\n- `memory` 先回答“内容从哪里来”\n- `prompt pipeline` 再回答“这些内容怎么组装进去”\n\n所以先讲 `s09`,再讲 `s10`,顺序不要反过来。\n\n### 3. `s12` 必须早于 `s13`\n\n`tasks/types.ts` 这类运行时任务联合类型,是这次对照里最强的证据之一。\n\n它非常清楚地说明:\n\n- 持久化的工作目标\n- 当前活着的执行槽位\n\n必须是两层不同状态。\n\n如果先讲 `s13`,读者几乎一定会把这两层混掉。\n\n### 4. `s15 -> s16 -> s17` 的顺序是对的\n\n参考仓库里明确能看到:\n\n- 持久 actor\n- 结构化协作\n- 自治认领 / 恢复\n\n自治必须建立在前两者之上,所以当前顺序合理。\n\n### 5. `s18` 应该继续早于 `s19`\n\n参考仓库把 worktree 当作本地执行边界机制。\n\n这应该先于:\n\n- 外部能力提供者\n- MCP server\n- plugin 装配面\n\n被讲清。\n\n否则读者会误以为“外部能力系统比本地执行边界更核心”。\n\n## 这套教学仓库仍然不该抄进主线的内容\n\n参考仓库里有很多真实但不应该占据主线的内容,例如:\n\n- CLI 命令面的完整铺开\n- UI 渲染细节\n- 遥测与分析分支\n- 远程 / 企业产品接线\n- 平台兼容层\n- 文件名、函数名、行号级 trivia\n\n这些不是假的。\n\n但它们不该成为 0 到 1 教学路径的中心。\n\n## 当前教学最容易漂掉的地方\n\n### 1. 不要把 subagent 和 teammate 混成一个模糊概念\n\n参考仓库里的 `AgentTool` 横跨了:\n\n- 一次性委派\n- 后台 worker\n- 持久 worker / teammate\n- worktree 隔离 worker\n\n这恰恰说明教学仓库应该继续拆开讲:\n\n- `s04`\n- `s15`\n- `s17`\n- `s18`\n\n不要在早期就把这些东西混成一个“大 agent 能力”。\n\n### 2. 不要把 worktree 教成“只是 git 小技巧”\n\n参考仓库里有 closeout、resume、cleanup、dirty-check 等状态。\n\n所以 `s18` 必须继续讲清:\n\n- lane 身份\n- task 绑定\n- keep / remove 收尾\n- 恢复与清理\n\n而不是只讲 `git worktree add`。\n\n### 3. 不要把 MCP 缩成“远程 tools”\n\n参考仓库里明显不只有工具,还有:\n\n- resources\n- prompts\n- elicitation / connection state\n- plugin 中介层\n\n所以 `s19` 可以继续用 tools-first 的教学路径切入,但一定要补平台边界那一层地图。\n\n## 最终判断\n\n如果只拿“章节顺序是否贴近参考仓库的设计主干”这个问题来打分,那么当前这套顺序是过关而且方向正确的。\n\n真正还能继续加分的地方,不再是再做一次大重排,而是:\n\n- 把桥接文档补齐\n- 把实体边界讲得更硬\n- 把多语言内容统一到同一个心智层次\n- 让 web 页面把这套学习地图展示得更清楚\n\n## 一句话记住\n\n**最好的教学顺序,不是源码文件出现的顺序,而是一个初学实现者真正能顺着依赖关系把系统重建出来的顺序。**\n"
+ },
+ {
+ "version": null,
+ "slug": "s00f-code-reading-order",
+ "locale": "zh",
+ "title": "s00f: 本仓库代码阅读顺序",
+ "kind": "bridge",
+ "filename": "s00f-code-reading-order.md",
+ "content": "# s00f: 本仓库代码阅读顺序\n\n> 这份文档不是让你“多看代码”。 \n> 它专门解决另一个问题:\n>\n> **当你已经知道章节顺序是对的以后,本仓库代码到底应该按什么顺序读,才不会把心智重新读乱。**\n\n## 先说结论\n\n不要这样读代码:\n\n- 不要从文件最长的那一章开始\n- 不要随机点一个你觉得“高级”的章节开始\n- 不要先钻 `web/` 再回头猜主线\n- 不要把 19 个 `agents/*.py` 当成一个源码池乱翻\n\n最稳的读法只有一句话:\n\n**文档顺着章节读,代码也顺着章节读。**\n\n而且每一章的代码,都先按同一个模板看:\n\n1. 先看状态结构\n2. 再看工具定义或注册表\n3. 再看“这一轮怎么推进”的主函数\n4. 最后才看 CLI 入口和试运行方式\n\n## 为什么需要这份文档\n\n很多读者不是看不懂某一章文字,而是会在真正打开代码以后重新乱掉。\n\n典型症状是:\n\n- 一上来先盯住 300 行以上的文件底部\n- 先看一堆 `run_*` 函数,却不知道它们挂在哪条主线上\n- 先看“最复杂”的平台章节,然后觉得前面的章节好像都太简单\n- 把 `task`、`runtime task`、`teammate`、`worktree` 在代码里重新混成一团\n\n这份阅读顺序就是为了防止这种情况。\n\n## 读每个 agent 文件时,都先按同一个模板\n\n不管你打开的是哪一章,本仓库里的 `agents/sXX_*.py` 都建议先按下面顺序读:\n\n### 第一步:先看文件头注释\n\n先回答两个问题:\n\n- 这一章到底在教什么\n- 它故意没有教什么\n\n如果连这一步都没建立,后面你会把每个函数都看成同等重要。\n\n### 第二步:先看状态结构或管理器类\n\n优先找这些东西:\n\n- `LoopState`\n- `PlanningState`\n- `CompactState`\n- `TaskManager`\n- `BackgroundManager`\n- `TeammateManager`\n- `WorktreeManager`\n\n原因很简单:\n\n**先知道系统到底记住了什么,后面才看得懂它为什么要这样流动。**\n\n### 第三步:再看工具列表或注册表\n\n优先找这些入口:\n\n- `TOOLS`\n- `TOOL_HANDLERS`\n- 各种 `run_*`\n- `build_tool_pool()`\n\n这一层回答的是:\n\n- 模型到底能调用什么\n- 这些调用会落到哪条执行面上\n\n### 第四步:最后才看主推进函数\n\n重点函数通常长这样:\n\n- `run_one_turn(...)`\n- `agent_loop(...)`\n- 某个 `handle_*`\n\n这一步要回答的是:\n\n- 这一章新机制到底接在主循环哪一环\n- 哪个分支是新增的\n- 新状态是在哪里写入、回流、继续的\n\n### 第五步:最后再看 `if __name__ == \"__main__\"`\n\nCLI 入口当然有用,但它不应该成为第一屏。\n\n因为它通常只是在做:\n\n- 读用户输入\n- 初始化状态\n- 调用 `agent_loop`\n\n真正决定一章心智主干的,不在这里。\n\n## 阶段 1:`s01-s06` 应该怎样读代码\n\n这一段不是在学“很多功能”,而是在学:\n\n**一个单 agent 主骨架到底怎样成立。**\n\n| 章节 | 文件 | 先看什么 | 再看什么 | 读完要确认什么 |\n|---|---|---|---|---|\n| `s01` | `agents/s01_agent_loop.py` | `LoopState` | `TOOLS` -> `execute_tool_calls()` -> `run_one_turn()` -> `agent_loop()` | 你已经能看懂 `messages -> model -> tool_result -> next turn` |\n| `s02` | `agents/s02_tool_use.py` | `safe_path()` | `run_read()` / `run_write()` / `run_edit()` -> `TOOL_HANDLERS` -> `agent_loop()` | 你已经能看懂“主循环不变,工具靠分发面增长” |\n| `s03` | `agents/s03_todo_write.py` | `PlanItem` / `PlanningState` / `TodoManager` | `todo` 相关 handler -> reminder 注入 -> `agent_loop()` | 你已经能看懂“会话计划状态”怎么外显化 |\n| `s04` | `agents/s04_subagent.py` | `AgentTemplate` | `run_subagent()` -> 父 `agent_loop()` | 你已经能看懂“子智能体首先是上下文隔离” |\n| `s05` | `agents/s05_skill_loading.py` | `SkillManifest` / `SkillDocument` / `SkillRegistry` | `get_descriptions()` / `get_content()` -> `agent_loop()` | 你已经能看懂“先发现、再按需加载” |\n| `s06` | `agents/s06_context_compact.py` | `CompactState` | `persist_large_output()` -> `micro_compact()` -> `compact_history()` -> `agent_loop()` | 你已经能看懂“压缩不是删历史,而是转移细节” |\n\n### 这一段最值得反复看的 3 个代码点\n\n1. `state` 在哪里第一次从“聊天内容”升级成“显式系统状态”\n2. `tool_result` 是怎么一直保持为统一回流接口的\n3. 
新机制是怎样接进 `agent_loop()` 而不是把 `agent_loop()` 重写烂的\n\n### 这一段读完后,最好的动作\n\n不要立刻去看 `s07`。\n\n先自己从空目录手写一遍下面这些最小件:\n\n- 一个 loop\n- 一个 dispatch map\n- 一个会话计划状态\n- 一个一次性子任务隔离\n- 一个按需技能加载\n- 一个最小压缩层\n\n## 阶段 2:`s07-s11` 应该怎样读代码\n\n这一段不是在学“又多了五种功能”。\n\n它真正是在学:\n\n**单 agent 的控制面是怎样长出来的。**\n\n| 章节 | 文件 | 先看什么 | 再看什么 | 读完要确认什么 |\n|---|---|---|---|---|\n| `s07` | `agents/s07_permission_system.py` | `BashSecurityValidator` / `PermissionManager` | 权限判定入口 -> `run_bash()` -> `agent_loop()` | 你已经能看懂“先 gate,再 execute” |\n| `s08` | `agents/s08_hook_system.py` | `HookManager` | hook 注册与触发 -> `agent_loop()` | 你已经能看懂 hook 是固定时机的插口,不是散落 if |\n| `s09` | `agents/s09_memory_system.py` | `MemoryManager` / `DreamConsolidator` | `run_save_memory()` -> `build_system_prompt()` -> `agent_loop()` | 你已经能看懂 memory 是长期信息层,不是上下文垃圾桶 |\n| `s10` | `agents/s10_system_prompt.py` | `SystemPromptBuilder` | `build_system_reminder()` -> `agent_loop()` | 你已经能看懂输入是流水线,不是单块 prompt |\n| `s11` | `agents/s11_error_recovery.py` | `estimate_tokens()` / `auto_compact()` / `backoff_delay()` | 各恢复分支 -> `agent_loop()` | 你已经能看懂“恢复以后怎样继续下一轮” |\n\n### 这一段读代码时,最容易重新读乱的地方\n\n1. 把权限和 hook 混成一类\n2. 把 memory 和 prompt 装配混成一类\n3. 把 `s11` 看成很多异常判断,而不是“续行控制”\n\n如果你开始混,先回:\n\n- `docs/zh/s00a-query-control-plane.md`\n- `docs/zh/s10a-message-prompt-pipeline.md`\n- `docs/zh/s00c-query-transition-model.md`\n\n## 阶段 3:`s12-s14` 应该怎样读代码\n\n这一段开始,代码理解的关键不再是“工具多了什么”,而是:\n\n**系统第一次真正长出会话外工作状态和运行时槽位。**\n\n| 章节 | 文件 | 先看什么 | 再看什么 | 读完要确认什么 |\n|---|---|---|---|---|\n| `s12` | `agents/s12_task_system.py` | `TaskManager` | 任务创建、依赖、解锁 -> `agent_loop()` | 你已经能看懂 task 是持久工作图,不是 todo |\n| `s13` | `agents/s13_background_tasks.py` | `NotificationQueue` / `BackgroundManager` | 后台执行登记 -> 通知排空 -> `agent_loop()` | 你已经能看懂 background task 是运行槽位 |\n| `s14` | `agents/s14_cron_scheduler.py` | `CronLock` / `CronScheduler` | `cron_matches()` -> schedule 触发 -> `agent_loop()` | 你已经能看懂调度器只负责“未来何时开始” |\n\n### 这一段读代码时一定要守住的边界\n\n- `task` 是工作目标\n- `runtime task` 是正在跑的执行槽位\n- `schedule` 是何时触发工作\n\n只要这三层在代码里重新混掉,后面 `s15-s19` 会一起变难。\n\n## 阶段 4:`s15-s19` 应该怎样读代码\n\n这一段不要当成“功能狂欢”去读。\n\n它真正建立的是:\n\n**平台边界。**\n\n| 章节 | 文件 | 先看什么 | 再看什么 | 读完要确认什么 |\n|---|---|---|---|---|\n| `s15` | `agents/s15_agent_teams.py` | `MessageBus` / `TeammateManager` | 队友名册、邮箱、独立循环 -> `agent_loop()` | 你已经能看懂 teammate 是长期 actor,不是一次性 subagent |\n| `s16` | `agents/s16_team_protocols.py` | `RequestStore` / `TeammateManager` | `handle_shutdown_request()` / `handle_plan_review()` -> `agent_loop()` | 你已经能看懂 request-response + `request_id` |\n| `s17` | `agents/s17_autonomous_agents.py` | `RequestStore` / `TeammateManager` | `is_claimable_task()` / `claim_task()` / `ensure_identity_context()` -> `agent_loop()` | 你已经能看懂自治主线:空闲检查 -> 安全认领 -> 恢复工作 |\n| `s18` | `agents/s18_worktree_task_isolation.py` | `TaskManager` / `WorktreeManager` / `EventBus` | `worktree_enter` 相关生命周期 -> `agent_loop()` | 你已经能看懂 task 管目标,worktree 管执行车道 |\n| `s19` | `agents/s19_mcp_plugin.py` | `CapabilityPermissionGate` / `MCPClient` / `PluginLoader` / `MCPToolRouter` | `build_tool_pool()` / `handle_tool_call()` / `normalize_tool_result()` -> `agent_loop()` | 你已经能看懂外部能力如何接回同一控制面 |\n\n### 这一段最容易误读的地方\n\n1. 把 `s15` 的 teammate 当成 `s04` 的 subagent 放大版\n2. 把 `s17` 自治看成“agent 自己乱跑”\n3. 把 `s18` worktree 看成一个 git 小技巧\n4. 
把 `s19` MCP 缩成“只是远程 tools”\n\n## 代码阅读时,哪些文件不要先看\n\n如果你的目标是建立主线心智,下面这些内容不要先看:\n\n- `web/` 里的可视化实现细节\n- `web/src/data/generated/*`\n- `.next/` 或其他构建产物\n- `agents/s_full.py`\n\n原因不是它们没价值。\n\n而是:\n\n- `web/` 解决的是展示与学习界面\n- `generated` 是抽取结果,不是机制本身\n- `s_full.py` 是整合参考,不适合第一次建立边界\n\n## 最推荐的“文档 + 代码 + 运行”循环\n\n每一章最稳的学习动作不是只看文档,也不是只看代码。\n\n推荐固定走这一套:\n\n1. 先读这一章正文\n2. 再读这一章的桥接资料\n3. 再打开对应 `agents/sXX_*.py`\n4. 按“状态 -> 工具 -> 主推进函数 -> CLI 入口”的顺序看\n5. 跑一次这章的 demo\n6. 自己从空目录重写一个最小版本\n\n只要你每章都这样走一次,代码理解会非常稳。\n\n## 初学者最容易犯的 6 个代码阅读错误\n\n### 1. 先看最长文件\n\n这通常只会先把自己看晕。\n\n### 2. 先盯 `run_bash()` 这种工具细节\n\n工具实现细节不是主干。\n\n### 3. 不先找状态结构\n\n这样你永远不知道系统到底记住了什么。\n\n### 4. 把 `agent_loop()` 当成唯一重点\n\n主循环当然重要,但每章真正新增的边界,往往在状态容器和分支入口。\n\n### 5. 读完代码不跑 demo\n\n不实际跑一次,很难建立“这一章到底新增了哪条回路”的感觉。\n\n### 6. 一口气连看三四章代码,不停下来自己重写\n\n这样最容易出现“我好像都看过,但其实自己不会写”的错觉。\n\n## 一句话记住\n\n**代码阅读顺序也必须服从教学顺序:先看边界,再看状态,再看主线如何推进,而不是随机翻源码。**\n"
},
{
"version": "s01",
+ "slug": "s01-the-agent-loop",
"locale": "zh",
- "title": "s01: The Agent Loop (Agent 循环)",
- "content": "# s01: The Agent Loop (Agent 循环)\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- 一个工具 + 一个循环 = 一个 Agent。\n\n## 问题\n\n语言模型能推理代码, 但碰不到真实世界 -- 不能读文件、跑测试、看报错。没有循环, 每次工具调用你都得手动把结果粘回去。你自己就是那个循环。\n\n## 解决方案\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\n一个退出条件控制整个流程。循环持续运行, 直到模型不再调用工具。\n\n## 工作原理\n\n1. 用户 prompt 作为第一条消息。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. 将消息和工具定义一起发给 LLM。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. 追加助手响应。检查 `stop_reason` -- 如果模型没有调用工具, 结束。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. 执行每个工具调用, 收集结果, 作为 user 消息追加。回到第 2 步。\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n组装为一个完整函数:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n不到 30 行, 这就是整个 Agent。后面 11 个章节都在这个循环上叠加机制 -- 循环本身始终不变。\n\n## 变更内容\n\n| 组件 | 之前 | 之后 |\n|---------------|------------|--------------------------------|\n| Agent loop | (无) | `while True` + stop_reason |\n| Tools | (无) | `bash` (单一工具) |\n| Messages | (无) | 累积式消息列表 |\n| Control flow | (无) | `stop_reason != \"tool_use\"` |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n"
+ "title": "s01: The Agent Loop (智能体循环)",
+ "kind": "chapter",
+ "filename": "s01-the-agent-loop.md",
+ "content": "# s01: The Agent Loop (智能体循环)\n\n`s00 > [ s01 ] > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *没有循环,就没有 agent。* \n> 这一章先教你做出一个最小但正确的循环,再告诉你为什么后面还需要更完整的控制平面。\n\n## 这一章要解决什么问题\n\n语言模型本身只会“生成下一段内容”。\n\n它不会自己:\n\n- 打开文件\n- 运行命令\n- 观察报错\n- 把工具结果再接着用于下一步推理\n\n如果没有一层代码在中间反复做这件事:\n\n```text\n发请求给模型\n -> 发现模型想调工具\n -> 真的去执行工具\n -> 把结果再喂回模型\n -> 继续下一轮\n```\n\n那模型就只是一个“会说话的程序”,还不是一个“会干活的 agent”。\n\n所以这一章的核心目标只有一个:\n\n**把“模型 + 工具”连接成一个能持续推进任务的主循环。**\n\n## 先解释几个名词\n\n### 什么是 loop\n\n`loop` 就是循环。\n\n这里的意思不是“程序死循环”,而是:\n\n> 只要任务还没做完,系统就继续重复同一套步骤。\n\n### 什么是 turn\n\n`turn` 可以理解成“一轮”。\n\n最小版本里,一轮通常包含:\n\n1. 把当前消息发给模型\n2. 读取模型回复\n3. 如果模型调用了工具,就执行工具\n4. 把工具结果写回消息历史\n\n然后才进入下一轮。\n\n### 什么是 tool_result\n\n`tool_result` 就是工具执行结果。\n\n它不是随便打印在终端上的日志,而是:\n\n> 要重新写回对话历史、让模型下一轮真的能看见的结果块。\n\n### 什么是 state\n\n`state` 是“当前运行状态”。\n\n第一次看到这个词时,你可以先把它理解成:\n\n> 主循环继续往下走时,需要一直带着走的那份数据。\n\n最小版本里,最重要的状态就是:\n\n- `messages`\n- 当前是第几轮\n- 这一轮结束后为什么还要继续\n\n## 最小心智模型\n\n先把整个 agent 想成下面这条回路:\n\n```text\nuser message\n |\n v\nLLM\n |\n +-- 普通回答 ----------> 结束\n |\n +-- tool_use ----------> 执行工具\n |\n v\n tool_result\n |\n v\n 写回 messages\n |\n v\n 下一轮继续\n```\n\n这条图里最关键的,不是“有一个 while True”。\n\n真正关键的是这句:\n\n**工具结果必须重新进入消息历史,成为下一轮推理的输入。**\n\n如果少了这一步,模型就无法基于真实观察继续工作。\n\n## 关键数据结构\n\n### 1. Message\n\n最小教学版里,可以先把消息理解成:\n\n```python\n{\"role\": \"user\", \"content\": \"...\"}\n{\"role\": \"assistant\", \"content\": [...]}\n```\n\n这里最重要的不是字段名字,而是你要记住:\n\n**消息历史不是聊天记录展示层,而是模型下一轮要读的工作上下文。**\n\n### 2. Tool Result Block\n\n当工具执行完后,你要把它包装回消息流:\n\n```python\n{\n \"type\": \"tool_result\",\n \"tool_use_id\": \"...\",\n \"content\": \"...\",\n}\n```\n\n`tool_use_id` 的作用很简单:\n\n> 告诉模型“这条结果对应的是你刚才哪一次工具调用”。\n\n### 3. LoopState\n\n这章建议你不要只用一堆零散局部变量。\n\n最小也应该显式收拢出一个循环状态:\n\n```python\nstate = {\n \"messages\": [...],\n \"turn_count\": 1,\n \"transition_reason\": None,\n}\n```\n\n这里的 `transition_reason` 先只需要理解成:\n\n> 这一轮结束后,为什么要继续下一轮。\n\n最小教学版只用一种原因就够了:\n\n```python\n\"tool_result\"\n```\n\n也就是:\n\n> 因为刚执行完工具,所以要继续。\n\n后面到了控制面更完整的章节里,你会看到它逐渐长成更多种原因。 \n如果你想先看完整一点的形状,可以配合读:\n\n- [`s00a-query-control-plane.md`](./s00a-query-control-plane.md)\n\n## 最小实现\n\n### 第一步:准备初始消息\n\n用户的请求先进入 `messages`:\n\n```python\nmessages = [{\"role\": \"user\", \"content\": query}]\n```\n\n### 第二步:调用模型\n\n把消息历史、system prompt 和工具定义一起发给模型:\n\n```python\nresponse = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n)\n```\n\n### 第三步:追加 assistant 回复\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\n```\n\n这一步非常重要。\n\n很多初学者会只关心“最后有没有答案”,忽略把 assistant 回复本身写回历史。 \n这样一来,下一轮上下文就会断掉。\n\n### 第四步:如果模型调用了工具,就执行\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n### 第五步:把工具结果作为新消息写回去\n\n```python\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n然后下一轮重新发给模型。\n\n### 组合成一个完整循环\n\n```python\ndef agent_loop(state):\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=state[\"messages\"],\n tools=TOOLS,\n max_tokens=8000,\n )\n\n state[\"messages\"].append({\n \"role\": \"assistant\",\n \"content\": response.content,\n })\n\n if response.stop_reason != \"tool_use\":\n state[\"transition_reason\"] = None\n return\n\n results = []\n for block in response.content:\n 
if block.type == \"tool_use\":\n output = run_tool(block)\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n\n state[\"messages\"].append({\"role\": \"user\", \"content\": results})\n state[\"turn_count\"] += 1\n state[\"transition_reason\"] = \"tool_result\"\n```\n\n这就是最小 agent loop。\n\n## 它如何接进整个系统\n\n从现在开始,后面所有章节本质上都在做同一件事:\n\n**往这个循环里增加新的状态、新的分支判断和新的执行能力。**\n\n例如:\n\n- `s02` 往里面接工具路由\n- `s03` 往里面接规划状态\n- `s06` 往里面接上下文压缩\n- `s07` 往里面接权限判断\n- `s11` 往里面接错误恢复\n\n所以请把这一章牢牢记成一句话:\n\n> agent 的核心不是“模型很聪明”,而是“系统持续把现实结果喂回模型”。\n\n## 为什么教学版先接受 `stop_reason == \"tool_use\"` 这个简化\n\n这一章里,我们先用:\n\n```python\nif response.stop_reason != \"tool_use\":\n return\n```\n\n这完全合理。\n\n因为初学者在第一章真正要学会的,不是所有复杂边界,而是:\n\n1. assistant 回复要写回历史\n2. tool_result 要写回历史\n3. 主循环要持续推进\n\n但你也要知道,这只是第一层简化。\n\n更完整的系统不会只依赖 `stop_reason`,还会自己维护更明确的续行状态。 \n这是后面要补的,不是这一章一开始就要背下来的东西。\n\n## 初学者最容易犯的错\n\n### 1. 把工具结果打印出来,但不写回 `messages`\n\n这样模型下一轮根本看不到真实执行结果。\n\n### 2. 只保存用户消息,不保存 assistant 消息\n\n这样上下文会断层,模型会越来越不像“接着刚才做”。\n\n### 3. 不给工具结果绑定 `tool_use_id`\n\n模型会分不清哪条结果对应哪次调用。\n\n### 4. 一上来就把流式、并发、恢复、压缩全塞进第一章\n\n这会让主线变得非常难学。\n\n第一章最重要的是先把最小回路搭起来。\n\n### 5. 以为 `messages` 只是聊天展示\n\n不是。\n\n在 agent 里,`messages` 更像“下一轮工作输入”。\n\n## 教学边界\n\n这一章只需要先讲透一件事:\n\n**Agent 之所以从“会说”变成“会做”,是因为模型输出能走到工具,工具结果又能回到下一轮模型输入。**\n\n所以教学仓库在这里要刻意停住:\n\n- 不要一开始就拉进 streaming、retry、budget、recovery\n- 不要一开始就混入权限、Hook、任务系统\n- 不要把第一章写成整套系统所有后续机制的总图\n\n如果读者已经能凭记忆写出 `messages -> model -> tool_result -> next turn` 这条回路,这一章就已经达标了。\n\n## 一句话记住\n\n**Agent Loop 的本质,是把“模型的动作意图”变成“真实执行结果”,再把结果送回模型继续推理。**\n"
},
{
"version": "s02",
+ "slug": "s02-tool-use",
"locale": "zh",
"title": "s02: Tool Use (工具使用)",
- "content": "# s02: Tool Use (工具使用)\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"加一个工具, 只加一个 handler\"* -- 循环不用动, 新工具注册进 dispatch map 就行。\n\n## 问题\n\n只有 `bash` 时, 所有操作都走 shell。`cat` 截断不可预测, `sed` 遇到特殊字符就崩, 每次 bash 调用都是不受约束的安全面。专用工具 (`read_file`, `write_file`) 可以在工具层面做路径沙箱。\n\n关键洞察: 加工具不需要改循环。\n\n## 解决方案\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## 工作原理\n\n1. 每个工具有一个处理函数。路径沙箱防止逃逸工作区。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. dispatch map 将工具名映射到处理函数。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. 循环中按名称查找处理函数。循环体本身与 s01 完全一致。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n加工具 = 加 handler + 加 schema。循环永远不变。\n\n## 相对 s01 的变更\n\n| 组件 | 之前 (s01) | 之后 (s02) |\n|----------------|--------------------|--------------------------------|\n| Tools | 1 (仅 bash) | 4 (bash, read, write, edit) |\n| Dispatch | 硬编码 bash 调用 | `TOOL_HANDLERS` 字典 |\n| 路径安全 | 无 | `safe_path()` 沙箱 |\n| Agent loop | 不变 | 不变 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n"
+ "kind": "chapter",
+ "filename": "s02-tool-use.md",
+ "content": "# s02: Tool Use (工具使用)\n\n`s00 > s01 > [ s02 ] > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *\"加一个工具, 只加一个 handler\"* -- 循环不用动, 新工具注册进 dispatch map 就行。\n>\n> **Harness 层**: 工具分发 -- 扩展模型能触达的边界。\n\n## 问题\n\n只有 `bash` 时, 所有操作都走 shell。`cat` 截断不可预测, `sed` 遇到特殊字符就崩, 每次 bash 调用都是不受约束的安全面。专用工具 (`read_file`, `write_file`) 可以在工具层面做路径沙箱。\n\n关键洞察: 加工具不需要改循环。\n\n## 解决方案\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## 工作原理\n\n1. 每个工具有一个处理函数。路径沙箱防止逃逸工作区。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. dispatch map 将工具名映射到处理函数。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. 循环中按名称查找处理函数。循环体本身与 s01 完全一致。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n加工具 = 加 handler + 加 schema。循环永远不变。\n\n## 相对 s01 的变更\n\n| 组件 | 之前 (s01) | 之后 (s02) |\n|----------------|--------------------|--------------------------------|\n| Tools | 1 (仅 bash) | 4 (bash, read, write, edit) |\n| Dispatch | 硬编码 bash 调用 | `TOOL_HANDLERS` 字典 |\n| 路径安全 | 无 | `safe_path()` 沙箱 |\n| Agent loop | 不变 | 不变 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n\n## 如果你开始觉得“工具不只是 handler map”\n\n到这里为止,教学主线先把工具讲成:\n\n- schema\n- handler\n- `tool_result`\n\n这是对的,而且必须先这么学。\n\n但如果你继续把系统做大,很快就会发现工具层还会继续长出:\n\n- 权限环境\n- 当前消息和 app state\n- MCP client\n- 文件读取缓存\n- 通知与 query 跟踪\n\n也就是说,在一个结构更完整的系统里,工具层最后会更像一条“工具控制平面”,而不只是一张分发表。\n\n这层不要抢正文主线。 \n你先把这一章吃透,再继续看:\n\n- [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md)\n\n## 消息规范化\n\n教学版的 `messages` 列表直接发给 API, 所见即所发。但当系统变复杂后 (工具超时、用户取消、压缩替换), 内部消息列表会出现 API 不接受的格式问题。需要在发送前做一次规范化。\n\n### 为什么需要\n\nAPI 协议有三条硬性约束:\n1. 每个 `tool_use` 块**必须**有匹配的 `tool_result` (通过 `tool_use_id` 关联)\n2. `user` / `assistant` 消息必须**严格交替** (不能连续两条同角色)\n3. 
只接受协议定义的字段 (内部元数据会导致 400 错误)\n\n### 实现\n\n```python\ndef normalize_messages(messages: list) -> list:\n \"\"\"将内部消息列表规范化为 API 可接受的格式。\"\"\"\n normalized = []\n\n for msg in messages:\n # Step 1: 剥离内部字段\n clean = {\"role\": msg[\"role\"]}\n if isinstance(msg.get(\"content\"), str):\n clean[\"content\"] = msg[\"content\"]\n elif isinstance(msg.get(\"content\"), list):\n clean[\"content\"] = [\n {k: v for k, v in block.items()\n if k not in (\"_internal\", \"_source\", \"_timestamp\")}\n for block in msg[\"content\"]\n ]\n normalized.append(clean)\n\n # Step 2: tool_result 配对补齐\n # 收集所有已有的 tool_result ID\n existing_results = set()\n for msg in normalized:\n if isinstance(msg.get(\"content\"), list):\n for block in msg[\"content\"]:\n if block.get(\"type\") == \"tool_result\":\n existing_results.add(block.get(\"tool_use_id\"))\n\n # 找出缺失配对的 tool_use, 插入占位 result\n for msg in normalized:\n if msg[\"role\"] == \"assistant\" and isinstance(msg.get(\"content\"), list):\n for block in msg[\"content\"]:\n if (block.get(\"type\") == \"tool_use\"\n and block.get(\"id\") not in existing_results):\n # 在下一条 user 消息中补齐\n normalized.append({\"role\": \"user\", \"content\": [{\n \"type\": \"tool_result\",\n \"tool_use_id\": block[\"id\"],\n \"content\": \"(cancelled)\",\n }]})\n\n # Step 3: 合并连续同角色消息\n merged = [normalized[0]] if normalized else []\n for msg in normalized[1:]:\n if msg[\"role\"] == merged[-1][\"role\"]:\n # 合并内容\n prev = merged[-1]\n prev_content = prev[\"content\"] if isinstance(prev[\"content\"], list) \\\n else [{\"type\": \"text\", \"text\": prev[\"content\"]}]\n curr_content = msg[\"content\"] if isinstance(msg[\"content\"], list) \\\n else [{\"type\": \"text\", \"text\": msg[\"content\"]}]\n prev[\"content\"] = prev_content + curr_content\n else:\n merged.append(msg)\n\n return merged\n```\n\n在 agent loop 中, 每次 API 调用前运行:\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=system,\n messages=normalize_messages(messages), # 规范化后再发送\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n**关键洞察**: `messages` 列表是系统的内部表示, API 看到的是规范化后的副本。两者不是同一个东西。\n\n## 教学边界\n\n这一章最重要的,不是把完整工具运行时一次讲全,而是先讲清 3 个稳定点:\n\n- tool schema 是给模型看的说明\n- handler map 是代码里的分发入口\n- `tool_result` 是结果回流到主循环的统一出口\n\n只要这三点稳住,读者就已经能自己在不改主循环的前提下新增工具。\n\n权限、hook、并发、流式执行、外部工具来源这些后续层次当然重要,但都应该建立在这层最小分发模型之后。\n"
+ },
+ {
+ "version": null,
+ "slug": "s02a-tool-control-plane",
+ "locale": "zh",
+ "title": "s02a: Tool Control Plane (工具控制平面)",
+ "kind": "bridge",
+ "filename": "s02a-tool-control-plane.md",
+ "content": "# s02a: Tool Control Plane (工具控制平面)\n\n> 这篇桥接文档用来回答另一个关键问题:\n>\n> **为什么“工具系统”不只是一个 `tool_name -> handler` 的映射表?**\n\n## 这一篇为什么要存在\n\n`s02` 先教你工具注册和分发,这完全正确。 \n因为如果你一开始连工具调用都没做出来,后面的一切都无从谈起。\n\n但当系统长大以后,工具层会逐渐承载越来越多的责任:\n\n- 权限判断\n- MCP 接入\n- 通知发送\n- subagent / teammate 共享状态\n- file state cache\n- 当前消息和当前会话环境\n- 某些工具专属限制\n\n这时候,“工具层”就已经不是一张函数表了。\n\n它更像一条总线:\n\n**模型通过工具名发出动作意图,系统通过工具控制平面决定这条意图在什么环境里执行。**\n\n## 先解释几个名词\n\n### 什么是工具控制平面\n\n这里的“控制平面”可以继续沿用上一份桥接文档的理解:\n\n> 不直接做业务结果,而是负责协调工具如何执行的一层。\n\n它关心的问题不是“这个工具最后返回了什么”,而是:\n\n- 它在哪执行\n- 它有没有权限\n- 它可不可以访问某些共享状态\n- 它是本地工具还是外部工具\n\n### 什么是执行上下文\n\n执行上下文,就是工具运行时能看到的环境。\n\n例如:\n\n- 当前工作目录\n- 当前 app state\n- 当前消息列表\n- 当前权限模式\n- 当前可用 MCP client\n\n### 什么是能力来源\n\n不是所有工具都来自同一个地方。\n\n系统里常见的能力来源有:\n\n- 本地原生工具\n- MCP 外部工具\n- agent 工具\n- task / worktree / team 这类平台工具\n\n## 最小心智模型\n\n工具系统可以先画成 4 层:\n\n```text\n1. ToolSpec\n 模型看见的工具名字、描述、输入 schema\n\n2. Tool Router\n 根据工具名把请求送去正确的能力来源\n\n3. ToolUseContext\n 工具运行时能访问的共享环境\n\n4. Tool Result Envelope\n 把输出包装回主循环\n```\n\n最重要的升级点在第三层:\n\n**更完整系统的核心,不是 tool table,而是 ToolUseContext。**\n\n## 关键数据结构\n\n### 1. ToolSpec\n\n这还是最基础的结构:\n\n```python\ntool = {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {...},\n}\n```\n\n### 2. ToolDispatchMap\n\n```python\nhandlers = {\n \"read_file\": read_file,\n \"write_file\": write_file,\n \"bash\": run_bash,\n}\n```\n\n这依旧需要,但它不是全部。\n\n### 3. ToolUseContext\n\n教学版可以先做一个简化版本:\n\n```python\ntool_use_context = {\n \"tools\": handlers,\n \"permission_context\": {...},\n \"mcp_clients\": {},\n \"messages\": [...],\n \"app_state\": {...},\n \"notifications\": [],\n \"cwd\": \"...\",\n}\n```\n\n这个结构的关键点是:\n\n- 工具不再只拿到“输入参数”\n- 工具还能拿到“共享运行环境”\n\n### 4. ToolResultEnvelope\n\n不要把返回值只想成字符串。\n\n更稳妥的形状是:\n\n```python\nresult = {\n \"ok\": True,\n \"content\": \"...\",\n \"is_error\": False,\n \"attachments\": [],\n}\n```\n\n这样后面你才能平滑承接:\n\n- 普通文本结果\n- 结构化结果\n- 错误结果\n- 附件类结果\n\n## 为什么更完整的系统一定会出现 ToolUseContext\n\n想象两个系统。\n\n### 系统 A:只有 dispatch map\n\n```python\noutput = handlers[tool_name](**tool_input)\n```\n\n这适合最小 demo。\n\n### 系统 B:有 ToolUseContext\n\n```python\noutput = handlers[tool_name](tool_input, tool_use_context)\n```\n\n这个版本才更接近一个真实平台。\n\n因为工具现在不只是“做一个动作”,而是在一个复杂系统里做动作。\n\n例如:\n\n- `bash` 要看权限\n- `mcp__postgres__query` 要找对应 client\n- `agent` 工具要创建子执行环境\n- `task_output` 工具可能要写磁盘并发通知\n\n这些都要求它们共享同一个上下文总线。\n\n## 最小实现\n\n### 第一步:仍然保留 ToolSpec 和 handler\n\n这个主线不要丢。\n\n### 第二步:引入一个统一 context\n\n```python\nclass ToolUseContext:\n def __init__(self):\n self.handlers = {}\n self.permission_context = {}\n self.mcp_clients = {}\n self.messages = []\n self.app_state = {}\n self.notifications = []\n```\n\n### 第三步:让所有 handler 都能看到 context\n\n```python\ndef run_tool(tool_name: str, tool_input: dict, ctx: ToolUseContext):\n handler = ctx.handlers[tool_name]\n return handler(tool_input, ctx)\n```\n\n### 第四步:在 router 层分不同能力来源\n\n```python\ndef route_tool(tool_name: str, tool_input: dict, ctx: ToolUseContext):\n if tool_name.startswith(\"mcp__\"):\n return run_mcp_tool(tool_name, tool_input, ctx)\n return run_native_tool(tool_name, tool_input, ctx)\n```\n\n## 一张应该讲清楚的图\n\n```text\nLLM tool call\n |\n v\nTool Router\n |\n +-- native tools ----------> local handlers\n |\n +-- mcp tools -------------> mcp client\n |\n +-- agent/task/team tools --> platform handlers\n |\n v\n ToolUseContext\n - permissions\n - messages\n - app state\n - notifications\n - mcp clients\n```\n\n## 它和 `s02`、`s19` 的关系\n\n- `s02` 先教你工具调用为什么成立\n- 
这篇解释更完整的系统里工具层为什么会长成一个控制平面\n- `s19` 再把 MCP 作为外部能力来源接进来\n\n也就是说:\n\n**MCP 不是另一套独立系统,而是 Tool Control Plane 的一个能力来源。**\n\n## 初学者最容易犯的错\n\n### 1. 以为工具上下文只是 `cwd`\n\n不是。\n\n更完整的系统里,工具上下文往往还包含权限、状态、外部连接和通知接口。\n\n### 2. 让每个工具自己去全局变量里找环境\n\n这样工具层会变得非常散。\n\n更清楚的做法,是显式传一个统一 context。\n\n### 3. 把本地工具和 MCP 工具拆成完全不同体系\n\n这会让系统边界越来越乱。\n\n更好的方式是:\n\n- 能力来源不同\n- 但都汇入统一 router 和统一 result envelope\n\n### 4. 把 tool result 永远当成纯字符串\n\n这样后面接附件、错误、结构化信息时会很别扭。\n\n## 教学边界\n\n这篇最重要的,不是把工具层做成一个庞大的企业总线,而是先把下面三层边界讲清:\n\n- tool call 不是直接执行,而是先进入统一调度入口\n- 工具 handler 不应该各自去偷拿环境,而应该共享一份显式 `ToolUseContext`\n- 本地工具、插件工具、MCP 工具可以来源不同,但结果都应该回到统一控制面\n\n类型化上下文、能力注册中心、大结果存储和更细的工具限额,都是你把这条最小控制总线讲稳以后再补的扩展。\n\n## 一句话记住\n\n**最小工具系统靠 dispatch map,更完整的工具系统靠 ToolUseContext 这条控制总线。**\n"
+ },
+ {
+ "version": null,
+ "slug": "s02b-tool-execution-runtime",
+ "locale": "zh",
+ "title": "s02b: Tool Execution Runtime (工具执行运行时)",
+ "kind": "bridge",
+ "filename": "s02b-tool-execution-runtime.md",
+ "content": "# s02b: Tool Execution Runtime (工具执行运行时)\n\n> 这篇桥接文档解决的不是“工具怎么注册”,而是:\n>\n> **当模型一口气发出多个工具调用时,系统到底按什么规则执行、并发、回写、合并上下文?**\n\n## 这一篇为什么要存在\n\n`s02` 先教你:\n\n- 工具 schema\n- dispatch map\n- tool_result 回流\n\n这完全正确。 \n因为工具调用先得成立,后面才谈得上复杂度。\n\n但系统一旦长大,真正棘手的问题会变成下面这些:\n\n- 多个工具能不能并行执行\n- 哪些工具必须串行\n- 工具执行过程中要不要先发进度消息\n- 并发工具的结果应该按完成顺序回写,还是按原始出现顺序回写\n- 工具执行会不会改共享上下文\n- 多个并发工具如果都要改上下文,最后怎么合并\n\n这些问题已经不是“工具注册”能解释的了。\n\n它们属于更深一层:\n\n**工具执行运行时。**\n\n## 先解释几个名词\n\n### 什么叫工具执行运行时\n\n这里的运行时,不是指编程语言 runtime。\n\n这里说的是:\n\n> 当工具真正开始执行时,系统用什么规则去调度、并发、跟踪和回写这些工具。\n\n### 什么叫 concurrency safe\n\n你可以先把它理解成:\n\n> 这个工具能不能和别的同类工具同时跑,而不会把共享状态搞乱。\n\n例如很多只读工具常常是 concurrency safe:\n\n- `read_file`\n- 某些搜索工具\n- 某些纯查询类 MCP 工具\n\n而很多写操作不是:\n\n- `write_file`\n- `edit_file`\n- 某些会改全局状态的工具\n\n### 什么叫 progress message\n\n有些工具跑得慢,不适合一直静默。\n\nprogress message 就是:\n\n> 工具还没结束,但系统先把“它正在做什么”告诉上层。\n\n### 什么叫 context modifier\n\n有些工具执行完不只是返回结果,还会修改共享环境。\n\n例如:\n\n- 更新通知队列\n- 更新 app state\n- 更新“哪些工具正在运行”\n\n这种“对共享上下文的修改动作”,就可以理解成 context modifier。\n\n## 最小心智模型\n\n先不要把工具执行想成:\n\n```text\ntool_use -> handler -> result\n```\n\n更接近真实可扩展系统的理解是:\n\n```text\ntool_use blocks\n ->\n按执行安全性分批\n ->\n每批决定串行还是并行\n ->\n执行过程中可能产出 progress\n ->\n最终按稳定顺序回写结果\n ->\n必要时再合并 context modifiers\n```\n\n这里最关键的升级点有两个:\n\n- 并发不是默认全开\n- 上下文修改不是谁先跑完谁先直接乱写\n\n## 关键数据结构\n\n### 1. ToolExecutionBatch\n\n教学版最小可以先用这样一个概念:\n\n```python\nbatch = {\n \"is_concurrency_safe\": True,\n \"blocks\": [tool_use_1, tool_use_2, tool_use_3],\n}\n```\n\n它的意义是:\n\n- 不是每个工具都单独处理\n- 系统会先把工具调用按可否并发分成一批一批\n\n### 2. TrackedTool\n\n如果你准备把执行层做得更稳、更清楚,建议显式跟踪每个工具:\n\n```python\ntracked_tool = {\n \"id\": \"toolu_01\",\n \"name\": \"read_file\",\n \"status\": \"queued\", # queued / executing / completed / yielded\n \"is_concurrency_safe\": True,\n \"pending_progress\": [],\n \"results\": [],\n \"context_modifiers\": [],\n}\n```\n\n这类结构的价值很大。\n\n因为系统终于开始能回答:\n\n- 哪些工具还在排队\n- 哪些已经开始\n- 哪些已经完成\n- 哪些已经先吐出了中间进度\n\n### 3. MessageUpdate\n\n工具执行过程中,不一定只有最终结果。\n\n最小可以先理解成:\n\n```python\nupdate = {\n \"message\": maybe_message,\n \"new_context\": current_context,\n}\n```\n\n更完整的执行层里,一个工具执行运行时往往会产出两类更新:\n\n- 要立刻往上游发的消息更新\n- 只影响内部共享环境的 context 更新\n\n### 4. 
Queued Context Modifiers\n\n这是最容易被忽略、但很关键的一层。\n\n在并发工具批次里,更稳的策略不是“谁先完成谁先改 context”,而是:\n\n> 先把 context modifier 暂存起来,最后按原始工具顺序统一合并。\n\n最小理解方式:\n\n```python\nqueued_context_modifiers = {\n \"toolu_01\": [modify_ctx_a],\n \"toolu_02\": [modify_ctx_b],\n}\n```\n\n## 最小实现\n\n### 第一步:先分清哪些工具能并发\n\n```python\ndef is_concurrency_safe(tool_name: str, tool_input: dict) -> bool:\n return tool_name in {\"read_file\", \"search_files\"}\n```\n\n### 第二步:先分批,再执行\n\n```python\nbatches = partition_tool_calls(tool_uses)\n\nfor batch in batches:\n if batch[\"is_concurrency_safe\"]:\n run_concurrently(batch[\"blocks\"])\n else:\n run_serially(batch[\"blocks\"])\n```\n\n### 第三步:并发批次先吐进度,再收最终结果\n\n```python\nfor update in run_concurrently(...):\n if update.get(\"message\"):\n yield update[\"message\"]\n```\n\n### 第四步:context modifier 不要乱序落地\n\n```python\nqueued_modifiers = {}\n\nfor update in concurrent_updates:\n if update.get(\"context_modifier\"):\n queued_modifiers[update[\"tool_id\"]].append(update[\"context_modifier\"])\n\nfor tool in original_batch_order:\n for modifier in queued_modifiers.get(tool[\"id\"], []):\n context = modifier(context)\n```\n\n这一步是整篇里最容易被低估,但其实最接近真实系统开始长出执行运行时的点之一。\n\n## 一张真正应该建立的图\n\n```text\ntool_use blocks\n |\n v\npartition by concurrency safety\n |\n +-- read-only / safe batch -----> concurrent execution\n | |\n | +-- progress updates\n | +-- final results\n | +-- queued context modifiers\n |\n +-- exclusive batch ------------> serial execution\n |\n +-- direct result + direct context update\n```\n\n## 为什么这层比“dispatch map”更接近真实系统主脉络\n\n最小 demo 里:\n\n```python\nhandlers[tool_name](tool_input)\n```\n\n就够了。\n\n但在更完整系统里,真正复杂的不是“找到 handler”。\n\n真正复杂的是:\n\n- 多工具之间如何共存\n- 哪些能并发\n- 并发时如何保证回写顺序稳定\n- 并发时如何避免共享 context 被抢写\n- 工具报错时是否中止其他工具\n\n所以这层讲的不是边角优化,而是:\n\n> 工具系统从“可调用”升级到“可调度”的关键一步。\n\n## 它和前后章节怎么接\n\n- `s02` 先教你工具为什么能被调用\n- [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md) 讲工具为什么会长成统一控制面\n- 这篇继续讲,工具真的开始运行以后,系统如何调度它们\n- `s07`、`s13`、`s19` 往后都还会继续用到这层心智\n\n尤其是:\n\n- 权限系统会影响工具能不能执行\n- 后台任务会影响工具是否立即结束\n- MCP / plugin 会让工具来源更多、执行形态更复杂\n\n## 初学者最容易犯的错\n\n### 1. 看到多个工具调用,就默认全部并发\n\n这样很容易把共享状态搞乱。\n\n### 2. 只按完成顺序回写结果\n\n如果你完全按“谁先跑完谁先写”,主循环看到的顺序会越来越不稳定。\n\n### 3. 并发工具直接同时改共享 context\n\n这会制造很多很难解释的隐性状态问题。\n\n### 4. 认为 progress message 是“可有可无的 UI 装饰”\n\n它其实会影响:\n\n- 上层何时知道工具还活着\n- 长工具调用期间用户是否困惑\n- streaming 执行体验是否稳定\n\n### 5. 只讲工具 schema,不讲工具调度\n\n这样读者最后只会“注册工具”,却不理解真实 agent 为什么还要长出工具执行运行时。\n\n## 教学边界\n\n这篇最重要的,不是把工具调度层一次讲成一个庞大 runtime,而是先让读者守住三件事:\n\n- 工具调用要先分批,而不是默认看到多个 `tool_use` 就全部并发\n- 并发执行和稳定回写是两件事,不应该混成一个动作\n- 共享 context 的修改最好先排队,再按稳定顺序统一合并\n\n只要这三条边界已经清楚,后面的权限、后台任务和 MCP 接入就都有地方挂。 \n更细的队列模型、取消策略、流式输出协议,都可以放到你把这条最小运行时自己手搓出来以后再补。\n\n## 读完这一篇你应该能说清楚\n\n至少能完整说出这句话:\n\n> 工具系统不只是 `tool_name -> handler`,它还需要一层执行运行时来决定哪些工具并发、哪些串行、结果如何回写、共享上下文如何稳定合并。\n\n如果这句话你已经能稳定说清,那么你对 agent 工具层的理解,就已经比“会注册几个工具”深一大层了。\n"
},
{
"version": "s03",
+ "slug": "s03-todo-write",
"locale": "zh",
- "title": "s03: TodoWrite (待办写入)",
- "content": "# s03: TodoWrite (待办写入)\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"没有计划的 agent 走哪算哪\"* -- 先列步骤再动手, 完成率翻倍。\n\n## 问题\n\n多步任务中, 模型会丢失进度 -- 重复做过的事、跳步、跑偏。对话越长越严重: 工具结果不断填满上下文, 系统提示的影响力逐渐被稀释。一个 10 步重构可能做完 1-3 步就开始即兴发挥, 因为 4-10 步已经被挤出注意力了。\n\n## 解决方案\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 工作原理\n\n1. TodoManager 存储带状态的项目。同一时间只允许一个 `in_progress`。\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. `todo` 工具和其他工具一样加入 dispatch map。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. nag reminder: 模型连续 3 轮以上不调用 `todo` 时注入提醒。\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos. \",\n })\n```\n\n\"同时只能有一个 in_progress\" 强制顺序聚焦。nag reminder 制造问责压力 -- 你不更新计划, 系统就追着你问。\n\n## 相对 s02 的变更\n\n| 组件 | 之前 (s02) | 之后 (s03) |\n|----------------|------------------|--------------------------------|\n| Tools | 4 | 5 (+todo) |\n| 规划 | 无 | 带状态的 TodoManager |\n| Nag 注入 | 无 | 3 轮后注入 `` |\n| Agent loop | 简单分发 | + rounds_since_todo 计数器 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n"
+ "title": "s03: TodoWrite (会话内规划)",
+ "kind": "chapter",
+ "filename": "s03-todo-write.md",
+ "content": "# s03: TodoWrite (会话内规划)\n\n`s00 > s01 > s02 > [ s03 ] > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *计划不是替模型思考,而是把“正在做什么”明确写出来。*\n\n## 这一章要解决什么问题\n\n到了 `s02`,agent 已经会读文件、写文件、跑命令。\n\n问题也马上出现了:\n\n- 多步任务容易走一步忘一步\n- 明明已经做过的检查,会重复再做\n- 一口气列出很多步骤后,很快又回到即兴发挥\n\n这是因为模型虽然“能想”,但它的当前注意力始终受上下文影响。 \n如果没有一块**显式、稳定、可反复更新**的计划状态,大任务就很容易漂。\n\n所以这一章要补上的,不是“更强的工具”,而是:\n\n**让 agent 把当前会话里的计划外显出来,并且持续更新。**\n\n## 先解释几个名词\n\n### 什么是会话内规划\n\n这里说的规划,不是长期项目管理,也不是磁盘上的任务系统。\n\n它更像:\n\n> 为了完成当前这次请求,先把接下来几步写出来,并在过程中不断更新。\n\n### 什么是 todo\n\n`todo` 在这一章里只是一个载体。\n\n你不要把它理解成“某个特定产品里的某个工具名”,更应该把它理解成:\n\n> 模型用来写入当前计划的一条入口。\n\n### 什么是 active step\n\n`active step` 可以理解成“当前正在做的那一步”。\n\n教学版里我们用 `in_progress` 表示它。 \n这么做的目的不是形式主义,而是帮助模型维持焦点:\n\n> 同一时间,先把一件事做完,再进入下一件。\n\n### 什么是提醒\n\n提醒不是替模型规划,而是当它连续几轮都忘记更新计划时,轻轻拉它回来。\n\n## 先立清边界:这章不是任务系统\n\n这是这一章最重要的边界。\n\n`s03` 讲的是:\n\n- 当前会话里的轻量计划\n- 用来帮助模型聚焦下一步\n- 可以随任务推进不断改写\n\n它**不是**:\n\n- 持久化任务板\n- 依赖图\n- 多 agent 共用的工作图\n- 后台运行时任务管理\n\n这些会在 `s12-s14` 再系统展开。\n\n如果你现在就把 `s03` 讲成完整任务平台,初学者会很快混淆:\n\n- “当前这一步要做什么”\n- “整个系统长期还有哪些工作项”\n\n## 最小心智模型\n\n把这一章先想成一个很简单的结构:\n\n```text\n用户提出大任务\n |\n v\n模型先写一份当前计划\n |\n v\n计划状态\n - [ ] 还没做\n - [>] 正在做\n - [x] 已完成\n |\n v\n每做完一步,就更新计划\n```\n\n更具体一点:\n\n```text\n1. 先拆几步\n2. 选一项作为当前 active step\n3. 做完后标记 completed\n4. 把下一项改成 in_progress\n5. 如果好几轮没更新,系统提醒一下\n```\n\n这就是最小版本最该教清楚的部分。\n\n## 关键数据结构\n\n### 1. PlanItem\n\n最小条目可以长这样:\n\n```python\n{\n \"content\": \"Read the failing test\",\n \"status\": \"pending\" | \"in_progress\" | \"completed\",\n \"activeForm\": \"Reading the failing test\",\n}\n```\n\n这里的字段分别表示:\n\n- `content`:这一步要做什么\n- `status`:这一步现在处在什么状态\n- `activeForm`:当它正在进行中时,可以用更自然的进行时描述\n\n### 2. PlanningState\n\n除了计划条目本身,还应该有一点最小运行状态:\n\n```python\n{\n \"items\": [...],\n \"rounds_since_update\": 0,\n}\n```\n\n`rounds_since_update` 的意思很简单:\n\n> 连续多少轮过去了,模型还没有更新这份计划。\n\n### 3. 状态约束\n\n教学版推荐先立一条简单规则:\n\n```text\n同一时间,最多一个 in_progress\n```\n\n这不是宇宙真理。 \n它只是一个非常适合初学者的教学约束:\n\n**强制模型聚焦当前一步。**\n\n## 最小实现\n\n### 第一步:准备一个计划管理器\n\n```python\nclass TodoManager:\n def __init__(self):\n self.items = []\n```\n\n### 第二步:允许模型整体更新当前计划\n\n```python\ndef update(self, items: list) -> str:\n validated = []\n in_progress_count = 0\n\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\n \"content\": item[\"content\"],\n \"status\": status,\n \"activeForm\": item.get(\"activeForm\", \"\"),\n })\n\n if in_progress_count > 1:\n raise ValueError(\"Only one item can be in_progress\")\n\n self.items = validated\n return self.render()\n```\n\n教学版让模型“整份重写”当前计划,比做一堆局部增删改更容易理解。\n\n### 第三步:把计划渲染成可读文本\n\n```python\ndef render(self) -> str:\n lines = []\n for item in self.items:\n marker = {\n \"pending\": \"[ ]\",\n \"in_progress\": \"[>]\",\n \"completed\": \"[x]\",\n }[item[\"status\"]]\n lines.append(f\"{marker} {item['content']}\")\n return \"\\n\".join(lines)\n```\n\n### 第四步:把 `todo` 接成一个工具\n\n```python\nTOOL_HANDLERS = {\n \"read_file\": run_read,\n \"write_file\": run_write,\n \"edit_file\": run_edit,\n \"bash\": run_bash,\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n### 第五步:如果连续几轮没更新计划,就提醒\n\n```python\nif rounds_since_update >= 3:\n results.insert(0, {\n \"type\": \"text\",\n \"text\": \"Refresh your plan before continuing. 
\",\n })\n```\n\n这一步的核心意义不是“催促”本身,而是:\n\n> 系统开始把“计划状态是否失活”也看成主循环的一部分。\n\n## 它如何接到主循环里\n\n这一章以后,主循环不再只维护:\n\n- `messages`\n\n还开始维护一份额外的会话状态:\n\n- `PlanningState`\n\n也就是说,agent loop 现在不只是在“对话”。\n\n它还在维持一块当前工作面板:\n\n```text\nmessages -> 模型看到的历史\nplanning state -> 当前计划的显式外部状态\n```\n\n这就是这一章真正想让你学会的升级:\n\n**把“当前要做什么”从模型脑内,移到系统可观察的状态里。**\n\n## 为什么这章故意不讲成任务图\n\n因为这里的重点是:\n\n- 帮模型聚焦下一步\n- 让当前进度变得外显\n- 给主循环一个“过程性状态”\n\n而不是:\n\n- 任务依赖\n- 长期持久化\n- 多人协作任务板\n- 后台运行槽位\n\n如果你已经开始关心这些问题,说明你快进入:\n\n- [`s12-task-system.md`](./s12-task-system.md)\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)\n\n## 初学者最容易犯的错\n\n### 1. 把计划写得过长\n\n计划不是越多越好。\n\n如果一上来列十几步,模型很快就会失去维护意愿。\n\n### 2. 不区分“当前一步”和“未来几步”\n\n如果同时有很多个 `in_progress`,焦点就会散。\n\n### 3. 把会话计划当成长期任务系统\n\n这会让 `s03` 和 `s12` 的边界完全混掉。\n\n### 4. 只在开始时写一次计划,后面从不更新\n\n那这份计划就失去价值了。\n\n### 5. 以为 reminder 是可有可无的小装饰\n\n不是。\n\n提醒机制说明了一件很重要的事:\n\n> 主循环不仅要执行动作,还要维护动作过程中的结构化状态。\n\n## 教学边界\n\n这一章讲的是:\n\n**会话里的外显计划状态。**\n\n它还不是后面那种持久任务系统,所以边界要守住:\n\n- 这里的 `todo` 只服务当前会话,不负责跨阶段持久化\n- `{id, text, status}` 这种小结构已经够教会核心模式\n- reminder 直接一点没问题,重点是让模型持续更新计划\n\n这一章真正要让读者看见的是:\n\n**当计划进入结构化状态,而不是散在自然语言里时,agent 的漂移会明显减少。**\n\n## 一句话记住\n\n**`s03` 的 todo,不是任务平台,而是当前会话里的“外显计划状态”。**\n"
},
{
"version": "s04",
+ "slug": "s04-subagent",
"locale": "zh",
- "title": "s04: Subagents (Subagent)",
- "content": "# s04: Subagents (Subagent)\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"大任务拆小, 每个小任务干净的上下文\"* -- Subagent 用独立 messages[], 不污染主对话。\n\n## 问题\n\nAgent 工作越久, messages 数组越胖。每次读文件、跑命令的输出都永久留在上下文里。\"这个项目用什么测试框架?\" 可能要读 5 个文件, 但父 Agent 只需要一个词: \"pytest。\"\n\n## 解决方案\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## 工作原理\n\n1. 父 Agent 有一个 `task` 工具。Subagent 拥有除 `task` 外的所有基础工具 (禁止递归生成)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. Subagent 以 `messages=[]` 启动, 运行自己的循环。只有最终文本返回给父 Agent。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\nSubagent 可能跑了 30+ 次工具调用, 但整个消息历史直接丢弃。父 Agent 收到的只是一段摘要文本, 作为普通 `tool_result` 返回。\n\n## 相对 s03 的变更\n\n| 组件 | 之前 (s03) | 之后 (s04) |\n|----------------|------------------|-------------------------------|\n| Tools | 5 | 5 (基础) + task (仅父端) |\n| 上下文 | 单一共享 | 父 + 子隔离 |\n| Subagent | 无 | `run_subagent()` 函数 |\n| 返回值 | 不适用 | 仅摘要文本 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n"
+ "title": "s04: Subagents (子智能体)",
+ "kind": "chapter",
+ "filename": "s04-subagent.md",
+ "content": "# s04: Subagents (子智能体)\n\n`s00 > s01 > s02 > s03 > [ s04 ] > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *一个大任务,不一定要塞进一个上下文里做完。*\n\n## 这一章到底要解决什么问题\n\n当 agent 连续做很多事时,`messages` 会越来越长。\n\n比如用户只问:\n\n> “这个项目用什么测试框架?”\n\n但 agent 可能为了回答这个问题:\n\n- 读了 `pyproject.toml`\n- 读了 `requirements.txt`\n- 搜了 `pytest`\n- 跑了测试命令\n\n真正有价值的最终答案,可能只有一句话:\n\n> “这个项目主要用 `pytest`。”\n\n如果这些中间过程都永久堆在父对话里,后面的问题会越来越难回答,因为上下文被大量局部任务的噪声填满了。\n\n这就是子智能体要解决的问题:\n\n**把局部任务放进独立上下文里做,做完只把必要结果带回来。**\n\n## 先解释几个名词\n\n### 什么是“父智能体”\n\n当前正在和用户对话、持有主 `messages` 的 agent,就是父智能体。\n\n### 什么是“子智能体”\n\n父智能体临时派生出来,专门处理某个子任务的 agent,就是子智能体。\n\n### 什么叫“上下文隔离”\n\n意思是:\n\n- 父智能体有自己的 `messages`\n- 子智能体也有自己的 `messages`\n- 子智能体的中间过程不会自动写回父智能体\n\n## 最小心智模型\n\n```text\nParent agent\n |\n | 1. 决定把一个局部任务外包出去\n v\nSubagent\n |\n | 2. 在自己的上下文里读文件 / 搜索 / 执行工具\n v\nSummary\n |\n | 3. 只把最终摘要或结果带回父智能体\n v\nParent agent continues\n```\n\n最重要的点只有一个:\n\n**子智能体的价值,不是“多一个模型实例”本身,而是“多一个干净上下文”。**\n\n## 最小实现长什么样\n\n### 第一步:给父智能体一个 `task` 工具\n\n父智能体需要一个工具,让模型可以主动说:\n\n> “这个子任务我想交给一个独立上下文去做。”\n\n最小 schema 可以非常简单:\n\n```python\n{\n \"name\": \"task\",\n \"description\": \"Run a subtask in a clean context and return a summary.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"prompt\": {\"type\": \"string\"}\n },\n \"required\": [\"prompt\"]\n }\n}\n```\n\n### 第二步:子智能体使用自己的消息列表\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n ...\n```\n\n这就是隔离的关键。\n\n不是共享父智能体的 `messages`,而是从一份新的列表开始。\n\n### 第三步:子智能体只拿必要工具\n\n子智能体通常不需要拥有和父智能体完全一样的能力。\n\n最小版本里,常见做法是:\n\n- 给它文件读取、搜索、bash 之类的基础工具\n- 不给它继续派生子智能体的能力\n\n这样可以防止它无限递归。\n\n### 第四步:只把结果带回父智能体\n\n子智能体做完事后,不把全部内部历史写回去,而是返回一段总结。\n\n```python\nreturn {\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": summary_text,\n}\n```\n\n## 这一章最关键的数据结构\n\n如果你只记一个结构,就记这个:\n\n```python\nclass SubagentContext:\n messages: list\n tools: list\n handlers: dict\n max_turns: int\n```\n\n解释一下:\n\n- `messages`:子智能体自己的上下文\n- `tools`:子智能体可以调用哪些工具\n- `handlers`:这些工具到底对应哪些 Python 函数\n- `max_turns`:防止子智能体无限跑\n\n这就是最小子智能体的骨架。\n\n## 为什么它真的有用\n\n### 用处 1:给父上下文减负\n\n局部任务的中间噪声不会全都留在主对话里。\n\n### 用处 2:让任务描述更清楚\n\n一个子智能体接到的 prompt 可以非常聚焦:\n\n- “读完这几个文件,给我一句总结”\n- “检查这个目录里有没有测试”\n- “对这个函数写一个最小修复”\n\n### 用处 3:让后面的多 agent 协作有基础\n\n你可以把子智能体理解成多 agent 系统的最小起点。\n\n先把一次性子任务外包做明白,后面再升级到长期 teammate、任务认领、团队协议,会顺很多。\n\n## 从 0 到 1 的实现顺序\n\n推荐按这个顺序写:\n\n### 版本 1:空白上下文子智能体\n\n先只实现:\n\n- 一个 `task` 工具\n- 一个 `run_subagent(prompt)` 函数\n- 子智能体自己的 `messages`\n- 子智能体最后返回摘要\n\n这已经够了。\n\n### 版本 2:限制工具集\n\n给子智能体一个更小、更安全的工具集。\n\n比如:\n\n- 允许 `read_file`\n- 允许 `grep`\n- 允许只读 bash\n- 不允许 `task`\n\n### 版本 3:加入最大轮数和失败保护\n\n至少补两个保护:\n\n- 最多跑多少轮\n- 工具出错时怎么退出\n\n### 版本 4:再考虑 fork\n\n只有当你已经稳定跑通前面三步,才考虑 fork。\n\n## 什么是 fork,为什么它是“下一步”,不是“起步”\n\n前面的最小实现是:\n\n- 子智能体从空白上下文开始\n\n这叫最朴素的子智能体。\n\n但有时一个子任务必须知道父智能体之前在聊什么。\n\n例如:\n\n> “基于我们刚才已经讨论出来的方案,去补测试。”\n\n这时可以用 `fork`:\n\n- 不是从空白 `messages` 开始\n- 而是先复制父智能体的已有上下文,再追加子任务 prompt\n\n```python\nsub_messages = list(parent_messages)\nsub_messages.append({\"role\": \"user\", \"content\": prompt})\n```\n\n这就是 fork 的本质:\n\n**继承上下文,而不是重头开始。**\n\n## 初学者最容易踩的坑\n\n### 坑 1:把子智能体当成“为了炫技的并发”\n\n子智能体首先是为了解决上下文问题,不是为了展示“我有很多 agent”。\n\n### 坑 2:把父历史全部原样灌回去\n\n如果你最后又把子智能体全量历史粘回父对话,那隔离价值就几乎没了。\n\n### 坑 3:一上来就做特别复杂的角色系统\n\n比如一开始就加:\n\n- explorer\n- reviewer\n- planner\n- tester\n- implementer\n\n这些都可以做,但不应该先做。\n\n先把“一个干净上下文的子任务执行器”做对,后面角色化只是在它上面再包一层。\n\n### 坑 4:忘记给子智能体设置停止条件\n\n如果没有:\n\n- 最大轮数\n- 
异常处理\n- 工具过滤\n\n子智能体很容易无限转。\n\n## 教学边界\n\n这章要先打牢的,不是“多 agent 很高级”,而是:\n\n**子智能体首先是一个上下文边界。**\n\n所以教学版先停在这里就够了:\n\n- 一次性子任务就够\n- 摘要返回就够\n- 新 `messages` + 工具过滤就够\n\n不要提前把 `fork`、后台运行、transcript 持久化、worktree 绑定一起塞进来。\n\n真正该守住的顺序仍然是:\n\n**先做隔离,再做高级化。**\n\n## 和后续章节的关系\n\n- `s04` 解决的是“局部任务的上下文隔离”\n- `s15-s17` 解决的是“多个长期角色如何协作”\n- `s18` 解决的是“多个执行者如何在文件系统层面隔离”\n\n它们不是重复关系,而是递进关系。\n\n## 这一章学完后,你应该能回答\n\n- 为什么大任务不应该总塞在一个 `messages` 里?\n- 子智能体最小版为什么只需要独立上下文和摘要返回?\n- fork 是什么,为什么它不该成为第一步?\n- 为什么子智能体的第一价值是“减噪”,而不是“炫多 agent”?\n\n---\n\n**一句话记住:子智能体的核心,不是多一个角色,而是多一个干净上下文。**\n"
},
{
"version": "s05",
+ "slug": "s05-skill-loading",
"locale": "zh",
- "title": "s05: Skills (Skill 加载)",
- "content": "# s05: Skills (Skill 加载)\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"用到什么知识, 临时加载什么知识\"* -- 通过 tool_result 注入, 不塞 system prompt。\n\n## 问题\n\n你希望 Agent 遵循特定领域的工作流: git 约定、测试模式、代码审查清单。全塞进系统提示太浪费 -- 10 个 Skill, 每个 2000 token, 就是 20,000 token, 大部分跟当前任务毫无关系。\n\n## 解决方案\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\n第一层: 系统提示中放 Skill 名称 (低成本)。第二层: tool_result 中按需放完整内容。\n\n## 工作原理\n\n1. 每个 Skill 是一个目录, 包含 `SKILL.md` 文件和 YAML frontmatter。\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. SkillLoader 递归扫描 `SKILL.md` 文件, 用目录名作为 Skill 标识。\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n \"\n```\n\n3. 第一层写入系统提示。第二层不过是 dispatch map 中的又一个工具。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\n模型知道有哪些 Skill (便宜), 需要时再加载完整内容 (贵)。\n\n## 相对 s04 的变更\n\n| 组件 | 之前 (s04) | 之后 (s05) |\n|----------------|------------------|--------------------------------|\n| Tools | 5 (基础 + task) | 5 (基础 + load_skill) |\n| 系统提示 | 静态字符串 | + Skill 描述列表 |\n| 知识库 | 无 | skills/\\*/SKILL.md 文件 |\n| 注入方式 | 无 | 两层 (系统提示 + result) |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n"
+ "title": "s05: Skills (按需知识加载)",
+ "kind": "chapter",
+ "filename": "s05-skill-loading.md",
+ "content": "# s05: Skills (按需知识加载)\n\n`s00 > s01 > s02 > s03 > s04 > [ s05 ] > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *不是把所有知识永远塞进 prompt,而是在需要的时候再加载正确那一份。*\n\n## 这一章要解决什么问题\n\n到了 `s04`,你的 agent 已经会:\n\n- 调工具\n- 做会话内规划\n- 把大任务分给子 agent\n\n接下来很自然会遇到另一个问题:\n\n> 不同任务需要的领域知识不一样。\n\n例如:\n\n- 做代码审查,需要一套审查清单\n- 做 Git 操作,需要一套提交约定\n- 做 MCP 集成,需要一套专门步骤\n\n如果你把这些知识包全部塞进 system prompt,就会出现两个问题:\n\n1. 大部分 token 都浪费在当前用不到的说明上\n2. prompt 越来越臃肿,主线规则越来越不清楚\n\n所以这一章真正要做的是:\n\n**把“长期可选知识”从 system prompt 主体里拆出来,改成按需加载。**\n\n## 先解释几个名词\n\n### 什么是 skill\n\n这里的 `skill` 可以先简单理解成:\n\n> 一份围绕某类任务的可复用说明书。\n\n它通常会告诉 agent:\n\n- 什么时候该用它\n- 做这类任务时有哪些步骤\n- 有哪些注意事项\n\n### 什么是 discovery\n\n`discovery` 指“发现有哪些 skill 可用”。\n\n这一层只需要很轻量的信息,例如:\n\n- skill 名字\n- 一句描述\n\n### 什么是 loading\n\n`loading` 指“把某个 skill 的完整正文真正读进来”。\n\n这一层才是昂贵的,因为它会把完整内容放进当前上下文。\n\n## 最小心智模型\n\n把这一章先理解成两层:\n\n```text\n第 1 层:轻量目录\n - skill 名称\n - skill 描述\n - 让模型知道“有哪些可用”\n\n第 2 层:按需正文\n - 只有模型真正需要时才加载\n - 通过工具结果注入当前上下文\n```\n\n可以画成这样:\n\n```text\nsystem prompt\n |\n +-- Skills available:\n - code-review: review checklist\n - git-workflow: branch and commit guidance\n - mcp-builder: build an MCP server\n```\n\n当模型判断自己需要某份知识时:\n\n```text\nload_skill(\"code-review\")\n |\n v\ntool_result\n |\n v\n\n完整审查说明\n \n```\n\n这就是这一章最核心的设计。\n\n## 关键数据结构\n\n### 1. SkillManifest\n\n先准备一份很轻的元信息:\n\n```python\n{\n \"name\": \"code-review\",\n \"description\": \"Checklist for reviewing code changes\",\n}\n```\n\n它的作用只是让模型知道:\n\n> 这份 skill 存在,并且大概是干什么的。\n\n### 2. SkillDocument\n\n真正被加载时,再读取完整内容:\n\n```python\n{\n \"manifest\": {...},\n \"body\": \"... full skill text ...\",\n}\n```\n\n### 3. SkillRegistry\n\n你最好不要把 skill 散着读取。\n\n更清楚的方式是做一个统一注册表:\n\n```python\nregistry = {\n \"code-review\": SkillDocument(...),\n \"git-workflow\": SkillDocument(...),\n}\n```\n\n它至少要能回答两个问题:\n\n1. 有哪些 skill 可用\n2. 
某个 skill 的完整内容是什么\n\n## 最小实现\n\n### 第一步:把每个 skill 放成一个目录\n\n最小结构可以这样:\n\n```text\nskills/\n code-review/\n SKILL.md\n git-workflow/\n SKILL.md\n```\n\n### 第二步:从 `SKILL.md` 里读取最小元信息\n\n```python\nclass SkillRegistry:\n def __init__(self, skills_dir):\n self.skills = {}\n self._load_all()\n\n def _load_all(self):\n for path in skills_dir.rglob(\"SKILL.md\"):\n meta, body = parse_frontmatter(path.read_text())\n name = meta.get(\"name\", path.parent.name)\n self.skills[name] = {\n \"manifest\": {\n \"name\": name,\n \"description\": meta.get(\"description\", \"\"),\n },\n \"body\": body,\n }\n```\n\n这里的 `frontmatter` 你可以先简单理解成:\n\n> 放在正文前面的一小段结构化元数据。\n\n### 第三步:把 skill 目录放进 system prompt\n\n```python\nSYSTEM = f\"\"\"You are a coding agent.\nSkills available:\n{SKILL_REGISTRY.describe_available()}\n\"\"\"\n```\n\n注意这里放的是**目录信息**,不是完整正文。\n\n### 第四步:提供一个 `load_skill` 工具\n\n```python\nTOOL_HANDLERS = {\n \"load_skill\": lambda **kw: SKILL_REGISTRY.load_full_text(kw[\"name\"]),\n}\n```\n\n当模型调用它时,把完整 skill 正文作为 `tool_result` 返回。\n\n### 第五步:让 skill 正文只在当前需要时进入上下文\n\n这一步的核心思想就是:\n\n> 平时只展示“有哪些知识包”,真正工作时才把那一包展开。\n\n## skill、memory、CLAUDE.md 的边界\n\n这三个概念很容易混。\n\n### skill\n\n可选知识包。 \n只有在某类任务需要时才加载。\n\n### memory\n\n跨会话仍然有价值的信息。 \n它是系统记住的东西,不是任务手册。\n\n### CLAUDE.md\n\n更稳定、更长期的规则说明。 \n它通常比单个 skill 更“全局”。\n\n一个简单判断法:\n\n- 这是某类任务才需要的做法或知识:`skill`\n- 这是需要长期记住的事实或偏好:`memory`\n- 这是更稳定的全局规则:`CLAUDE.md`\n\n## 它如何接到主循环里\n\n这一章以后,system prompt 不再只是一段固定身份说明。\n\n它开始长出一个很重要的新段落:\n\n- 可用技能目录\n\n而消息流里则会出现新的按需注入内容:\n\n- 某个 skill 的完整正文\n\n也就是说,系统输入现在开始分成两层:\n\n```text\n稳定层:\n 身份、规则、工具、skill 目录\n\n按需层:\n 当前真的加载进来的 skill 正文\n```\n\n这也是 `s10` 会继续系统化展开的东西。\n\n## 初学者最容易犯的错\n\n### 1. 把所有 skill 正文永远塞进 system prompt\n\n这样会让 prompt 很快臃肿到难以维护。\n\n### 2. skill 目录信息写得太弱\n\n如果只有名字,没有描述,模型就不知道什么时候该加载它。\n\n### 3. 把 skill 当成“绝对规则”\n\nskill 更像“可选工作手册”,不是所有轮次都必须用。\n\n### 4. 把 skill 和 memory 混成一类\n\nskill 解决的是“怎么做一类事”,memory 解决的是“记住长期事实”。\n\n### 5. 一上来就讲太多多源加载细节\n\n教学主线真正要先讲清的是:\n\n**轻量发现,重内容按需加载。**\n\n## 教学边界\n\n这章只要先守住两层就够了:\n\n- 轻量发现:先告诉模型有哪些 skill\n- 按需深加载:真正需要时再把正文放进输入\n\n所以这里不用提前扩到:\n\n- 多来源收集\n- 条件激活\n- skill 参数化\n- fork 式执行\n- 更复杂的 prompt 管道拼装\n\n如果读者已经明白“为什么不能把所有 skill 永远塞进 system prompt,而应该先列目录、再按需加载”,这章就已经讲到位了。\n\n## 一句话记住\n\n**Skill 系统的核心,不是“多一个工具”,而是“把可选知识从常驻 prompt 里拆出来,改成按需加载”。**\n"
},
{
"version": "s06",
+ "slug": "s06-context-compact",
"locale": "zh",
"title": "s06: Context Compact (上下文压缩)",
- "content": "# s06: Context Compact (上下文压缩)\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"上下文总会满, 要有办法腾地方\"* -- 三层压缩策略, 换来无限会话。\n\n## 问题\n\n上下文窗口是有限的。读一个 1000 行的文件就吃掉 ~4000 token; 读 30 个文件、跑 20 条命令, 轻松突破 100k token。不压缩, Agent 根本没法在大项目里干活。\n\n## 解决方案\n\n三层压缩, 激进程度递增:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 工作原理\n\n1. **第一层 -- micro_compact**: 每次 LLM 调用前, 将旧的 tool result 替换为占位符。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. **第二层 -- auto_compact**: token 超过阈值时, 保存完整对话到磁盘, 让 LLM 做摘要。\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **第三层 -- manual compact**: `compact` 工具按需触发同样的摘要机制。\n\n4. 循环整合三层:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\n完整历史通过 transcript 保存在磁盘上。信息没有真正丢失, 只是移出了活跃上下文。\n\n## 相对 s05 的变更\n\n| 组件 | 之前 (s05) | 之后 (s06) |\n|----------------|------------------|--------------------------------|\n| Tools | 5 | 5 (基础 + compact) |\n| 上下文管理 | 无 | 三层压缩 |\n| Micro-compact | 无 | 旧结果 -> 占位符 |\n| Auto-compact | 无 | token 阈值触发 |\n| Transcripts | 无 | 保存到 .transcripts/ |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Read every Python file in the agents/ directory one by one` (观察 micro-compact 替换旧结果)\n2. `Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n"
+ "kind": "chapter",
+ "filename": "s06-context-compact.md",
+ "content": "# s06: Context Compact (上下文压缩)\n\n`s00 > s01 > s02 > s03 > s04 > s05 > [ s06 ] > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *上下文不是越多越好,而是要把“仍然有用的部分”留在活跃工作面里。*\n\n## 这一章要解决什么问题\n\n到了 `s05`,agent 已经会:\n\n- 读写文件\n- 规划步骤\n- 派子 agent\n- 按需加载 skill\n\n也正因为它会做的事情更多了,上下文会越来越快膨胀:\n\n- 读一个大文件,会塞进很多文本\n- 跑一条长命令,会得到大段输出\n- 多轮任务推进后,旧结果会越来越多\n\n如果没有压缩机制,很快就会出现这些问题:\n\n1. 模型注意力被旧结果淹没\n2. API 请求越来越重,越来越贵\n3. 最终直接撞上上下文上限,任务中断\n\n所以这一章真正要解决的是:\n\n**怎样在不丢掉主线连续性的前提下,把活跃上下文重新腾出空间。**\n\n## 先解释几个名词\n\n### 什么是上下文窗口\n\n你可以把上下文窗口理解成:\n\n> 模型这一轮真正能一起看到的输入容量。\n\n它不是无限的。\n\n### 什么是活跃上下文\n\n并不是历史上出现过的所有内容,都必须一直留在窗口里。\n\n活跃上下文更像:\n\n> 当前这几轮继续工作时,最值得模型马上看到的那一部分。\n\n### 什么是压缩\n\n这里的压缩,不是 ZIP 压缩文件。\n\n它的意思是:\n\n> 用更短的表示方式,保留继续工作真正需要的信息。\n\n例如:\n\n- 大输出只保留预览,全文写到磁盘\n- 很久以前的工具结果改成占位提示\n- 整段长历史总结成一份摘要\n\n## 最小心智模型\n\n这一章建议你先记三层,不要一上来记八层十层:\n\n```text\n第 1 层:大结果不直接塞进上下文\n -> 写到磁盘,只留预览\n\n第 2 层:旧结果不一直原样保留\n -> 替换成简短占位\n\n第 3 层:整体历史太长时\n -> 生成一份连续性摘要\n```\n\n可以画成这样:\n\n```text\ntool output\n |\n +-- 太大 -----------------> 保存到磁盘 + 留预览\n |\n v\nmessages\n |\n +-- 太旧 -----------------> 替换成占位提示\n |\n v\nif whole context still too large:\n |\n v\ncompact history -> summary\n```\n\n手动触发 `/compact` 或 `compact` 工具,本质上也是走第 3 层。\n\n## 关键数据结构\n\n### 1. Persisted Output Marker\n\n当工具输出太大时,不要把全文强塞进当前对话。\n\n最小标记可以长这样:\n\n```text\n\nFull output saved to: .task_outputs/tool-results/abc123.txt\nPreview:\n...\n \n```\n\n这个结构表达的是:\n\n- 全文没有丢\n- 只是搬去了磁盘\n- 当前上下文里只保留一个足够让模型继续判断的预览\n\n### 2. CompactState\n\n最小教学版建议你显式维护一份压缩状态:\n\n```python\n{\n \"has_compacted\": False,\n \"last_summary\": \"\",\n \"recent_files\": [],\n}\n```\n\n这里的字段分别表示:\n\n- `has_compacted`:这一轮之前是否已经做过完整压缩\n- `last_summary`:最近一次压缩得到的摘要\n- `recent_files`:最近碰过哪些文件,压缩后方便继续追踪\n\n### 3. Micro-Compact Boundary\n\n教学版可以先设一条简单规则:\n\n```text\n只保留最近 3 个工具结果的完整内容\n更旧的改成占位提示\n```\n\n这就已经足够让初学者理解:\n\n**不是所有历史都要原封不动地一直带着跑。**\n\n## 最小实现\n\n### 第一步:大工具结果先写磁盘\n\n```python\ndef persist_large_output(tool_use_id: str, output: str) -> str:\n if len(output) <= PERSIST_THRESHOLD:\n return output\n\n stored_path = save_to_disk(tool_use_id, output)\n preview = output[:2000]\n return (\n \"\\n\"\n f\"Full output saved to: {stored_path}\\n\"\n f\"Preview:\\n{preview}\\n\"\n \" \"\n )\n```\n\n这一步的关键思想是:\n\n> 让模型知道“发生了什么”,但不强迫它一直背着整份原始大输出。\n\n### 第二步:旧工具结果做微压缩\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = collect_tool_results(messages)\n for result in tool_results[:-3]:\n result[\"content\"] = \"[Earlier tool result omitted for brevity]\"\n return messages\n```\n\n这一步不是为了优雅,而是为了防止上下文被旧结果持续霸占。\n\n### 第三步:整体历史过长时,做一次完整压缩\n\n```python\ndef compact_history(messages: list) -> list:\n summary = summarize_conversation(messages)\n return [{\n \"role\": \"user\",\n \"content\": (\n \"This conversation was compacted for continuity.\\n\\n\"\n + summary\n ),\n }]\n```\n\n这里最重要的不是摘要格式多么复杂,而是你要保住这几类信息:\n\n- 当前目标是什么\n- 已经做了什么\n- 改过哪些文件\n- 还有什么没完成\n- 哪些决定不能丢\n\n### 第四步:在主循环里接入压缩\n\n```python\ndef agent_loop(state):\n while True:\n state[\"messages\"] = micro_compact(state[\"messages\"])\n\n if estimate_context_size(state[\"messages\"]) > CONTEXT_LIMIT:\n state[\"messages\"] = compact_history(state[\"messages\"])\n state[\"has_compacted\"] = True\n\n response = call_model(...)\n ...\n```\n\n### 第五步:手动压缩和自动压缩复用同一条机制\n\n教学版里,`compact` 工具不需要重新发明另一套逻辑。\n\n它只需要表达:\n\n> 用户或模型现在主动要求执行一次完整压缩。\n\n## 压缩后,真正要保住什么\n\n这是这章最容易讲虚的地方。\n\n压缩不是“把历史缩短”这么简单。\n\n真正重要的是:\n\n**让模型还能继续接着干活。**\n\n所以一份合格的压缩结果,至少要保住下面这些东西:\n\n1. 当前任务目标\n2. 已完成的关键动作\n3. 已修改或重点查看过的文件\n4. 
关键决定与约束\n5. 下一步应该做什么\n\n如果这些没有保住,那压缩虽然腾出了空间,却打断了工作连续性。\n\n## 它如何接到主循环里\n\n从这一章开始,主循环不再只是:\n\n- 收消息\n- 调模型\n- 跑工具\n\n它还多了一个很关键的责任:\n\n- 管理活跃上下文的预算\n\n也就是说,agent loop 现在开始同时维护两件事:\n\n```text\n任务推进\n上下文预算\n```\n\n这一步非常重要,因为后面的很多机制都会和它联动:\n\n- `s09` memory 决定什么信息值得长期保存\n- `s10` prompt pipeline 决定哪些块应该重新注入\n- `s11` error recovery 会处理压缩不足时的恢复分支\n\n## 初学者最容易犯的错\n\n### 1. 以为压缩等于删除\n\n不是。\n\n更准确地说,是把“不必常驻活跃上下文”的内容换一种表示。\n\n### 2. 只在撞到上限后才临时乱补\n\n更好的做法是从一开始就有三层思路:\n\n- 大结果先落盘\n- 旧结果先缩短\n- 整体过长再摘要\n\n### 3. 摘要只写成一句空话\n\n如果摘要没有保住文件、决定、下一步,它对继续工作没有帮助。\n\n### 4. 把压缩和 memory 混成一类\n\n压缩解决的是:\n\n- 当前会话太长了怎么办\n\nmemory 解决的是:\n\n- 哪些信息跨会话仍然值得保留\n\n### 5. 一上来就给初学者讲过多产品化层级\n\n教学主线先讲清最小正确模型,比堆很多层名词更重要。\n\n## 教学边界\n\n这章不要滑成“所有产品化压缩技巧大全”。\n\n教学版只需要讲清三件事:\n\n1. 什么该留在活跃上下文里\n2. 什么该搬到磁盘或占位标记里\n3. 完整压缩后,哪些连续性信息一定不能丢\n\n这已经足够建立稳定心智:\n\n**压缩不是删历史,而是把细节搬走,好让系统继续工作。**\n\n如果读者已经能用 `persisted output + micro compact + summary compact` 保住长会话连续性,这章就已经够深了。\n\n## 一句话记住\n\n**上下文压缩的核心,不是尽量少字,而是让模型在更短的活跃上下文里,仍然保住继续工作的连续性。**\n"
},
{
"version": "s07",
+ "slug": "s07-permission-system",
"locale": "zh",
- "title": "s07: Task System (任务系统)",
- "content": "# s07: Task System (任务系统)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"大目标要拆成小任务, 排好序, 记在磁盘上\"* -- 文件持久化的任务图, 为多 agent 协作打基础。\n\n## 问题\n\ns03 的 TodoManager 只是内存中的扁平清单: 没有顺序、没有依赖、状态只有做完没做完。真实目标是有结构的 -- 任务 B 依赖任务 A, 任务 C 和 D 可以并行, 任务 E 要等 C 和 D 都完成。\n\n没有显式的关系, Agent 分不清什么能做、什么被卡住、什么能同时跑。而且清单只活在内存里, 上下文压缩 (s06) 一跑就没了。\n\n## 解决方案\n\n把扁平清单升级为持久化到磁盘的**任务图**。每个任务是一个 JSON 文件, 有状态、前置依赖 (`blockedBy`) 和后置依赖 (`blocks`)。任务图随时回答三个问题:\n\n- **什么可以做?** -- 状态为 `pending` 且 `blockedBy` 为空的任务。\n- **什么被卡住?** -- 等待前置任务完成的任务。\n- **什么做完了?** -- 状态为 `completed` 的任务, 完成时自动解锁后续任务。\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\n任务图 (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\n顺序: task 1 必须先完成, 才能开始 2 和 3\n并行: task 2 和 3 可以同时执行\n依赖: task 4 要等 2 和 3 都完成\n状态: pending -> in_progress -> completed\n```\n\n这个任务图是 s07 之后所有机制的协调骨架: 后台执行 (s08)、多 agent 团队 (s09+)、worktree 隔离 (s12) 都读写这同一个结构。\n\n## 工作原理\n\n1. **TaskManager**: 每个任务一个 JSON 文件, CRUD + 依赖图。\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. **依赖解除**: 完成任务时, 自动将其 ID 从其他任务的 `blockedBy` 中移除, 解锁后续任务。\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **状态变更 + 依赖关联**: `update` 处理状态转换和依赖边。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. 四个任务工具加入 dispatch map。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\n从 s07 起, 任务图是多步工作的默认选择。s03 的 Todo 仍可用于单次会话内的快速清单。\n\n## 相对 s06 的变更\n\n| 组件 | 之前 (s06) | 之后 (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| 规划模型 | 扁平清单 (仅内存) | 带依赖关系的任务图 (磁盘) |\n| 关系 | 无 | `blockedBy` + `blocks` 边 |\n| 状态追踪 | 做完没做完 | `pending` -> `in_progress` -> `completed` |\n| 持久化 | 压缩后丢失 | 压缩和重启后存活 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. 
`Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n"
+ "title": "s07: Permission System (权限系统)",
+ "kind": "chapter",
+ "filename": "s07-permission-system.md",
+ "content": "# s07: Permission System (权限系统)\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > [ s07 ] > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *模型可以提出行动建议,但真正执行之前,必须先过安全关。*\n\n## 这一章的核心目标\n\n到了 `s06`,你的 agent 已经能读文件、改文件、跑命令、做规划、压缩上下文。\n\n问题也随之出现了:\n\n- 模型可能会写错文件\n- 模型可能会执行危险命令\n- 模型可能会在不该动手的时候动手\n\n所以从这一章开始,系统需要一条新的管道:\n\n**“意图”不能直接变成“执行”,中间必须经过权限检查。**\n\n## 建议联读\n\n- 如果你开始把“模型提议动作”和“系统真的执行动作”混成一件事,先回 [`s00a-query-control-plane.md`](./s00a-query-control-plane.md),重新确认 query 是怎么进入控制面的。\n- 如果你还没彻底稳住“工具请求为什么不能直接落到 handler”,建议把 [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md) 放在手边一起读。\n- 如果你在 `PermissionRule / PermissionDecision / tool_result` 这几层对象上开始打结,先回 [`data-structures.md`](./data-structures.md),把状态边界重新拆开。\n\n## 先解释几个名词\n\n### 什么是权限系统\n\n权限系统不是“有没有权限”这样一个布尔值。\n\n它更像一条管道,用来回答:\n\n1. 这次调用要不要直接拒绝?\n2. 能不能自动放行?\n3. 剩下的要不要问用户?\n\n### 什么是权限模式\n\n权限模式是系统当前的总体风格。\n\n例如:\n\n- 谨慎一点:大多数操作都问用户\n- 保守一点:只允许读,不允许写\n- 流畅一点:简单安全的操作自动放行\n\n### 什么是规则\n\n规则就是“遇到某种工具调用时,该怎么处理”的小条款。\n\n最小规则通常包含三部分:\n\n```python\n{\n \"tool\": \"bash\",\n \"content\": \"sudo *\",\n \"behavior\": \"deny\",\n}\n```\n\n意思是:\n\n- 针对 `bash`\n- 如果命令内容匹配 `sudo *`\n- 就拒绝\n\n## 最小权限系统应该长什么样\n\n如果你是从 0 开始手写,一个最小但正确的权限系统只需要四步:\n\n```text\ntool_call\n |\n v\n1. deny rules -> 命中了就拒绝\n |\n v\n2. mode check -> 根据当前模式决定\n |\n v\n3. allow rules -> 命中了就放行\n |\n v\n4. ask user -> 剩下的交给用户确认\n```\n\n这四步已经能覆盖教学仓库 80% 的核心需要。\n\n## 为什么顺序是这样\n\n### 第 1 步先看 deny rules\n\n因为有些东西不应该交给“模式”去决定。\n\n比如:\n\n- 明显危险的命令\n- 明显越界的路径\n\n这些应该优先挡掉。\n\n### 第 2 步看 mode\n\n因为模式决定当前会话的大方向。\n\n例如在 `plan` 模式下,系统就应该天然更保守。\n\n### 第 3 步看 allow rules\n\n有些安全、重复、常见的操作可以直接过。\n\n比如:\n\n- 读文件\n- 搜索代码\n- 查看 git 状态\n\n### 第 4 步才 ask\n\n前面都没命中的灰区,才交给用户。\n\n## 推荐先实现的 3 种模式\n\n不要一上来就做特别多模式。 \n先把下面三种做稳:\n\n| 模式 | 含义 | 适合什么场景 |\n|---|---|---|\n| `default` | 未命中规则时问用户 | 日常交互 |\n| `plan` | 只允许读,不允许写 | 计划、审查、分析 |\n| `auto` | 简单安全操作自动过,危险操作再问 | 高流畅度探索 |\n\n先有这三种,你就已经有了一个可用的权限系统。\n\n## 这一章最重要的数据结构\n\n### 1. 权限规则\n\n```python\nPermissionRule = {\n \"tool\": str,\n \"behavior\": \"allow\" | \"deny\" | \"ask\",\n \"path\": str | None,\n \"content\": str | None,\n}\n```\n\n你不一定一开始就需要 `path` 和 `content` 都支持。 \n但规则至少要能表达:\n\n- 针对哪个工具\n- 命中后怎么处理\n\n### 2. 权限模式\n\n```python\nmode = \"default\" | \"plan\" | \"auto\"\n```\n\n### 3. 权限决策结果\n\n```python\n{\n \"behavior\": \"allow\" | \"deny\" | \"ask\",\n \"reason\": \"why this decision was made\"\n}\n```\n\n这三个结构已经足够搭起最小系统。\n\n## 最小实现怎么写\n\n```python\ndef check_permission(tool_name: str, tool_input: dict) -> dict:\n # 1. deny rules\n for rule in deny_rules:\n if matches(rule, tool_name, tool_input):\n return {\"behavior\": \"deny\", \"reason\": \"matched deny rule\"}\n\n # 2. mode\n if mode == \"plan\" and tool_name in WRITE_TOOLS:\n return {\"behavior\": \"deny\", \"reason\": \"plan mode blocks writes\"}\n if mode == \"auto\" and tool_name in READ_ONLY_TOOLS:\n return {\"behavior\": \"allow\", \"reason\": \"auto mode allows reads\"}\n\n # 3. allow rules\n for rule in allow_rules:\n if matches(rule, tool_name, tool_input):\n return {\"behavior\": \"allow\", \"reason\": \"matched allow rule\"}\n\n # 4. 
fallback\n return {\"behavior\": \"ask\", \"reason\": \"needs confirmation\"}\n```\n\n然后在执行工具前接进去:\n\n```python\ndecision = perms.check(tool_name, tool_input)\n\nif decision[\"behavior\"] == \"deny\":\n return f\"Permission denied: {decision['reason']}\"\nif decision[\"behavior\"] == \"ask\":\n ok = ask_user(...)\n if not ok:\n return \"Permission denied by user\"\n\nreturn handler(**tool_input)\n```\n\n## Bash 为什么值得单独讲\n\n所有工具里,`bash` 通常最危险。\n\n因为:\n\n- `read_file` 只能读文件\n- `write_file` 只能写文件\n- 但 `bash` 几乎能做任何事\n\n所以你不能只把 bash 当成一个普通字符串。\n\n一个更成熟的系统,通常会把 bash 当成一门小语言来检查。\n\n哪怕教学版不做完整语法分析,也建议至少先挡住这些明显危险点:\n\n- `sudo`\n- `rm -rf`\n- 命令替换\n- 可疑重定向\n- 明显的 shell 元字符拼接\n\n这背后的核心思想只有一句:\n\n**bash 不是普通文本,而是可执行动作描述。**\n\n## 初学者怎么把这章做对\n\n### 第一步:先做 3 个模式\n\n不要一开始就做 6 个模式、10 个来源、复杂 classifier。\n\n先稳稳做出:\n\n- `default`\n- `plan`\n- `auto`\n\n### 第二步:先做 deny / allow 两类规则\n\n这已经足够表达很多现实需求。\n\n### 第三步:给 bash 加最小安全检查\n\n哪怕只是模式匹配版,也比完全裸奔好很多。\n\n### 第四步:加拒绝计数\n\n如果 agent 连续多次被拒绝,说明它可能卡住了。\n\n这时可以:\n\n- 给出提示\n- 建议切到 `plan`\n- 让用户重新澄清目标\n\n## 教学边界\n\n这一章先只讲透一条权限管道就够了:\n\n- 工具意图先进入权限判断\n- 权限结果只分成 `allow / ask / deny`\n- 通过以后才真的执行\n\n先把这条主线做稳,比一开始塞进很多模式名、规则来源、写回配置、额外目录、自动分类器都更重要。\n\n换句话说,这章要先让读者真正理解的是:\n\n**任何工具调用,都不应该直接执行;中间必须先过一条权限管道。**\n\n## 这章不应该讲太多什么\n\n为了不打乱初学者心智,这章不应该过早陷入:\n\n- 企业策略源的全部优先级\n- 非常复杂的自动分类器\n- 产品环境里的所有无头模式细节\n- 某个特定生产代码里的全部 validator 名称\n\n这些东西存在,但不属于第一层理解。\n\n第一层理解只有一句话:\n\n**任何工具调用,都不应该直接执行;中间必须先过一条权限管道。**\n\n## 这一章和后续章节的关系\n\n- `s07` 决定“能不能执行”\n- `s08` 决定“执行前后还能不能插入额外逻辑”\n- `s10` 会把当前模式和权限说明放进 prompt 组装里\n\n所以这章是后面很多机制的安全前提。\n\n## 学完这章后,你应该能回答\n\n- 为什么权限系统不是一个简单开关?\n- 为什么 deny 要先于 allow?\n- 为什么要先做 3 个模式,而不是一上来做很复杂?\n- 为什么 bash 要被特殊对待?\n\n---\n\n**一句话记住:权限系统不是为了让 agent 更笨,而是为了让 agent 的行动先经过一道可靠的安全判断。**\n"
},
{
"version": "s08",
+ "slug": "s08-hook-system",
"locale": "zh",
- "title": "s08: Background Tasks (后台任务)",
- "content": "# s08: Background Tasks (后台任务)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"慢操作丢后台, agent 继续想下一步\"* -- 后台线程跑命令, 完成后注入通知。\n\n## 问题\n\n有些命令要跑好几分钟: `npm install`、`pytest`、`docker build`。阻塞式循环下模型只能干等。用户说 \"装依赖, 顺便建个配置文件\", Agent 却只能一个一个来。\n\n## 解决方案\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## 工作原理\n\n1. BackgroundManager 用线程安全的通知队列追踪任务。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` 启动守护线程, 立即返回。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. 子进程完成后, 结果进入通知队列。\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. 每次 LLM 调用前排空通知队列。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\" \"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\n循环保持单线程。只有子进程 I/O 被并行化。\n\n## 相对 s07 的变更\n\n| 组件 | 之前 (s07) | 之后 (s08) |\n|----------------|------------------|------------------------------------|\n| Tools | 8 | 6 (基础 + background_run + check) |\n| 执行方式 | 仅阻塞 | 阻塞 + 后台线程 |\n| 通知机制 | 无 | 每轮排空的队列 |\n| 并发 | 无 | 守护线程 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n"
+ "title": "s08: Hook System (Hook 系统)",
+ "kind": "chapter",
+ "filename": "s08-hook-system.md",
+ "content": "# s08: Hook System (Hook 系统)\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > [ s08 ] > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *不改主循环代码,也能在关键时机插入额外行为。*\n\n## 这章要解决什么问题\n\n到了 `s07`,我们已经能在工具执行前做权限判断。\n\n但很多真实需求并不属于“允许 / 拒绝”这条线,而属于:\n\n- 在某个固定时机顺手做一点事\n- 不改主循环主体,也能接入额外规则\n- 让用户或插件在系统边缘扩展能力\n\n例如:\n\n- 会话开始时打印欢迎信息\n- 工具执行前做一次额外检查\n- 工具执行后补一条审计日志\n\n如果每增加一个需求,你都去修改主循环,主循环就会越来越重,最后谁都不敢动。\n\n所以这一章要引入的机制是:\n\n**主循环只负责暴露“时机”,真正的附加行为交给 hook。**\n\n## 建议联读\n\n- 如果你还在把 hook 想成“往主循环里继续塞 if/else”,先回 [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md),重新确认主循环和控制面的边界。\n- 如果你开始把主循环、tool handler、hook side effect 混成一层,建议先看 [`entity-map.md`](./entity-map.md),把谁负责推进主状态、谁只是旁路观察分开。\n- 如果你准备继续读后面的 prompt、recovery、teams,可以把 [`s00e-reference-module-map.md`](./s00e-reference-module-map.md) 一起放在旁边,因为从这一章开始“控制面 + 侧车扩展”会反复一起出现。\n\n## 什么是 hook\n\n你可以把 `hook` 理解成一个“预留插口”。\n\n意思是:\n\n1. 主系统运行到某个固定时机\n2. 把当前上下文交给 hook\n3. hook 返回结果\n4. 主系统再决定下一步怎么继续\n\n最重要的一句话是:\n\n**hook 让系统可扩展,但不要求主循环理解每个扩展需求。**\n\n主循环只需要知道三件事:\n\n- 现在是什么事件\n- 要把哪些上下文交出去\n- 收到结果以后怎么处理\n\n## 最小心智模型\n\n教学版先只讲 3 个事件:\n\n- `SessionStart`\n- `PreToolUse`\n- `PostToolUse`\n\n这样做不是因为系统永远只有 3 个事件, \n而是因为初学者先把这 3 个事件学明白,就已经能自己做出一套可用的 hook 机制。\n\n可以把它想成这条流程:\n\n```text\n主循环继续往前跑\n |\n +-- 到了某个预留时机\n |\n +-- 调用 hook runner\n |\n +-- 收到 hook 返回结果\n |\n +-- 决定继续、阻止、还是补充说明\n```\n\n## 教学版统一返回约定\n\n这一章最容易把人讲乱的地方,就是“不同 hook 事件的返回语义”。\n\n教学版建议先统一成下面这套规则:\n\n| 退出码 | 含义 |\n|---|---|\n| `0` | 正常继续 |\n| `1` | 阻止当前动作 |\n| `2` | 注入一条补充消息,再继续 |\n\n这套规则的价值不在于“最真实”,而在于“最容易学会”。\n\n因为它让你先记住 hook 最核心的 3 种作用:\n\n- 观察\n- 拦截\n- 补充\n\n等教学版跑通以后,再去做“不同事件采用不同语义”的细化,也不会乱。\n\n## 关键数据结构\n\n### 1. HookEvent\n\n```python\nevent = {\n \"name\": \"PreToolUse\",\n \"payload\": {\n \"tool_name\": \"bash\",\n \"input\": {\"command\": \"pytest\"},\n },\n}\n```\n\n它回答的是:\n\n- 现在发生了什么事\n- 这件事的上下文是什么\n\n### 2. HookResult\n\n```python\nresult = {\n \"exit_code\": 0,\n \"message\": \"\",\n}\n```\n\n它回答的是:\n\n- hook 想不想阻止主流程\n- 要不要向模型补一条说明\n\n### 3. 
HookRunner\n\n```python\nclass HookRunner:\n def run(self, event_name: str, payload: dict) -> dict:\n ...\n```\n\n主循环不直接关心“每个 hook 的细节实现”。 \n它只把事件交给统一的 runner。\n\n这就是这一章的关键抽象边界:\n\n**主循环知道事件名,hook runner 知道怎么调扩展逻辑。**\n\n## 最小执行流程\n\n先看最重要的 `PreToolUse` / `PostToolUse`:\n\n```text\nmodel 发起 tool_use\n |\n v\nrun_hook(\"PreToolUse\", ...)\n |\n +-- exit 1 -> 阻止工具执行\n +-- exit 2 -> 先补一条消息给模型,再继续\n +-- exit 0 -> 直接继续\n |\n v\n执行工具\n |\n v\nrun_hook(\"PostToolUse\", ...)\n |\n +-- exit 2 -> 追加补充说明\n +-- exit 0 -> 正常结束\n```\n\n再加上 `SessionStart`,一整套最小 hook 机制就立住了。\n\n## 最小实现\n\n### 第一步:准备一个事件到处理器的映射\n\n```python\nHOOKS = {\n \"SessionStart\": [on_session_start],\n \"PreToolUse\": [pre_tool_guard],\n \"PostToolUse\": [post_tool_log],\n}\n```\n\n这里先用“一个事件对应一组处理函数”的最小结构就够了。\n\n### 第二步:统一运行 hook\n\n```python\ndef run_hooks(event_name: str, payload: dict) -> dict:\n for handler in HOOKS.get(event_name, []):\n result = handler(payload)\n if result[\"exit_code\"] in (1, 2):\n return result\n return {\"exit_code\": 0, \"message\": \"\"}\n```\n\n教学版里先用“谁先返回阻止/注入,谁就优先”的简单规则。\n\n### 第三步:接进主循环\n\n```python\npre = run_hooks(\"PreToolUse\", {\n \"tool_name\": block.name,\n \"input\": block.input,\n})\n\nif pre[\"exit_code\"] == 1:\n results.append(blocked_tool_result(pre[\"message\"]))\n continue\n\nif pre[\"exit_code\"] == 2:\n messages.append({\"role\": \"user\", \"content\": pre[\"message\"]})\n\noutput = run_tool(...)\n\npost = run_hooks(\"PostToolUse\", {\n \"tool_name\": block.name,\n \"input\": block.input,\n \"output\": output,\n})\n```\n\n这一步最关键的不是代码量,而是心智:\n\n**hook 不是主循环的替代品,hook 是主循环在固定时机对外发出的调用。**\n\n## 这一章的教学边界\n\n如果你后面继续扩展平台,hook 事件面当然会继续扩大。\n\n常见扩展方向包括:\n\n- 生命周期事件:开始、结束、配置变化\n- 工具事件:执行前、执行后、失败后\n- 压缩事件:压缩前、压缩后\n- 多 agent 事件:子 agent 启动、任务完成、队友空闲\n\n但教学仓这里要守住一个原则:\n\n**先把 hook 的统一模型讲清,再慢慢增加事件种类。**\n\n不要一开始就把几十种事件、几十套返回语义全部灌给读者。\n\n## 初学者最容易犯的错\n\n### 1. 把 hook 当成“到处插 if”\n\n如果还是散落在主循环里写条件分支,那还不是真正的 hook 设计。\n\n### 2. 没有统一的返回结构\n\n今天返回字符串,明天返回布尔值,后天返回整数,最后主循环一定会变乱。\n\n### 3. 一上来就把所有事件做全\n\n教学顺序应该是:\n\n1. 先学会 3 个事件\n2. 再学会统一返回协议\n3. 最后才扩事件面\n\n### 4. 忘了说明“教学版统一语义”和“高完成度细化语义”的区别\n\n如果这层不提前说清,读者后面看到更复杂实现时会以为前面学错了。\n\n其实不是学错了,而是:\n\n**先学统一模型,再学事件细化。**\n\n## 学完这一章,你应该真正掌握什么\n\n学完以后,你应该能自己清楚说出下面几句话:\n\n1. hook 的作用,是在固定时机扩展系统,而不是改写主循环。\n2. hook 至少需要“事件名 + payload + 返回结果”这三样东西。\n3. 教学版可以先用统一的 `0 / 1 / 2` 返回约定。\n4. `PreToolUse` 和 `PostToolUse` 已经足够支撑最核心的扩展能力。\n\n如果这 4 句话你已经能独立复述,说明这一章的核心心智已经建立起来了。\n\n## 下一章学什么\n\n这一章解决的是:\n\n> 在固定时机插入行为。\n\n下一章 `s09` 要解决的是:\n\n> 哪些信息应该跨会话留下,哪些不该留。\n\n也就是从“扩展点”进一步走向“持久状态”。\n"
},
{
"version": "s09",
+ "slug": "s09-memory-system",
"locale": "zh",
- "title": "s09: Agent Teams (Agent 团队)",
- "content": "# s09: Agent Teams (Agent 团队)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"任务太大一个人干不完, 要能分给队友\"* -- 持久化队友 + JSONL 邮箱。\n\n## 问题\n\nSubagent (s04) 是一次性的: 生成、干活、返回摘要、消亡。没有身份, 没有跨调用的记忆。Background Tasks (s08) 能跑 shell 命令, 但做不了 LLM 引导的决策。\n\n真正的团队协作需要三样东西: (1) 能跨多轮对话存活的持久 Agent, (2) 身份和生命周期管理, (3) Agent 之间的通信通道。\n\n## 解决方案\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## 工作原理\n\n1. TeammateManager 通过 config.json 维护团队名册。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()` 创建队友并在线程中启动 agent loop。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. MessageBus: append-only 的 JSONL 收件箱。`send()` 追加一行; `read_inbox()` 读取全部并清空。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 每个队友在每次 LLM 调用前检查收件箱, 将消息注入上下文。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox} \"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## 相对 s08 的变更\n\n| 组件 | 之前 (s08) | 之后 (s09) |\n|----------------|------------------|------------------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agent 数量 | 单一 | 领导 + N 个队友 |\n| 持久化 | 无 | config.json + JSONL 收件箱 |\n| 线程 | 后台命令 | 每线程完整 agent loop |\n| 生命周期 | 一次性 | idle -> working -> idle |\n| 通信 | 无 | message + broadcast |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. 输入 `/team` 查看团队名册和状态\n5. 输入 `/inbox` 手动检查领导的收件箱\n"
+ "title": "s09: Memory System (记忆系统)",
+ "kind": "chapter",
+ "filename": "s09-memory-system.md",
+ "content": "# s09: Memory System (记忆系统)\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > [ s09 ] > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *不是所有信息都该进入 memory;只有跨会话仍然有价值的信息,才值得留下。*\n\n## 这一章在解决什么问题\n\n如果一个 agent 每次新会话都完全从零开始,它就会不断重复忘记这些事情:\n\n- 用户长期偏好\n- 用户多次纠正过的错误\n- 某些不容易从代码直接看出来的项目约定\n- 某些外部资源在哪里找\n\n这会让系统显得“每次都像第一次合作”。\n\n所以需要 memory。\n\n## 但先立一个边界:memory 不是什么都存\n\n这是这一章最容易讲歪的地方。\n\nmemory 不是“把一切有用信息都记下来”。\n\n如果你这样做,很快就会出现两个问题:\n\n1. memory 变成垃圾堆,越存越乱\n2. agent 开始依赖过时记忆,而不是读取当前真实状态\n\n所以这章必须先立一个原则:\n\n**只有那些跨会话仍然有价值,而且不能轻易从当前仓库状态直接推出来的信息,才适合进入 memory。**\n\n## 建议联读\n\n- 如果你还把 memory 想成“更长一点的上下文窗口”,先回 [`s06-context-compact.md`](./s06-context-compact.md),重新确认 compact 和长期记忆是两套机制。\n- 如果你在 `messages[]`、摘要块、memory store 这三层之间开始读混,建议边看边对照 [`data-structures.md`](./data-structures.md)。\n- 如果你准备继续读 `s10`,最好把 [`s10a-message-prompt-pipeline.md`](./s10a-message-prompt-pipeline.md) 放在旁边,因为 memory 真正重要的是它怎样重新进入下一轮输入。\n\n## 先解释几个名词\n\n### 什么是“跨会话”\n\n意思是:\n\n- 当前对话结束了\n- 下次重新开始一个新对话\n- 这条信息仍然可能有用\n\n### 什么是“不可轻易重新推导”\n\n例如:\n\n- 用户明确说“我讨厌这种写法”\n- 某个架构决定背后的真实原因是合规要求\n- 某个团队总在某个外部看板里跟踪问题\n\n这些东西,往往不是你重新扫一遍代码就能立刻知道的。\n\n## 最适合先教的 4 类 memory\n\n### 1. `user`\n\n用户偏好。\n\n例如:\n\n- 喜欢什么代码风格\n- 回答希望简洁还是详细\n- 更偏好什么工具链\n\n### 2. `feedback`\n\n用户明确纠正过你的地方。\n\n例如:\n\n- “不要这样改”\n- “这个判断方式之前错过”\n- “以后遇到这种情况要先做 X”\n\n### 3. `project`\n\n这里只保存**不容易从代码直接重新看出来**的项目约定或背景。\n\n例如:\n\n- 某个设计决定是因为合规而不是技术偏好\n- 某个目录虽然看起来旧,但短期内不能动\n- 某条规则是团队故意定下来的,不是历史残留\n\n### 4. `reference`\n\n外部资源指针。\n\n例如:\n\n- 某个问题单在哪个看板里\n- 某个监控面板在哪里\n- 某个资料库在哪个 URL\n\n## 哪些东西不要存进 memory\n\n这是比“该存什么”更重要的一张表:\n\n| 不要存的东西 | 为什么 |\n|---|---|\n| 文件结构、函数签名、目录布局 | 这些可以重新读代码得到 |\n| 当前任务进度 | 这属于 task / plan,不属于 memory |\n| 临时分支名、当前 PR 号 | 很快会过时 |\n| 修 bug 的具体代码细节 | 代码和提交记录才是准确信息 |\n| 密钥、密码、凭证 | 安全风险 |\n\n这条边界一定要稳。\n\n否则 memory 会从“帮助系统长期变聪明”变成“帮助系统长期产生幻觉”。\n\n## 最小心智模型\n\n```text\nconversation\n |\n | 用户提到一个长期重要信息\n v\nsave_memory\n |\n v\n.memory/\n ├── MEMORY.md # 索引\n ├── prefer_tabs.md\n ├── feedback_tests.md\n └── incident_board.md\n |\n v\n下次新会话开始时重新加载\n```\n\n## 这一章最关键的数据结构\n\n### 1. 单条 memory 文件\n\n最简单也最清晰的做法,是每条 memory 一个文件。\n\n```md\n---\nname: prefer_tabs\ndescription: User prefers tabs for indentation\ntype: user\n---\nThe user explicitly prefers tabs over spaces when editing source files.\n```\n\n这里的 `frontmatter` 可以理解成:\n\n**放在正文前面的结构化元数据。**\n\n它让系统先知道:\n\n- 这条 memory 叫什么\n- 大致是什么\n- 属于哪一类\n\n### 2. 
索引文件 `MEMORY.md`\n\n最小实现里,再加一个索引文件就够了:\n\n```md\n# Memory Index\n\n- prefer_tabs: User prefers tabs for indentation [user]\n- avoid_mock_heavy_tests: User dislikes mock-heavy tests [feedback]\n```\n\n索引的作用不是重复保存全部内容。 \n它只是帮系统快速知道“有哪些 memory 可用”。\n\n## 最小实现步骤\n\n### 第一步:定义 memory 类型\n\n```python\nMEMORY_TYPES = (\"user\", \"feedback\", \"project\", \"reference\")\n```\n\n### 第二步:写一个 `save_memory` 工具\n\n最小参数就四个:\n\n- `name`\n- `description`\n- `type`\n- `content`\n\n### 第三步:每条 memory 独立落盘\n\n```python\ndef save_memory(name, description, mem_type, content):\n path = memory_dir / f\"{safe_name}.md\"\n path.write_text(frontmatter + content)\n rebuild_index()\n```\n\n### 第四步:会话开始时重新加载\n\n把 memory 文件重新读出来,拼成一段 memory section。\n\n### 第五步:把 memory section 接进系统输入\n\n这一步会在 `s10` 的 prompt 组装里系统化。\n\n## memory、task、plan、CLAUDE.md 的边界\n\n这是最值得初学者反复区分的一组概念。\n\n### memory\n\n保存跨会话仍有价值的信息。\n\n### task\n\n保存当前工作要做什么、依赖关系如何、进度如何。\n\n### plan\n\n保存“这一轮我要怎么做”的过程性安排。\n\n### CLAUDE.md\n\n保存更稳定、更像长期规则的说明文本。\n\n一个简单判断法:\n\n- 只对这次任务有用:`task / plan`\n- 以后很多会话可能都还会有用:`memory`\n- 属于长期系统级或项目级固定说明:`CLAUDE.md`\n\n## 初学者最容易犯的错\n\n### 错误 1:把代码结构也存进 memory\n\n例如:\n\n- “这个项目有 `src/` 和 `tests/`”\n- “这个函数在 `app.py`”\n\n这些都不该存。\n\n因为系统完全可以重新去读。\n\n### 错误 2:把当前任务状态存进 memory\n\n例如:\n\n- “我现在正在改认证模块”\n- “这个 PR 还有两项没做”\n\n这些是 task / plan,不是 memory。\n\n### 错误 3:把 memory 当成绝对真相\n\nmemory 可能过时。\n\n所以更稳妥的规则是:\n\n**memory 用来提供方向,不用来替代当前观察。**\n\n如果 memory 和当前代码状态冲突,优先相信你现在看到的真实状态。\n\n## 从教学版到高完成度版:记忆系统还要补的 6 条边界\n\n最小教学版只要先把“该存什么 / 不该存什么”讲清楚。 \n但如果你要把系统做到更稳、更像真实工作平台,下面这 6 条边界也必须讲清。\n\n### 1. 不是所有 memory 都该放在同一个作用域\n\n更完整系统里,至少要分清:\n\n- `private`:只属于当前用户或当前 agent 的记忆\n- `team`:整个项目团队都该共享的记忆\n\n一个很稳的教学判断法是:\n\n- `user` 类型,几乎总是 `private`\n- `feedback` 类型,默认 `private`;只有它明确是团队规则时才升到 `team`\n- `project` 和 `reference`,通常更偏向 `team`\n\n这样做的价值是:\n\n- 不把个人偏好误写成团队规范\n- 不把团队规范只锁在某一个人的私有记忆里\n\n### 2. 不只保存“你做错了”,也要保存“这样做是对的”\n\n很多人讲 memory 时,只会想到纠错。\n\n这不够。\n\n因为真正能长期使用的系统,还需要记住:\n\n- 哪种不明显的做法,用户已经明确认可\n- 哪个判断方式,项目里已经被验证有效\n\n也就是说,`feedback` 不只来自负反馈,也来自被验证的正反馈。\n\n如果只存纠错,不存被确认有效的做法,系统会越来越保守,却不一定越来越聪明。\n\n### 3. 有些东西即使用户要求你存,也不该直接存\n\n这条边界一定要说死。\n\n就算用户说“帮我记住”,下面这些东西也不应该直接写进 memory:\n\n- 本周 PR 列表\n- 当前分支名\n- 今天改了哪些文件\n- 某个函数现在在什么路径\n- 当前正在做哪两个子任务\n\n这些内容的问题不是“没有价值”,而是:\n\n- 太容易过时\n- 更适合存在代码、任务板、git 记录里\n- 会把 memory 变成活动日志\n\n更好的做法是追问一句:\n\n> 这里面真正值得长期留下的、非显然的信息到底是什么?\n\n### 4. memory 会漂移,所以回答前要先核对当前状态\n\nmemory 记录的是“曾经成立过的事实”,不是永久真理。\n\n所以更稳的工作方式是:\n\n1. 先把 memory 当作方向提示\n2. 再去读当前文件、当前资源、当前配置\n3. 如果冲突,优先相信你刚观察到的真实状态\n\n这点对初学者尤其重要。 \n因为他们最容易把 memory 当成“已经查证过的答案”。\n\n### 5. 用户说“忽略 memory”时,就当它是空的\n\n这是一个很容易漏讲的行为边界。\n\n如果用户明确说:\n\n- “这次不要参考 memory”\n- “忽略之前的记忆”\n\n那系统更合理的处理不是:\n\n- 一边继续用 memory\n- 一边嘴上说“我知道但先忽略”\n\n而是:\n\n**在这一轮里,按 memory 为空来工作。**\n\n### 6. 推荐具体路径、函数、外部资源前,要再验证一次\n\nmemory 很适合保存:\n\n- 哪个看板通常有上下文\n- 哪个目录以前是关键入口\n- 某种项目约定为什么存在\n\n但在你真的要对用户说:\n\n- “去改 `src/auth.py`”\n- “调用 `AuthManager`”\n- “看这个 URL 就对了”\n\n之前,最好再核对一次。\n\n因为命名、路径、系统入口、外部链接,都是会变的。\n\n所以更稳妥的做法不是:\n\n> memory 里写过,就直接复述。\n\n而是:\n\n> memory 先告诉我去哪里验证;验证完,再给用户结论。\n\n## 教学边界\n\n这章最重要的,不是 memory 以后还能多自动、多复杂,而是先把存储边界讲清楚:\n\n- 什么值得跨会话留下\n- 什么只是当前任务状态,不该进 memory\n- memory 和 task / plan / CLAUDE.md 各自负责什么\n\n只要这几层边界清楚,教学目标就已经达成了。\n\n更复杂的自动整合、作用域分层、自动抽取,都应该放在这个最小边界之后。\n\n## 学完这章后,你应该能回答\n\n- 为什么 memory 不是“什么都记”?\n- 什么样的信息适合跨会话保存?\n- 为什么代码结构和当前任务状态不应该进 memory?\n- memory 和 task / plan / CLAUDE.md 的边界是什么?\n\n---\n\n**一句话记住:memory 保存的是“以后还可能有价值、但当前代码里不容易直接重新看出来”的信息。**\n"
},
{
"version": "s10",
+ "slug": "s10-system-prompt",
"locale": "zh",
- "title": "s10: Team Protocols (团队协议)",
- "content": "# s10: Team Protocols (团队协议)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"队友之间要有统一的沟通规矩\"* -- 一个 request-response 模式驱动所有协商。\n\n## 问题\n\ns09 中队友能干活能通信, 但缺少结构化协调:\n\n**关机**: 直接杀线程会留下写了一半的文件和过期的 config.json。需要握手 -- 领导请求, 队友批准 (收尾退出) 或拒绝 (继续干)。\n\n**计划审批**: 领导说 \"重构认证模块\", 队友立刻开干。高风险变更应该先过审。\n\n两者结构一样: 一方发带唯一 ID 的请求, 另一方引用同一 ID 响应。\n\n## 解决方案\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## 工作原理\n\n1. 领导生成 request_id, 通过收件箱发起关机请求。\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. 队友收到请求后, 用 approve/reject 响应。\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. 计划审批遵循完全相同的模式。队友提交计划 (生成 request_id), 领导审查 (引用同一个 request_id)。\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\n一个 FSM, 两种用途。同样的 `pending -> approved | rejected` 状态机可以套用到任何请求-响应协议上。\n\n## 相对 s09 的变更\n\n| 组件 | 之前 (s09) | 之后 (s10) |\n|----------------|------------------|--------------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan) |\n| 关机 | 仅自然退出 | 请求-响应握手 |\n| 计划门控 | 无 | 提交/审查与审批 |\n| 关联 | 无 | 每个请求一个 request_id |\n| FSM | 无 | pending -> approved/rejected |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. 输入 `/team` 监控状态\n"
+ "title": "s10: System Prompt Construction (系统提示词构建)",
+ "kind": "chapter",
+ "filename": "s10-system-prompt.md",
+ "content": "# s10: System Prompt Construction (系统提示词构建)\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > [ s10 ] > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *系统提示词不是一整块大字符串,而是一条可维护的组装流水线。*\n\n## 这一章为什么重要\n\n很多初学者一开始会把 system prompt 写成一大段固定文本。\n\n这样在最小 demo 里当然能跑。\n\n但一旦系统开始长功能,你很快会遇到这些问题:\n\n- 工具列表会变\n- skills 会变\n- memory 会变\n- 当前目录、日期、模式会变\n- 某些提醒只在这一轮有效,不该永远塞进系统说明\n\n所以到了这个阶段,system prompt 不能再当成一块硬编码文本。\n\n它应该升级成:\n\n**由多个来源共同组装出来的一条流水线。**\n\n## 建议联读\n\n- 如果你还习惯把 prompt 看成“神秘大段文本”,先回 [`s00a-query-control-plane.md`](./s00a-query-control-plane.md),重新确认模型输入在进模型前经历了哪些控制层。\n- 如果你想真正稳住“哪些内容先拼、哪些后拼”,建议把 [`s10a-message-prompt-pipeline.md`](./s10a-message-prompt-pipeline.md) 放在手边,这页就是本章最关键的桥。\n- 如果你开始把 system rules、工具说明、memory、runtime state 混成一个大块,先看 [`data-structures.md`](./data-structures.md),把这些输入片段的来源重新拆开。\n\n## 先解释几个名词\n\n### 什么是 system prompt\n\nsystem prompt 是给模型的系统级说明。\n\n它通常负责告诉模型:\n\n- 你是谁\n- 你能做什么\n- 你应该遵守什么规则\n- 你现在处在什么环境里\n\n### 什么是“组装流水线”\n\n意思是:\n\n- 不同信息来自不同地方\n- 最后按顺序拼接成一份输入\n\n它不是一个死字符串,而是一条构建过程。\n\n### 什么是动态信息\n\n有些信息经常变化,例如:\n\n- 当前日期\n- 当前工作目录\n- 本轮新增的提醒\n\n这些信息不适合和所有稳定说明混在一起。\n\n## 最小心智模型\n\n最容易理解的方式,是把 system prompt 想成 6 段:\n\n```text\n1. 核心身份和行为说明\n2. 工具列表\n3. skills 元信息\n4. memory 内容\n5. CLAUDE.md 指令链\n6. 动态环境信息\n```\n\n然后按顺序拼起来:\n\n```text\ncore\n+ tools\n+ skills\n+ memory\n+ claude_md\n+ dynamic_context\n= final system prompt\n```\n\n## 为什么不能把所有东西都硬塞进一个大字符串\n\n因为这样会有三个问题:\n\n### 1. 不好维护\n\n你很难知道:\n\n- 哪一段来自哪里\n- 该修改哪一部分\n- 哪一段是固定说明,哪一段是临时上下文\n\n### 2. 不好测试\n\n如果 system prompt 是一大坨文本,你很难分别测试:\n\n- 工具说明生成得对不对\n- memory 是否被正确拼进去\n- CLAUDE.md 是否被正确读取\n\n### 3. 不好做缓存和动态更新\n\n一些稳定内容其实不需要每轮大变。 \n一些临时内容又只该活一轮。\n\n这就要求你把“稳定块”和“动态块”分开思考。\n\n## 最小实现结构\n\n### 第一步:做一个 builder\n\n```python\nclass SystemPromptBuilder:\n def build(self) -> str:\n parts = []\n parts.append(self._build_core())\n parts.append(self._build_tools())\n parts.append(self._build_skills())\n parts.append(self._build_memory())\n parts.append(self._build_claude_md())\n parts.append(self._build_dynamic())\n return \"\\n\\n\".join(p for p in parts if p)\n```\n\n这就是这一章最核心的设计。\n\n### 第二步:每一段只负责一种来源\n\n例如:\n\n- `_build_tools()` 只负责把工具说明生成出来\n- `_build_memory()` 只负责拿 memory\n- `_build_claude_md()` 只负责读指令文件\n\n这样每一段的职责就很清楚。\n\n## 这一章最关键的结构化边界\n\n### 边界 1:稳定说明 vs 动态提醒\n\n最重要的一组边界是:\n\n- 稳定的系统说明\n- 每轮临时变化的提醒\n\n这两类东西不应该混为一谈。\n\n### 边界 2:system prompt vs system reminder\n\nsystem prompt 适合放:\n\n- 身份\n- 规则\n- 工具\n- 长期约束\n\nsystem reminder 适合放:\n\n- 这一轮才临时需要的补充上下文\n- 当前变动的状态\n\n所以更清晰的做法是:\n\n- 主 system prompt 保持相对稳定\n- 每轮额外变化的内容,用单独的 reminder 方式追加\n\n## 一个实用的教学版本\n\n教学版可以先这样分:\n\n```text\n静态部分:\n- core\n- tools\n- skills\n- memory\n- CLAUDE.md\n\n动态部分:\n- date\n- cwd\n- model\n- current mode\n```\n\n如果你还想再清楚一点,可以加一个边界标记:\n\n```text\n=== DYNAMIC_BOUNDARY ===\n```\n\n它的作用不是神秘魔法。\n\n它只是提醒你:\n\n**上面更稳定,下面更容易变。**\n\n## CLAUDE.md 为什么要单独一段\n\n因为它的角色不是“某一次任务的临时上下文”,而是更稳定的长期说明。\n\n教学仓里,最容易理解的链条是:\n\n1. 用户全局级\n2. 项目根目录级\n3. 当前子目录级\n\n然后全部拼进去,而不是互相覆盖。\n\n这样读者更容易理解“规则来源可以分层叠加”这个思想。\n\n## memory 为什么要和 system prompt 有关系\n\n因为 memory 的本质是:\n\n**把跨会话仍然有价值的信息,重新带回模型当前的工作环境。**\n\n如果保存了 memory,却从来不在系统输入中重新呈现,那它就等于没被真正用起来。\n\n所以 memory 最终一定要进入 prompt 组装链条。\n\n## 初学者最容易混淆的点\n\n### 1. 把 system prompt 讲成一个固定字符串\n\n这会让读者看不到系统是如何长大的。\n\n### 2. 把所有变化信息都塞进 system prompt\n\n这会把稳定说明和临时提醒搅在一起。\n\n### 3. 
把 CLAUDE.md、memory、skills 写成同一种东西\n\n它们都可能进入 prompt,但来源和职责不同:\n\n- `skills`:可选能力或知识包\n- `memory`:跨会话记住的信息\n- `CLAUDE.md`:长期规则说明\n\n## 教学边界\n\n这一章先只建立一个核心心智:\n\n**prompt 不是一整块静态文本,而是一条被逐段组装出来的输入流水线。**\n\n所以这里先不要扩到太多外层细节:\n\n- 不要先讲复杂的 section 注册系统\n- 不要先讲缓存与预算\n- 不要先讲所有外部能力如何追加 prompt 说明\n\n只要读者已经能把稳定规则、动态提醒、memory、skills 这些来源看成不同输入段,而不是同一种“大 prompt”,这一章就已经讲到位了。\n\n## 如果你开始分不清 prompt、message、reminder\n\n这是非常正常的。\n\n因为到了这一章,系统输入已经不再只有一个 system prompt 了。 \n它至少会同时出现:\n\n- system prompt blocks\n- 普通对话消息\n- tool_result 消息\n- memory attachment\n- 当前轮 reminder\n\n如果你开始有这类困惑:\n\n- “这个信息到底该放 prompt 里,还是放 message 里?”\n- “为什么 system prompt 不是全部输入?”\n- “reminder 和长期规则到底差在哪?”\n\n建议继续看:\n\n- [`s10a-message-prompt-pipeline.md`](./s10a-message-prompt-pipeline.md)\n- [`entity-map.md`](./entity-map.md)\n\n## 这章和后续章节的关系\n\n这一章像一个汇合点:\n\n- `s05` skills 会汇进来\n- `s09` memory 会汇进来\n- `s07` 的当前模式也可能汇进来\n- `s19` MCP 以后也可能给 prompt 增加说明\n\n所以 `s10` 的价值不是“新加一个功能”, \n而是“把前面长出来的功能组织成一份清楚的系统输入”。\n\n## 学完这章后,你应该能回答\n\n- 为什么 system prompt 不能只是一整块硬编码文本?\n- 为什么要把不同来源拆成独立 section?\n- system prompt 和 system reminder 的边界是什么?\n- memory、skills、CLAUDE.md 为什么都可能进入 prompt,但又不是一回事?\n\n---\n\n**一句话记住:system prompt 的关键不是“写一段很长的话”,而是“把不同来源的信息按清晰边界组装起来”。**\n"
+ },
+ {
+ "version": null,
+ "slug": "s10a-message-prompt-pipeline",
+ "locale": "zh",
+ "title": "s10a: Message & Prompt Pipeline (消息与提示词管道)",
+ "kind": "bridge",
+ "filename": "s10a-message-prompt-pipeline.md",
+ "content": "# s10a: Message & Prompt Pipeline (消息与提示词管道)\n\n> 这篇桥接文档是 `s10` 的扩展。 \n> 它要补清一个很关键的心智:\n>\n> **system prompt 很重要,但它不是模型完整输入的全部。**\n\n## 为什么要补这一篇\n\n`s10` 已经把 system prompt 从“大字符串”升级成“可维护的组装流水线”,这一步非常重要。\n\n但当系统开始长出更多输入来源时,还会继续往前走一步:\n\n它会发现,真正送给模型的输入,不只包含:\n\n- system prompt\n\n还包含:\n\n- 规范化后的 messages\n- memory attachments\n- hook 注入消息\n- system reminder\n- 当前轮次的动态上下文\n\n也就是说,真正的输入更像一条完整管道:\n\n**Prompt Pipeline,而不只是 Prompt Builder。**\n\n## 先解释几个名词\n\n### 什么是 prompt block\n\n你可以把 `prompt block` 理解成:\n\n> system prompt 内部的一段结构化片段。\n\n例如:\n\n- 核心身份说明\n- 工具说明\n- memory section\n- CLAUDE.md section\n\n### 什么是 normalized message\n\n`normalized message` 的意思是:\n\n> 把不同来源、不同格式的消息整理成统一、稳定、可发给模型的消息形式。\n\n为什么需要这一步?\n\n因为系统里可能出现:\n\n- 普通用户消息\n- assistant 回复\n- tool_result\n- 系统提醒\n- attachment 包裹消息\n\n如果不先整理,模型输入层会越来越乱。\n\n### 什么是 system reminder\n\n这在 `s10` 已经提到过。\n\n它不是长期规则,而是:\n\n> 只在当前轮或当前阶段临时追加的一小段系统信息。\n\n## 最小心智模型\n\n把完整输入先理解成下面这条流水线:\n\n```text\n多种输入来源\n |\n +-- system prompt blocks\n +-- messages\n +-- attachments\n +-- reminders\n |\n v\nnormalize\n |\n v\nfinal api payload\n```\n\n这条图里最重要的不是“normalize”这个词有多高级,而是:\n\n**所有来源先分清边界,再在最后一步统一整理。**\n\n## system prompt 为什么不是全部\n\n这是初学者非常容易混的一个点。\n\nsystem prompt 适合放:\n\n- 身份\n- 规则\n- 工具能力描述\n- 长期说明\n\n但有些东西不适合放进去:\n\n- 这一轮刚发生的 tool_result\n- 某个 hook 刚注入的补充说明\n- 某条 memory attachment\n- 当前临时提醒\n\n这些更适合存在消息流里,而不是塞进 prompt block。\n\n## 关键数据结构\n\n### 1. SystemPromptBlock\n\n```python\nblock = {\n \"text\": \"...\",\n \"cache_scope\": None,\n}\n```\n\n最小教学版可以只理解成:\n\n- 一段文本\n- 可选的缓存信息\n\n### 2. PromptParts\n\n```python\nparts = {\n \"core\": \"...\",\n \"tools\": \"...\",\n \"skills\": \"...\",\n \"memory\": \"...\",\n \"claude_md\": \"...\",\n \"dynamic\": \"...\",\n}\n```\n\n### 3. NormalizedMessage\n\n```python\nmessage = {\n \"role\": \"user\" | \"assistant\",\n \"content\": [...],\n}\n```\n\n这里的 `content` 建议直接理解成“块列表”,而不是只是一段字符串。 \n因为后面你会自然遇到:\n\n- text block\n- tool_use block\n- tool_result block\n- attachment-like block\n\n### 4. ReminderMessage\n\n```python\nreminder = {\n \"role\": \"system\",\n \"content\": \"Current mode: plan\",\n}\n```\n\n教学版里你不一定真的要用 `system` role 单独传,但心智上要区分:\n\n- 这是长期 prompt block\n- 还是当前轮临时 reminder\n\n## 最小实现\n\n### 第一步:继续保留 `SystemPromptBuilder`\n\n这一步不能丢。\n\n### 第二步:把消息输入做成独立管道\n\n```python\ndef build_messages(raw_messages, attachments, reminders):\n messages = normalize_messages(raw_messages)\n messages = attach_memory(messages, attachments)\n messages = append_reminders(messages, reminders)\n return messages\n```\n\n### 第三步:在最后一层统一生成 API payload\n\n```python\npayload = {\n \"system\": build_system_prompt(),\n \"messages\": build_messages(...),\n \"tools\": build_tools(...),\n}\n```\n\n这一步特别关键。\n\n它会让读者明白:\n\n**system prompt、messages、tools 是并列输入面,而不是互相替代。**\n\n## 一张更完整但仍然容易理解的图\n\n```text\nPrompt Blocks\n - core\n - tools\n - memory\n - CLAUDE.md\n - dynamic context\n\nMessages\n - user messages\n - assistant messages\n - tool_result messages\n - injected reminders\n\nAttachments\n - memory attachment\n - hook attachment\n\n |\n v\n normalize + assemble\n |\n v\n final API payload\n```\n\n## 什么时候该放在 prompt,什么时候该放在 message\n\n可以先记这个简单判断法:\n\n### 更适合放在 prompt block\n\n- 长期稳定规则\n- 工具列表\n- 长期身份说明\n- CLAUDE.md\n\n### 更适合放在 message 流\n\n- 当前轮 tool_result\n- 刚发生的提醒\n- 当前轮追加的上下文\n- 某次 hook 输出\n\n### 更适合做 attachment\n\n- 大块但可选的补充信息\n- 需要按需展开的说明\n\n## 初学者最容易犯的错\n\n### 1. 把所有东西都塞进 system prompt\n\n这样会让 prompt 越来越脏,也会模糊稳定信息和动态信息的边界。\n\n### 2. 完全不做 normalize\n\n随着消息来源增多,输入格式会越来越不稳定。\n\n### 3. 
把 memory、hook、tool_result 都当成一类东西\n\n它们都能影响模型,但进入输入层的方式并不相同。\n\n### 4. 忽略“临时 reminder”这一层\n\n这会让很多本该只活一轮的信息,被错误地塞进长期 system prompt。\n\n## 它和 `s10`、`s19` 的关系\n\n- `s10` 讲 prompt builder\n- 这篇讲 message + prompt 的完整输入管道\n- `s19` 则会把 MCP 带来的额外说明和外部能力继续接入这条管道\n\n也就是说:\n\n**builder 是 prompt 的内部结构,pipeline 是模型输入的整体结构。**\n\n## 教学边界\n\n这篇最重要的,不是罗列所有输入来源,而是先把三条管线边界讲稳:\n\n- 什么该进 system blocks\n- 什么该进 normalized messages\n- 什么只应该作为临时 reminder 或 attachment\n\n只要这三层边界清楚,读者就已经能自己搭出一条可靠输入管道。 \n更细的 cache scope、attachment 去重和大结果外置,都可以放到后续扩展里再补。\n\n## 一句话记住\n\n**真正送给模型的,不只是一个 prompt,而是“prompt blocks + normalized messages + attachments + reminders”组成的输入管道。**\n"
},
{
"version": "s11",
+ "slug": "s11-error-recovery",
"locale": "zh",
- "title": "s11: Autonomous Agents (Autonomous Agent)",
- "content": "# s11: Autonomous Agents (Autonomous Agent)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"队友自己看看板, 有活就认领\"* -- 不需要领导逐个分配, 自组织。\n\n## 问题\n\ns09-s10 中, 队友只在被明确指派时才动。领导得给每个队友写 prompt, 任务看板上 10 个未认领的任务得手动分配。这扩展不了。\n\n真正的自治: 队友自己扫描任务看板, 认领没人做的任务, 做完再找下一个。\n\n一个细节: Context Compact (s06) 后 Agent 可能忘了自己是谁。身份重注入解决这个问题。\n\n## 解决方案\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## 工作原理\n\n1. 队友循环分两个阶段: WORK 和 IDLE。LLM 停止调用工具 (或调用了 `idle`) 时, 进入 IDLE。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. 空闲阶段循环轮询收件箱和任务看板。\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox} \"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']} \"})\n return True\n return False # timeout -> shutdown\n```\n\n3. 任务看板扫描: 找 pending 状态、无 owner、未被阻塞的任务。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. 身份重注入: 上下文过短 (说明发生了压缩) 时, 在开头插入身份块。\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work. \"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## 相对 s10 的变更\n\n| 组件 | 之前 (s10) | 之后 (s11) |\n|----------------|------------------|----------------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| 自治性 | 领导指派 | 自组织 |\n| 空闲阶段 | 无 | 轮询收件箱 + 任务看板 |\n| 任务认领 | 仅手动 | 自动认领未分配任务 |\n| 身份 | 系统提示 | + 压缩后重注入 |\n| 超时 | 无 | 60 秒空闲 -> 自动关机 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. 输入 `/tasks` 查看带 owner 的任务看板\n5. 输入 `/team` 监控谁在工作、谁在空闲\n"
+ "title": "s11: Error Recovery (错误恢复)",
+ "kind": "chapter",
+ "filename": "s11-error-recovery.md",
+ "content": "# s11: Error Recovery (错误恢复)\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > [ s11 ] > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *错误不是例外,而是主循环必须预留出来的一条正常分支。*\n\n## 这一章要解决什么问题\n\n到了 `s10`,你的 agent 已经有了:\n\n- 主循环\n- 工具调用\n- 规划\n- 上下文压缩\n- 权限、hook、memory、system prompt\n\n这时候系统已经不再是一个“只会聊天”的 demo,而是一个真的在做事的程序。\n\n问题也随之出现:\n\n- 模型输出写到一半被截断\n- 上下文太长,请求直接失败\n- 网络暂时抖动,API 超时或限流\n\n如果没有恢复机制,主循环会在第一个错误上直接停住。 \n这对初学者很危险,因为他们会误以为“agent 不稳定是模型的问题”。\n\n实际上,很多失败并不是“任务真的失败了”,而只是:\n\n**这一轮需要换一种继续方式。**\n\n所以这一章的目标只有一个:\n\n**把“报错就崩”升级成“先判断错误类型,再选择恢复路径”。**\n\n## 建议联读\n\n- 如果你开始分不清“为什么这一轮还在继续”,先回 [`s00c-query-transition-model.md`](./s00c-query-transition-model.md),重新确认 transition reason 为什么是独立状态。\n- 如果你在恢复逻辑里又把上下文压缩和错误恢复混成一团,建议顺手回看 [`s06-context-compact.md`](./s06-context-compact.md),区分“为了缩上下文而压缩”和“因为失败而恢复”。\n- 如果你准备继续往 `s12` 走,建议把 [`data-structures.md`](./data-structures.md) 放在旁边,因为后面任务系统会在“恢复状态之外”再引入新的 durable work 状态。\n\n## 先解释几个名词\n\n### 什么叫恢复\n\n恢复,不是把所有错误都藏起来。\n\n恢复的意思是:\n\n- 先判断这是不是临时问题\n- 如果是,就尝试一个有限次数的补救动作\n- 如果补救失败,再把失败明确告诉用户\n\n### 什么叫重试预算\n\n重试预算,就是“最多试几次”。\n\n比如:\n\n- 续写最多 3 次\n- 网络重连最多 3 次\n\n如果没有这个预算,程序就可能无限循环。\n\n### 什么叫状态机\n\n状态机这个词听起来很大,其实意思很简单:\n\n> 一个东西会在几个明确状态之间按规则切换。\n\n在这一章里,主循环就从“普通执行”变成了:\n\n- 正常执行\n- 续写恢复\n- 压缩恢复\n- 退避重试\n- 最终失败\n\n## 最小心智模型\n\n不要把错误恢复想得太神秘。\n\n教学版只需要先区分 3 类问题:\n\n```text\n1. 输出被截断\n 模型还没说完,但 token 用完了\n\n2. 上下文太长\n 请求装不进模型窗口了\n\n3. 临时连接失败\n 网络、超时、限流、服务抖动\n```\n\n对应 3 条恢复路径:\n\n```text\nLLM call\n |\n +-- stop_reason == \"max_tokens\"\n | -> 注入续写提示\n | -> 再试一次\n |\n +-- prompt too long\n | -> 压缩旧上下文\n | -> 再试一次\n |\n +-- timeout / rate limit / transient API error\n -> 等一会儿\n -> 再试一次\n```\n\n这就是最小但正确的恢复模型。\n\n## 关键数据结构\n\n### 1. 恢复状态\n\n```python\nrecovery_state = {\n \"continuation_attempts\": 0,\n \"compact_attempts\": 0,\n \"transport_attempts\": 0,\n}\n```\n\n它的作用不是“记录一切”,而是:\n\n- 防止无限重试\n- 让每种恢复路径各算各的次数\n\n### 2. 恢复决策\n\n```python\n{\n \"kind\": \"continue\" | \"compact\" | \"backoff\" | \"fail\",\n \"reason\": \"why this branch was chosen\",\n}\n```\n\n把“错误长什么样”和“接下来怎么做”分开,会更清楚。\n\n### 3. 续写提示\n\n```python\nCONTINUE_MESSAGE = (\n \"Output limit hit. Continue directly from where you stopped. 
\"\n \"Do not restart or repeat.\"\n)\n```\n\n这条提示非常重要。\n\n因为如果你只说“继续”,模型经常会:\n\n- 重新总结\n- 重新开头\n- 重复已经输出过的内容\n\n## 最小实现\n\n先写一个恢复选择器:\n\n```python\ndef choose_recovery(stop_reason: str | None, error_text: str | None) -> dict:\n if stop_reason == \"max_tokens\":\n return {\"kind\": \"continue\", \"reason\": \"output truncated\"}\n\n if error_text and \"prompt\" in error_text and \"long\" in error_text:\n return {\"kind\": \"compact\", \"reason\": \"context too large\"}\n\n if error_text and any(word in error_text for word in [\n \"timeout\", \"rate\", \"unavailable\", \"connection\"\n ]):\n return {\"kind\": \"backoff\", \"reason\": \"transient transport failure\"}\n\n return {\"kind\": \"fail\", \"reason\": \"unknown or non-recoverable error\"}\n```\n\n再把它接进主循环:\n\n```python\nwhile True:\n try:\n response = client.messages.create(...)\n decision = choose_recovery(response.stop_reason, None)\n except Exception as e:\n response = None\n decision = choose_recovery(None, str(e).lower())\n\n if decision[\"kind\"] == \"continue\":\n messages.append({\"role\": \"user\", \"content\": CONTINUE_MESSAGE})\n continue\n\n if decision[\"kind\"] == \"compact\":\n messages = auto_compact(messages)\n continue\n\n if decision[\"kind\"] == \"backoff\":\n time.sleep(backoff_delay(...))\n continue\n\n if decision[\"kind\"] == \"fail\":\n break\n\n # 正常工具处理\n```\n\n注意这里的重点不是代码花哨,而是:\n\n- 先分类\n- 再选动作\n- 每条动作有自己的预算\n\n## 三条恢复路径分别在补什么洞\n\n### 路径 1:输出被截断时,做续写\n\n这个问题的本质不是“模型不会”,而是“这一轮输出空间不够”。\n\n所以最小补法是:\n\n1. 追加一条续写消息\n2. 告诉模型不要重来,不要重复\n3. 让主循环继续\n\n```python\nif response.stop_reason == \"max_tokens\":\n if state[\"continuation_attempts\"] >= 3:\n return \"Error: output recovery exhausted\"\n state[\"continuation_attempts\"] += 1\n messages.append({\"role\": \"user\", \"content\": CONTINUE_MESSAGE})\n continue\n```\n\n### 路径 2:上下文太长时,先压缩再重试\n\n这里要先明确一点:\n\n压缩不是“把历史删掉”,而是:\n\n**把旧对话从原文,变成一份仍然可继续工作的摘要。**\n\n最小压缩结果建议至少保留:\n\n- 当前任务是什么\n- 已经做了什么\n- 关键决定是什么\n- 下一步准备做什么\n\n```python\ndef auto_compact(messages: list) -> list:\n summary = summarize_messages(messages)\n return [{\n \"role\": \"user\",\n \"content\": \"This session was compacted. Continue from this summary:\\n\" + summary,\n }]\n```\n\n### 路径 3:连接抖动时,退避重试\n\n“退避”这个词的意思是:\n\n> 别立刻再打一次,而是等一小会儿再试。\n\n为什么要等?\n\n因为这类错误往往是临时拥堵:\n\n- 刚超时\n- 刚限流\n- 服务器刚好抖了一下\n\n如果你瞬间连续重打,只会更容易失败。\n\n```python\ndef backoff_delay(attempt: int) -> float:\n return min(1.0 * (2 ** attempt), 30.0) + random.uniform(0, 1)\n```\n\n## 如何接到主循环里\n\n最干净的接法,是把恢复逻辑放在两个位置:\n\n### 位置 1:模型调用外层\n\n负责处理:\n\n- API 报错\n- 网络错误\n- 超时\n\n### 位置 2:拿到 response 以后\n\n负责处理:\n\n- `stop_reason == \"max_tokens\"`\n- 正常的 `tool_use`\n- 正常的结束\n\n也就是说,主循环现在不只是“调模型 -> 执行工具”,而是:\n\n```text\n1. 调模型\n2. 如果调用报错,判断是否可以恢复\n3. 如果拿到响应,判断是否被截断\n4. 如果需要恢复,就修改 messages 或等待\n5. 如果不需要恢复,再进入正常工具分支\n```\n\n## 初学者最容易犯的错\n\n### 1. 把所有错误都当成一种错误\n\n这样会导致:\n\n- 该续写的去压缩\n- 该等待的去重试\n- 该失败的却无限拖延\n\n### 2. 没有重试预算\n\n没有预算,主循环就可能永远卡在“继续”“继续”“继续”。\n\n### 3. 续写提示写得太模糊\n\n只写一个“continue”通常不够。 \n你要明确告诉模型:\n\n- 不要重复\n- 不要重新总结\n- 直接从中断点接着写\n\n### 4. 压缩后没有告诉模型“这是续场”\n\n如果压缩后只给一份摘要,不告诉模型“这是前文摘要”,模型很可能重新向用户提问。\n\n### 5. 
恢复过程完全没有日志\n\n教学系统最好打印类似:\n\n- `[Recovery] continue`\n- `[Recovery] compact`\n- `[Recovery] backoff`\n\n这样读者才看得见主循环到底做了什么。\n\n## 这一章和前后章节怎么衔接\n\n- `s06` 讲的是“什么时候该压缩”\n- `s10` 讲的是“系统提示词怎么组装”\n- `s11` 讲的是“当执行失败时,主循环怎么续下去”\n- `s12` 开始,恢复机制会保护更长、更复杂的任务流\n\n所以 `s11` 的位置非常关键。\n\n它不是外围小功能,而是:\n\n**把 agent 从“能跑”推进到“遇到问题也能继续跑”。**\n\n## 教学边界\n\n这一章先把 3 条最小恢复路径讲稳就够了:\n\n- 输出截断后续写\n- 上下文过长后压缩再试\n- 请求抖动后退避重试\n\n对教学主线来说,重点不是把所有“为什么继续下一轮”的原因一次讲全,而是先让读者明白:\n\n**恢复不是简单 try/except,而是系统知道该怎么续下去。**\n\n更大的 query 续行模型、预算续行、hook 介入这些内容,应该放回控制平面的桥接文档里看,而不是抢掉这章主线。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s11_error_recovery.py\n```\n\n可以试试这些任务:\n\n1. 让模型生成一段特别长的内容,观察它是否会自动续写。\n2. 连续读取一些大文件,观察上下文压缩是否会介入。\n3. 临时制造一次请求失败,观察系统是否会退避重试。\n\n读这一章时,你真正要记住的不是某个具体异常名,而是这条主线:\n\n**错误先分类,恢复再执行,失败最后才暴露给用户。**\n"
},
{
"version": "s12",
+ "slug": "s12-task-system",
+ "locale": "zh",
+ "title": "s12: Task System (任务系统)",
+ "kind": "chapter",
+ "filename": "s12-task-system.md",
+ "content": "# s12: Task System (任务系统)\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > [ s12 ] > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *Todo 只能提醒你“有事要做”,任务系统才能告诉你“先做什么、谁在等谁、哪一步还卡着”。*\n\n## 这一章要解决什么问题\n\n`s03` 的 todo 已经能帮 agent 把大目标拆成几步。\n\n但 todo 仍然有两个明显限制:\n\n- 它更像当前会话里的临时清单\n- 它不擅长表达“谁先谁后、谁依赖谁”\n\n例如下面这组工作:\n\n```text\n1. 先写解析器\n2. 再写语义检查\n3. 测试和文档可以并行\n4. 最后整体验收\n```\n\n这已经不是单纯的列表,而是一张“依赖关系图”。\n\n如果没有专门的任务系统,agent 很容易出现这些问题:\n\n- 前置工作没做完,就贸然开始后面的任务\n- 某个任务完成以后,不知道解锁了谁\n- 多个 agent 协作时,没有统一任务板可读\n\n所以这一章要做的升级是:\n\n**把“会话里的 todo”升级成“可持久化的任务图”。**\n\n## 建议联读\n\n- 如果你刚从 `s03` 过来,先回 [`data-structures.md`](./data-structures.md),重新确认 `TodoItem / PlanState` 和 `TaskRecord` 不是同一层状态。\n- 如果你开始把“对象边界”读混,先回 [`entity-map.md`](./entity-map.md),把 message、task、runtime task、teammate 这几层拆开。\n- 如果你准备继续读 `s13`,建议把 [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) 先放在手边,因为从这里开始最容易把 durable task 和 runtime task 混成一个词。\n\n## 先把几个词讲明白\n\n### 什么是任务\n\n这里的 `task` 指的是:\n\n> 一个可以被跟踪、被分配、被完成、被阻塞的小工作单元。\n\n它不是整段用户需求,而是用户需求拆出来的一小块工作。\n\n### 什么是依赖\n\n依赖的意思是:\n\n> 任务 B 必须等任务 A 完成,才能开始。\n\n### 什么是任务图\n\n任务图就是:\n\n> 任务节点 + 依赖连线\n\n你可以把它理解成:\n\n- 点:每个任务\n- 线:谁依赖谁\n\n### 什么是 ready\n\n`ready` 的意思很简单:\n\n> 这条任务现在已经满足开工条件。\n\n也就是:\n\n- 自己还没开始\n- 前置依赖已经全部完成\n\n## 最小心智模型\n\n本章最重要的,不是复杂调度算法,而是先回答 4 个问题:\n\n1. 现在有哪些任务?\n2. 每个任务是什么状态?\n3. 哪些任务还被卡住?\n4. 哪些任务已经可以开始?\n\n只要这 4 个问题能稳定回答,一个最小任务系统就已经成立了。\n\n## 关键数据结构\n\n### 1. TaskRecord\n\n```python\ntask = {\n \"id\": 1,\n \"subject\": \"Write parser\",\n \"description\": \"\",\n \"status\": \"pending\",\n \"blockedBy\": [],\n \"blocks\": [],\n \"owner\": \"\",\n}\n```\n\n每个字段都对应一个很实用的问题:\n\n- `id`:怎么唯一找到这条任务\n- `subject`:这条任务一句话在做什么\n- `description`:还有哪些补充说明\n- `status`:现在走到哪一步\n- `blockedBy`:还在等谁\n- `blocks`:它完成后会解锁谁\n- `owner`:现在由谁来做\n\n### 2. TaskStatus\n\n教学版先只保留最少 4 个状态:\n\n```text\npending -> in_progress -> completed\ndeleted\n```\n\n解释如下:\n\n- `pending`:还没开始\n- `in_progress`:已经有人在做\n- `completed`:已经做完\n- `deleted`:逻辑删除,不再参与工作流\n\n### 3. 
Ready Rule\n\n这是本章最关键的一条判断规则:\n\n```python\ndef is_ready(task: dict) -> bool:\n return task[\"status\"] == \"pending\" and not task[\"blockedBy\"]\n```\n\n如果你把这条规则讲明白,读者就会第一次真正明白:\n\n**任务系统的核心不是“保存清单”,而是“判断什么时候能开工”。**\n\n## 最小实现\n\n### 第一步:让任务落盘\n\n不要只把任务放在 `messages` 里。 \n教学版最简单的做法,就是“一任务一文件”:\n\n```text\n.tasks/\n task_1.json\n task_2.json\n task_3.json\n```\n\n创建任务时,直接写成一条 JSON 记录:\n\n```python\nclass TaskManager:\n def create(self, subject: str, description: str = \"\") -> dict:\n task = {\n \"id\": self._next_id(),\n \"subject\": subject,\n \"description\": description,\n \"status\": \"pending\",\n \"blockedBy\": [],\n \"blocks\": [],\n \"owner\": \"\",\n }\n self._save(task)\n return task\n```\n\n### 第二步:把依赖关系写成双向\n\n如果任务 A 完成后会解锁任务 B,最好同时维护两边:\n\n- A 的 `blocks` 里有 B\n- B 的 `blockedBy` 里有 A\n\n```python\ndef add_dependency(self, task_id: int, blocks_id: int):\n task = self._load(task_id)\n blocked = self._load(blocks_id)\n\n if blocks_id not in task[\"blocks\"]:\n task[\"blocks\"].append(blocks_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n\n self._save(task)\n self._save(blocked)\n```\n\n这样做的好处是:\n\n- 从前往后读得懂\n- 从后往前也读得懂\n\n### 第三步:完成任务时自动解锁后续任务\n\n```python\ndef complete(self, task_id: int):\n task = self._load(task_id)\n task[\"status\"] = \"completed\"\n self._save(task)\n\n for other in self._all_tasks():\n if task_id in other[\"blockedBy\"]:\n other[\"blockedBy\"].remove(task_id)\n self._save(other)\n```\n\n这一步非常关键。\n\n因为它说明:\n\n**任务系统不是静态记录表,而是会随着完成事件自动推进的工作图。**\n\n### 第四步:把任务工具接给模型\n\n教学版最小工具集建议先只做这 4 个:\n\n- `task_create`\n- `task_update`\n- `task_get`\n- `task_list`\n\n这样模型就能:\n\n- 新建任务\n- 更新状态\n- 看单条任务\n- 看整张任务板\n\n## 如何接到主循环里\n\n从 `s12` 开始,主循环第一次拥有了“会话外状态”。\n\n典型流程是:\n\n```text\n用户提出复杂目标\n ->\n模型决定先拆任务\n ->\n调用 task_create / task_update\n ->\n任务落到 .tasks/\n ->\n后续轮次继续读取并推进\n```\n\n这里要牢牢记住一句话:\n\n**todo 更像本轮计划,task 更像长期工作板。**\n\n## 这一章和 s03、s13 的边界\n\n这一层边界必须讲清楚,不然后面一定会混。\n\n### 和 `s03` 的区别\n\n| 机制 | 更适合什么 |\n|---|---|\n| `todo` | 当前会话里快速列步骤 |\n| `task` | 持久化工作、依赖关系、多人协作 |\n\n如果只是“先看文件,再改代码,再跑测试”,todo 往往就够。 \n如果是“跨很多轮、多人协作、还要管依赖”,就要上 task。\n\n### 和 `s13` 的区别\n\n本章的 `task` 指的是:\n\n> 一条工作目标\n\n它回答的是:\n\n- 要做什么\n- 现在做到哪一步\n- 谁在等谁\n\n它不是:\n\n- 某个正在后台跑的 `pytest`\n- 某个正在执行的 worker\n- 某条当前活着的执行线程\n\n后面这些属于下一章要讲的:\n\n> 运行中的执行任务\n\n## 初学者最容易犯的错\n\n### 1. 只会创建任务,不会维护依赖\n\n那最后得到的还是一张普通清单,不是任务图。\n\n### 2. 任务只放内存,不落盘\n\n系统一重启,整个工作结构就没了。\n\n### 3. 完成任务后不自动解锁后续任务\n\n这样系统永远不知道下一步谁可以开工。\n\n### 4. 把工作目标和运行中的执行混成一层\n\n这会导致后面 `s13` 的后台任务系统很难讲清。\n\n## 教学边界\n\n这一章先要守住的,不是任务平台以后还能长出多少管理功能,而是任务记录本身的最小主干:\n\n- `TaskRecord`\n- 依赖关系\n- 持久化\n- 就绪判断\n\n只要读者已经能把 todo 和 task、工作目标和运行执行明确分开,并且能手写一个会解锁后续任务的最小任务图,这章就已经讲到位了。\n\n## 学完这一章,你应该真正掌握什么\n\n学完以后,你应该能独立说清这几件事:\n\n1. 任务系统比 todo 多出来的核心能力,是“依赖关系”和“持久化”。\n2. `TaskRecord` 是本章最关键的数据结构。\n3. `blockedBy` / `blocks` 让系统能看懂前后关系。\n4. `is_ready()` 让系统能判断“谁现在可以开始”。\n\n如果这 4 件事都已经清楚,说明你已经能从 0 到 1 手写一个最小任务系统。\n\n## 下一章学什么\n\n这一章解决的是:\n\n> 工作目标如何被长期组织。\n\n下一章 `s13` 要解决的是:\n\n> 某个慢命令正在后台跑时,主循环怎么继续前进。\n\n也就是从“工作图”走向“运行时执行层”。\n"
+ },
+ {
+ "version": "s13",
+ "slug": "s13-background-tasks",
+ "locale": "zh",
+ "title": "s13: Background Tasks (后台任务)",
+ "kind": "chapter",
+ "filename": "s13-background-tasks.md",
+ "content": "# s13: Background Tasks (后台任务)\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > [ s13 ] > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *慢命令可以在旁边等,主循环不必陪着发呆。*\n\n## 这一章要解决什么问题\n\n前面几章里,工具调用基本都是:\n\n```text\n模型发起\n ->\n立刻执行\n ->\n立刻返回结果\n```\n\n这对短命令没有问题。 \n但一旦遇到这些慢操作,就会卡住:\n\n- `npm install`\n- `pytest`\n- `docker build`\n- 大型代码生成或检查任务\n\n如果主循环一直同步等待,会出现两个坏处:\n\n- 模型在等待期间什么都做不了\n- 用户明明还想继续别的工作,却被整轮流程堵住\n\n所以这一章要解决的是:\n\n**把“慢执行”移到后台,让主循环继续推进别的事情。**\n\n## 建议联读\n\n- 如果你还没有彻底稳住“任务目标”和“执行槽位”是两层对象,先看 [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)。\n- 如果你开始分不清哪些状态该落在 `RuntimeTaskRecord`、哪些还应留在任务板,回看 [`data-structures.md`](./data-structures.md)。\n- 如果你开始把后台执行理解成“另一条主循环”,先看 [`s02b-tool-execution-runtime.md`](./s02b-tool-execution-runtime.md),重新校正“并行的是执行与等待,不是主循环本身”。\n\n## 先把几个词讲明白\n\n### 什么叫前台\n\n前台指的是:\n\n> 主循环这轮发起以后,必须立刻等待结果的执行路径。\n\n### 什么叫后台\n\n后台不是神秘系统。 \n后台只是说:\n\n> 命令先在另一条执行线上跑,主循环先去做别的事。\n\n### 什么叫通知队列\n\n通知队列就是一条“稍后再告诉主循环”的收件箱。\n\n后台任务完成以后,不是直接把全文硬塞回模型, \n而是先写一条摘要通知,等下一轮再统一带回去。\n\n## 最小心智模型\n\n这一章最关键的句子是:\n\n**主循环仍然只有一条,并行的是等待,不是主循环本身。**\n\n可以把结构画成这样:\n\n```text\n主循环\n |\n +-- background_run(\"pytest\")\n | -> 立刻返回 task_id\n |\n +-- 继续别的工作\n |\n +-- 下一轮模型调用前\n -> drain_notifications()\n -> 把摘要注入 messages\n\n后台执行线\n |\n +-- 真正执行 pytest\n +-- 完成后写入通知队列\n```\n\n如果读者能牢牢记住这张图,后面扩展成更复杂的异步系统也不会乱。\n\n## 关键数据结构\n\n### 1. RuntimeTaskRecord\n\n```python\ntask = {\n \"id\": \"a1b2c3d4\",\n \"command\": \"pytest\",\n \"status\": \"running\",\n \"started_at\": 1710000000.0,\n \"result_preview\": \"\",\n \"output_file\": \"\",\n}\n```\n\n这些字段分别表示:\n\n- `id`:唯一标识\n- `command`:正在跑什么命令\n- `status`:运行中、完成、失败、超时\n- `started_at`:什么时候开始\n- `result_preview`:先给模型看的简短摘要\n- `output_file`:完整输出写到了哪里\n\n教学版再往前走一步时,建议把它直接落成两份文件:\n\n```text\n.runtime-tasks/\n a1b2c3d4.json # RuntimeTaskRecord\n a1b2c3d4.log # 完整输出\n```\n\n这样读者会更容易理解:\n\n- `json` 记录的是运行状态\n- `log` 保存的是完整产物\n- 通知只负责把 `preview` 带回主循环\n\n### 2. 
Notification\n\n```python\nnotification = {\n \"type\": \"background_completed\",\n \"task_id\": \"a1b2c3d4\",\n \"status\": \"completed\",\n \"preview\": \"tests passed\",\n}\n```\n\n通知只负责做一件事:\n\n> 告诉主循环“有结果回来了,你要不要看”。\n\n它不是完整日志本体。\n\n## 最小实现\n\n### 第一步:登记后台任务\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self.notifications = []\n self.lock = threading.Lock()\n```\n\n这里最少要有两块状态:\n\n- `tasks`:当前有哪些后台任务\n- `notifications`:哪些结果已经回来,等待主循环领取\n\n### 第二步:启动后台执行线\n\n“线程”这个词第一次见可能会有点紧张。 \n你可以先把它理解成:\n\n> 同一个程序里,另一条可以独立往前跑的执行线。\n\n```python\ndef run(self, command: str) -> str:\n task_id = new_id()\n self.tasks[task_id] = {\n \"id\": task_id,\n \"command\": command,\n \"status\": \"running\",\n }\n\n thread = threading.Thread(\n target=self._execute,\n args=(task_id, command),\n daemon=True,\n )\n thread.start()\n return task_id\n```\n\n这一步最重要的不是线程本身,而是:\n\n**主循环拿到 `task_id` 后就可以先继续往前走。**\n\n### 第三步:完成后写通知\n\n```python\ndef _execute(self, task_id: str, command: str):\n try:\n result = subprocess.run(..., timeout=300)\n status = \"completed\"\n preview = (result.stdout + result.stderr)[:500]\n except subprocess.TimeoutExpired:\n status = \"timeout\"\n preview = \"command timed out\"\n\n with self.lock:\n self.tasks[task_id][\"status\"] = status\n self.notifications.append({\n \"type\": \"background_completed\",\n \"task_id\": task_id,\n \"status\": status,\n \"preview\": preview,\n })\n```\n\n这里体现的思想很重要:\n\n**后台执行负责产出结果,通知队列负责把结果送回主循环。**\n\n### 第四步:下一轮前排空通知\n\n```python\ndef before_model_call(messages: list):\n notifications = bg.drain_notifications()\n if not notifications:\n return\n\n text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']} - {n['preview']}\"\n for n in notifications\n )\n messages.append({\"role\": \"user\", \"content\": text})\n```\n\n这样模型在下一轮就会知道:\n\n- 哪个后台任务完成了\n- 是成功、失败还是超时\n- 如果要看全文,该再去读文件\n\n## 为什么完整输出不要直接塞回 prompt\n\n这是本章必须讲透的点。\n\n如果后台任务输出几万行日志,你不能每次都把全文塞回上下文。 \n更稳的做法是:\n\n1. 完整输出写磁盘\n2. 通知里只放简短摘要\n3. 模型真的要看全文时,再调用 `read_file`\n\n这背后的心智很重要:\n\n**通知负责提醒,文件负责存原文。**\n\n## 如何接到主循环里\n\n从 `s13` 开始,主循环多出一个标准前置步骤:\n\n```text\n1. 先排空通知队列\n2. 再调用模型\n3. 普通工具照常同步执行\n4. 如果模型调用 background_run,就登记后台任务并立刻返回 task_id\n5. 下一轮再把后台结果带回模型\n```\n\n教学版最小工具建议先做两个:\n\n- `background_run`\n- `background_check`\n\n这样已经足够支撑最小异步执行闭环。\n\n## 这一章和任务系统的边界\n\n这是本章最容易和 `s12` 混掉的地方。\n\n### `s12` 的 task 是什么\n\n`s12` 里的 `task` 是:\n\n> 工作目标\n\n它关心的是:\n\n- 要做什么\n- 谁依赖谁\n- 现在总体进度如何\n\n### `s13` 的 background task 是什么\n\n本章里的后台任务是:\n\n> 正在运行的执行单元\n\n它关心的是:\n\n- 哪个命令正在跑\n- 跑到什么状态\n- 结果什么时候回来\n\n所以最稳的记法是:\n\n- `task` 更像工作板\n- `background task` 更像运行中的作业\n\n两者相关,但不是同一个东西。\n\n## 初学者最容易犯的错\n\n### 1. 以为“后台”就是更复杂的主循环\n\n不是。 \n主循环仍然尽量保持单主线。\n\n### 2. 只开线程,不登记状态\n\n这样任务一多,你根本不知道:\n\n- 谁还在跑\n- 谁已经完成\n- 谁失败了\n\n### 3. 把长日志全文塞进上下文\n\n上下文很快就会被撑爆。\n\n### 4. 把 `s12` 的工作目标和本章的运行任务混为一谈\n\n这会让后面多 agent 和调度章节全部打结。\n\n## 教学边界\n\n这一章只需要先把一个最小运行时模式讲清楚:\n\n- 慢工作在后台跑\n- 主循环继续保持单主线\n- 结果通过通知路径在后面回到模型\n\n只要这条模式稳了,线程池、更多 worker 类型、更复杂的事件系统都可以后补。\n\n这章真正要让读者守住的是:\n\n**并行的是等待与执行槽位,不是主循环本身。**\n\n## 学完这一章,你应该真正掌握什么\n\n学完以后,你应该能独立复述下面几句话:\n\n1. 主循环只有一条,并行的是等待,不是主循环本身。\n2. 后台任务至少需要“任务表 + 通知队列”两块状态。\n3. `background_run` 应该立刻返回 `task_id`,而不是同步卡住。\n4. 通知只放摘要,完整输出放文件。\n\n如果这 4 句话都已经非常清楚,说明你已经掌握了后台任务系统的核心。\n\n## 下一章学什么\n\n这一章解决的是:\n\n> 慢命令如何在后台运行。\n\n下一章 `s14` 要解决的是:\n\n> 如果连“启动后台任务”这件事都不一定由当前用户触发,而是由时间触发,该怎么做。\n\n也就是从“异步运行”继续走向“定时触发”。\n"
+ },
+ {
+ "version": null,
+ "slug": "s13a-runtime-task-model",
+ "locale": "zh",
+ "title": "s13a: Runtime Task Model (运行时任务模型)",
+ "kind": "bridge",
+ "filename": "s13a-runtime-task-model.md",
+ "content": "# s13a: Runtime Task Model (运行时任务模型)\n\n> 这篇桥接文档专门解决一个非常容易混淆的问题:\n>\n> **任务板里的 task,和后台/队友/监控这些“正在运行的任务”,不是同一个东西。**\n\n## 建议怎么联读\n\n这篇最好夹在下面几份文档中间读:\n\n- 先看 [`s12-task-system.md`](./s12-task-system.md),确认工作图任务在讲什么。\n- 再看 [`s13-background-tasks.md`](./s13-background-tasks.md),确认后台执行在讲什么。\n- 如果词开始混,再回 [`glossary.md`](./glossary.md)。\n- 如果想把字段和状态彻底对上,再对照 [`data-structures.md`](./data-structures.md) 和 [`entity-map.md`](./entity-map.md)。\n\n## 为什么必须单独讲这一篇\n\n主线里:\n\n- `s12` 讲的是任务系统\n- `s13` 讲的是后台任务\n\n这两章各自都没错。 \n但如果不额外补一层桥接,很多读者很快就会把两种“任务”混在一起。\n\n例如:\n\n- 任务板里的 “实现 auth 模块”\n- 后台执行里的 “正在跑 pytest”\n- 队友执行里的 “alice 正在做代码改动”\n\n这些都可以叫“任务”,但它们不在同一层。\n\n为了让整个仓库接近满分,这一层必须讲透。\n\n## 先解释两个完全不同的“任务”\n\n### 第一种:工作图任务\n\n这就是 `s12` 里的任务板节点。\n\n它回答的是:\n\n- 要做什么\n- 谁依赖谁\n- 谁认领了\n- 当前进度如何\n\n它更像:\n\n> 工作计划中的一个可跟踪工作单元。\n\n### 第二种:运行时任务\n\n这类任务回答的是:\n\n- 现在有什么执行单元正在跑\n- 它是什么类型\n- 是在运行、完成、失败还是被杀掉\n- 输出文件在哪\n\n它更像:\n\n> 系统当前活着的一条执行槽位。\n\n## 最小心智模型\n\n你可以先把两者画成两张表:\n\n```text\n工作图任务\n - durable\n - 面向目标与依赖\n - 生命周期更长\n\n运行时任务\n - runtime\n - 面向执行与输出\n - 生命周期更短\n```\n\n它们的关系不是“二选一”,而是:\n\n```text\n一个工作图任务\n 可以派生\n一个或多个运行时任务\n```\n\n例如:\n\n```text\n工作图任务:\n \"实现 auth 模块\"\n\n运行时任务:\n 1. 后台跑测试\n 2. 启动一个 coder teammate\n 3. 监控一个 MCP 服务返回结果\n```\n\n## 为什么这层区别非常重要\n\n如果不区分这两层,后面很多章节都会开始缠在一起:\n\n- `s13` 的后台任务会和 `s12` 的任务板混淆\n- `s15-s17` 的队友任务会不知道该挂在哪\n- `s18` 的 worktree 到底绑定哪一层任务,也会变模糊\n\n所以你要先记住一句:\n\n**工作图任务管“目标”,运行时任务管“执行”。**\n\n## 关键数据结构\n\n### 1. WorkGraphTaskRecord\n\n这就是 `s12` 里的那条 durable task。\n\n```python\ntask = {\n \"id\": 12,\n \"subject\": \"Implement auth module\",\n \"status\": \"in_progress\",\n \"blockedBy\": [],\n \"blocks\": [13],\n \"owner\": \"alice\",\n \"worktree\": \"auth-refactor\",\n}\n```\n\n### 2. RuntimeTaskState\n\n教学版可以先用这个最小形状:\n\n```python\nruntime_task = {\n \"id\": \"b8k2m1qz\",\n \"type\": \"local_bash\",\n \"status\": \"running\",\n \"description\": \"Run pytest\",\n \"start_time\": 1710000000.0,\n \"end_time\": None,\n \"output_file\": \".task_outputs/b8k2m1qz.txt\",\n \"notified\": False,\n}\n```\n\n这里的字段重点在于:\n\n- `type`:它是什么执行单元\n- `status`:它现在在运行态还是终态\n- `output_file`:它的产出在哪\n- `notified`:结果有没有回通知系统\n\n### 3. 
RuntimeTaskType\n\n你不必在教学版里一次性实现所有类型, \n但应该让读者知道“运行时任务”是一个类型族,而不只是 `background shell` 一种。\n\n最小类型表可以先这样讲:\n\n```text\nlocal_bash\nlocal_agent\nremote_agent\nin_process_teammate\nmonitor\nworkflow\n```\n\n## 最小实现\n\n### 第一步:继续保留 `s12` 的任务板\n\n这一层不要动。\n\n### 第二步:单独加一个 RuntimeTaskManager\n\n```python\nclass RuntimeTaskManager:\n def __init__(self):\n self.tasks = {}\n```\n\n### 第三步:后台运行时创建 runtime task\n\n```python\ndef spawn_bash_task(command: str):\n task_id = new_runtime_id()\n runtime_tasks[task_id] = {\n \"id\": task_id,\n \"type\": \"local_bash\",\n \"status\": \"running\",\n \"description\": command,\n }\n```\n\n### 第四步:必要时把 runtime task 关联回工作图任务\n\n```python\nruntime_tasks[task_id][\"work_graph_task_id\"] = 12\n```\n\n这一步不是必须一上来就做,但如果系统进入多 agent / worktree 阶段,就会越来越重要。\n\n## 一张真正清楚的图\n\n```text\nWork Graph\n task #12: Implement auth module\n |\n +-- spawns runtime task A: local_bash (pytest)\n +-- spawns runtime task B: local_agent (coder worker)\n +-- spawns runtime task C: monitor (watch service status)\n\nRuntime Task Layer\n A/B/C each have:\n - own runtime ID\n - own status\n - own output\n - own lifecycle\n```\n\n## 它和后面章节怎么连\n\n这层一旦讲清楚,后面几章会顺很多:\n\n- `s13` 后台命令,本质上是 runtime task\n- `s15-s17` 队友/agent,也可以看成 runtime task 的一种\n- `s18` worktree 主要绑定工作图任务,但也会影响运行时执行环境\n- `s19` 某些外部监控或异步调用,也可能落成 runtime task\n\n所以后面只要你看到“有东西在后台活着并推进工作”,都可以先问自己两句:\n\n- 它是不是某个 durable work graph task 派生出来的执行槽位。\n- 它的状态是不是应该放在 runtime layer,而不是任务板节点里。\n\n## 初学者最容易犯的错\n\n### 1. 把后台 shell 直接写成任务板状态\n\n这样 durable task 和 runtime state 就混在一起了。\n\n### 2. 认为一个工作图任务只能对应一个运行时任务\n\n现实里很常见的是一个工作目标派生多个执行单元。\n\n### 3. 用同一套状态名描述两层对象\n\n例如:\n\n- 工作图任务的 `pending / in_progress / completed`\n- 运行时任务的 `running / completed / failed / killed`\n\n这两套状态最好不要混。\n\n### 4. 忽略 output file 和 notified 这类运行时字段\n\n工作图任务不太关心这些,运行时任务非常关心。\n\n## 教学边界\n\n这篇最重要的,不是把运行时字段一次加满,而是先把下面三层对象彻底拆开:\n\n- durable task 是长期工作目标\n- runtime task 是当前活着的执行槽位\n- notification / output 只是运行时把结果带回来的通道\n\n运行时任务类型枚举、增量输出 offset、槽位清理策略,都可以等你先把这三层边界手写清楚以后再扩展。\n\n## 一句话记住\n\n**工作图任务管“长期目标和依赖”,运行时任务管“当前活着的执行单元和输出”。**\n\n**`s12` 的 task 是工作图节点,`s13+` 的 runtime task 是系统里真正跑起来的执行单元。**\n"
+ },
+ {
+ "version": "s14",
+ "slug": "s14-cron-scheduler",
+ "locale": "zh",
+ "title": "s14: Cron Scheduler (定时调度)",
+ "kind": "chapter",
+ "filename": "s14-cron-scheduler.md",
+ "content": "# s14: Cron Scheduler (定时调度)\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > [ s14 ] > s15 > s16 > s17 > s18 > s19`\n\n> *如果后台任务解决的是“稍后回来拿结果”,那么定时调度解决的是“将来某个时间再开始做事”。*\n\n## 这一章要解决什么问题\n\n`s13` 已经让系统学会了把慢命令放到后台。\n\n但后台任务默认还是“现在就启动”。\n\n很多真实需求并不是现在做,而是:\n\n- 每天晚上跑一次测试\n- 每周一早上生成报告\n- 30 分钟后提醒我继续检查某个结果\n\n如果没有调度能力,用户就只能每次手动再说一遍。 \n这会让系统看起来像“只能响应当下”,而不是“能安排未来工作”。\n\n所以这一章要加上的能力是:\n\n**把一条未来要执行的意图,先记下来,等时间到了再触发。**\n\n## 建议联读\n\n- 如果你还没完全分清 `schedule`、`task`、`runtime task` 各自表示什么,先回 [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)。\n- 如果你想重新看清“一条触发最终是怎样回到主循环里的”,可以配合读 [`s00b-one-request-lifecycle.md`](./s00b-one-request-lifecycle.md)。\n- 如果你开始把“未来触发”误以为“又多了一套执行系统”,先回 [`data-structures.md`](./data-structures.md),确认调度记录和运行时记录不是同一个表。\n\n## 先解释几个名词\n\n### 什么是调度器\n\n调度器,就是一段专门负责“看时间、查任务、决定是否触发”的代码。\n\n### 什么是 cron 表达式\n\n`cron` 是一种很常见的定时写法。\n\n最小 5 字段版本长这样:\n\n```text\n分 时 日 月 周\n```\n\n例如:\n\n```text\n*/5 * * * * 每 5 分钟\n0 9 * * 1 每周一 9 点\n30 14 * * * 每天 14:30\n```\n\n如果你是初学者,不用先背全。\n\n这一章真正重要的不是语法细节,而是:\n\n> “系统如何把一条未来任务记住,并在合适时刻放回主循环。”\n\n### 什么是持久化调度\n\n持久化,意思是:\n\n> 就算程序重启,这条调度记录还在。\n\n## 最小心智模型\n\n先把调度看成 3 个部分:\n\n```text\n1. 调度记录\n2. 定时检查器\n3. 通知队列\n```\n\n它们之间的关系是:\n\n```text\nschedule_create(...)\n ->\n把记录写到列表或文件里\n ->\n后台检查器每分钟看一次“现在是否匹配”\n ->\n如果匹配,就把 prompt 放进通知队列\n ->\n主循环下一轮把它当成新的用户消息喂给模型\n```\n\n这条链路很重要。\n\n因为它说明了一点:\n\n**定时调度并不是另一套 agent。它最终还是回到同一条主循环。**\n\n## 关键数据结构\n\n### 1. ScheduleRecord\n\n```python\nschedule = {\n \"id\": \"job_001\",\n \"cron\": \"0 9 * * 1\",\n \"prompt\": \"Run the weekly status report.\",\n \"recurring\": True,\n \"durable\": True,\n \"created_at\": 1710000000.0,\n \"last_fired_at\": None,\n}\n```\n\n字段含义:\n\n- `id`:唯一编号\n- `cron`:定时规则\n- `prompt`:到点后要注入主循环的提示\n- `recurring`:是不是反复触发\n- `durable`:是否落盘保存\n- `created_at`:创建时间\n- `last_fired_at`:上次触发时间\n\n### 2. 调度通知\n\n```python\n{\n \"type\": \"scheduled_prompt\",\n \"schedule_id\": \"job_001\",\n \"prompt\": \"Run the weekly status report.\",\n}\n```\n\n### 3. 检查周期\n\n教学版建议先按“分钟级”思考,而不是“秒级严格精度”。\n\n因为大多数 cron 任务本来就不是为了卡秒执行。\n\n## 最小实现\n\n### 第一步:允许创建一条调度记录\n\n```python\ndef create(self, cron_expr: str, prompt: str, recurring: bool = True):\n job = {\n \"id\": new_id(),\n \"cron\": cron_expr,\n \"prompt\": prompt,\n \"recurring\": recurring,\n \"created_at\": time.time(),\n \"last_fired_at\": None,\n }\n self.jobs.append(job)\n return job\n```\n\n### 第二步:写一个定时检查循环\n\n```python\ndef check_loop(self):\n while True:\n now = datetime.now()\n self.check_jobs(now)\n time.sleep(60)\n```\n\n最小教学版先每分钟检查一次就足够。\n\n### 第三步:时间到了就发通知\n\n```python\ndef check_jobs(self, now):\n for job in self.jobs:\n if cron_matches(job[\"cron\"], now):\n self.queue.put({\n \"type\": \"scheduled_prompt\",\n \"schedule_id\": job[\"id\"],\n \"prompt\": job[\"prompt\"],\n })\n job[\"last_fired_at\"] = now.timestamp()\n```\n\n### 第四步:主循环像处理后台通知一样处理定时通知\n\n```python\nnotifications = scheduler.drain()\nfor item in notifications:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"[scheduled:{item['schedule_id']}] {item['prompt']}\",\n })\n```\n\n这样一来,定时任务最终还是由模型接手继续做。\n\n## 为什么这章放在后台任务之后\n\n因为这两章解决的问题很接近,但不是同一件事。\n\n可以这样区分:\n\n| 机制 | 回答的问题 |\n|---|---|\n| 后台任务 | “已经启动的慢操作,结果什么时候回来?” |\n| 定时调度 | “一件事应该在未来什么时候开始?” |\n\n这个顺序对初学者很友好。\n\n因为先理解“异步结果回来”,再理解“未来触发一条新意图”,心智会更顺。\n\n## 初学者最容易犯的错\n\n### 1. 一上来沉迷 cron 语法细节\n\n这章最容易跑偏到一大堆表达式规则。\n\n但教学主线其实不是“背语法”,而是:\n\n**调度记录如何进入通知队列,又如何回到主循环。**\n\n### 2. 没有 `last_fired_at`\n\n没有这个字段,系统很容易在短时间内重复触发同一条任务。\n\n### 3. 
只放内存,不支持落盘\n\n如果用户希望“明天再提醒我”,程序一重启就没了,这就不是真正的调度。\n\n### 4. 把调度触发结果直接在后台默默执行\n\n教学主线里更清楚的做法是:\n\n- 时间到了\n- 先发通知\n- 再让主循环决定怎么处理\n\n这样系统行为更透明,读者也更容易理解。\n\n### 5. 误以为定时任务必须绝对准点\n\n很多初学者会把调度想成秒表。\n\n但这里更重要的是“有计划地触发”,而不是追求毫秒级精度。\n\n## 如何接到整个系统里\n\n到了这一章,系统已经有两条重要的“外部事件输入”:\n\n- 后台任务完成通知\n- 定时调度触发通知\n\n二者最好的统一方式是:\n\n**都走通知队列,再在下一次模型调用前统一注入。**\n\n这样主循环结构不会越来越乱。\n\n## 教学边界\n\n这一章先讲清一条主线就够了:\n\n**调度器做的是“记住未来”,不是“取代主循环”。**\n\n所以教学版先只需要让读者看清:\n\n- schedule record 负责记住未来何时开工\n- 真正执行工作时,仍然回到任务系统和通知队列\n- 它只是多了一种“开始入口”,不是多了一条新的主循环\n\n多进程锁、漏触发补报、自然语言时间语法这些,都应该排在这条主线之后。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s14_cron_scheduler.py\n```\n\n可以试试这些任务:\n\n1. 建一个每分钟触发一次的小任务,观察它是否会按时进入通知队列。\n2. 建一个只触发一次的任务,确认触发后是否会消失。\n3. 重启程序,检查持久化的调度记录是否还在。\n\n读完这一章,你应该能自己说清这句话:\n\n**后台任务是在“等结果”,定时调度是在“等开始”。**\n"
+ },
+ {
+ "version": "s15",
+ "slug": "s15-agent-teams",
+ "locale": "zh",
+ "title": "s15: Agent Teams (智能体团队)",
+ "kind": "chapter",
+ "filename": "s15-agent-teams.md",
+ "content": "# s15: Agent Teams (智能体团队)\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > [ s15 ] > s16 > s17 > s18 > s19`\n\n> *子 agent 适合一次性委派;团队系统解决的是“有人长期在线、能继续接活、能互相协作”。*\n\n## 这一章要解决什么问题\n\n`s04` 的 subagent 已经能帮主 agent 拆小任务。\n\n但 subagent 有一个很明显的边界:\n\n```text\n创建 -> 执行 -> 返回摘要 -> 消失\n```\n\n这很适合一次性的小委派。 \n可如果你想做这些事,就不够用了:\n\n- 让一个测试 agent 长期待命\n- 让两个 agent 长期分工\n- 让某个 agent 未来收到新任务后继续工作\n\n也就是说,系统现在缺的不是“再开一个模型调用”,而是:\n\n**一批有身份、能长期存在、能反复协作的队友。**\n\n## 建议联读\n\n- 如果你还在把 teammate 和 `s04` 的 subagent 混成一类,先回 [`entity-map.md`](./entity-map.md)。\n- 如果你准备继续读 `s16-s18`,建议把 [`team-task-lane-model.md`](./team-task-lane-model.md) 放在手边,它会把 teammate、protocol request、task、runtime slot、worktree lane 这五层一起拆开。\n- 如果你开始怀疑“长期队友”和“活着的执行槽位”到底是什么关系,配合看 [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)。\n\n## 先把几个词讲明白\n\n### 什么是队友\n\n这里的 `teammate` 指的是:\n\n> 一个拥有名字、角色、消息入口和生命周期的持久 agent。\n\n### 什么是名册\n\n名册就是团队成员列表。\n\n它回答的是:\n\n- 现在队伍里有谁\n- 每个人是什么角色\n- 每个人现在是空闲、工作中还是已关闭\n\n### 什么是邮箱\n\n邮箱就是每个队友的收件箱。\n\n别人把消息发给它, \n它在自己的下一轮工作前先去收消息。\n\n## 最小心智模型\n\n这一章最简单的理解方式,是把每个队友都想成:\n\n> 一个有自己循环、自己收件箱、自己上下文的人。\n\n```text\nlead\n |\n +-- spawn alice (coder)\n +-- spawn bob (tester)\n |\n +-- send message --> alice inbox\n +-- send message --> bob inbox\n\nalice\n |\n +-- 自己的 messages\n +-- 自己的 inbox\n +-- 自己的 agent loop\n\nbob\n |\n +-- 自己的 messages\n +-- 自己的 inbox\n +-- 自己的 agent loop\n```\n\n和 `s04` 的最大区别是:\n\n**subagent 是一次性执行单元,teammate 是长期存在的协作成员。**\n\n## 关键数据结构\n\n### 1. TeamMember\n\n```python\nmember = {\n \"name\": \"alice\",\n \"role\": \"coder\",\n \"status\": \"working\",\n}\n```\n\n教学版先只保留这 3 个字段就够了:\n\n- `name`:名字\n- `role`:角色\n- `status`:状态\n\n### 2. TeamConfig\n\n```python\nconfig = {\n \"team_name\": \"default\",\n \"members\": [member1, member2],\n}\n```\n\n它通常可以放在:\n\n```text\n.team/config.json\n```\n\n这份名册让系统重启以后,仍然知道:\n\n- 团队里曾经有谁\n- 每个人当前是什么角色\n\n### 3. MessageEnvelope\n\n```python\nmessage = {\n \"type\": \"message\",\n \"from\": \"lead\",\n \"content\": \"Please review auth module.\",\n \"timestamp\": 1710000000.0,\n}\n```\n\n`envelope` 这个词本来是“信封”的意思。 \n程序里用它表示:\n\n> 把消息正文和元信息一起包起来的一条记录。\n\n## 最小实现\n\n### 第一步:先有一份队伍名册\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.team_dir = team_dir\n self.config_path = team_dir / \"config.json\"\n self.config = self._load_config()\n```\n\n名册是本章的起点。 \n没有名册,就没有真正的“团队实体”。\n\n### 第二步:spawn 一个持久队友\n\n```python\ndef spawn(self, name: str, role: str, prompt: str):\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n thread.start()\n```\n\n这里的关键不在于线程本身,而在于:\n\n**队友一旦被创建,就不只是一次性工具调用,而是一个有持续生命周期的成员。**\n\n### 第三步:给每个队友一个邮箱\n\n教学版最简单的做法可以直接用 JSONL 文件:\n\n```text\n.team/inbox/alice.jsonl\n.team/inbox/bob.jsonl\n```\n\n发消息时追加一行:\n\n```python\ndef send(self, sender: str, to: str, content: str):\n with open(f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps({\n \"type\": \"message\",\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }) + \"\\n\")\n```\n\n收消息时:\n\n1. 读出全部\n2. 解析为消息列表\n3. 
清空收件箱\n\n### 第四步:队友每轮先看邮箱,再继续工作\n\n```python\ndef teammate_loop(name: str, role: str, prompt: str):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n\n while True:\n inbox = bus.read_inbox(name)\n for item in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(item)})\n\n response = client.messages.create(...)\n ...\n```\n\n这一步一定要讲透。\n\n因为它说明:\n\n**队友不是靠“被重新创建”来获得新任务,而是靠“下一轮先检查邮箱”来接收新工作。**\n\n## 如何接到前面章节的系统里\n\n这章最容易出现的误解是:\n\n> 好像系统突然“多了几个人”,但不知道这些人到底接在之前哪一层。\n\n更准确的接法应该是:\n\n```text\n用户目标 / lead 判断需要长期分工\n ->\nspawn teammate\n ->\n写入 .team/config.json\n ->\n通过 inbox 分派消息、摘要、任务线索\n ->\nteammate 先 drain inbox\n ->\n进入自己的 agent loop 和工具调用\n ->\n把结果回送给 lead,或继续等待下一轮工作\n```\n\n这里要特别看清三件事:\n\n1. `s12-s14` 已经给了你任务板、后台执行、时间触发这些“工作层”。\n2. `s15` 现在补的是“长期执行者”,也就是谁长期在线、谁能反复接活。\n3. 本章还没有进入“自己找活”或“自动认领”。\n\n也就是说,`s15` 的默认工作方式仍然是:\n\n- 由 lead 手动创建队友\n- 由 lead 通过邮箱分派事情\n- 队友在自己的循环里持续处理\n\n真正的自治认领,要到 `s17` 才展开。\n\n## Teammate、Subagent、Runtime Task 到底怎么区分\n\n这是这一组章节里最容易混的点。\n\n可以直接记这张表:\n\n| 机制 | 更像什么 | 生命周期 | 关键边界 |\n|---|---|---|\n| subagent | 一次性外包助手 | 干完就结束 | 重点是“隔离一小段探索性上下文” |\n| runtime task | 正在运行的后台执行槽位 | 任务跑完或取消就结束 | 重点是“慢任务稍后回来”,不是长期身份 |\n| teammate | 长期在线队友 | 可以反复接任务 | 重点是“有名字、有邮箱、有独立循环” |\n\n再换成更口语的话说:\n\n- subagent 适合“帮我查一下再回来汇报”\n- runtime task 适合“这件事你后台慢慢跑,结果稍后通知我”\n- teammate 适合“你以后长期负责测试方向”\n\n## 这一章的教学边界\n\n本章先只把 3 件事讲稳:\n\n- 名册\n- 邮箱\n- 独立循环\n\n这已经足够把“长期队友”这个实体立起来。\n\n但它还没有展开后面两层能力:\n\n### 第一层:结构化协议\n\n也就是:\n\n- 哪些消息只是普通交流\n- 哪些消息是带 `request_id` 的结构化请求\n\n这部分放到下一章 `s16`。\n\n### 第二层:自治认领\n\n也就是:\n\n- 队友空闲时能不能自己找活\n- 能不能自己恢复工作\n\n这部分放到 `s17`。\n\n## 初学者最容易犯的错\n\n### 1. 把队友当成“名字不同的 subagent”\n\n如果生命周期还是“执行完就销毁”,那本质上还不是 teammate。\n\n### 2. 队友之间共用同一份 messages\n\n这样上下文会互相污染。\n\n每个队友都应该有自己的对话状态。\n\n### 3. 没有持久名册\n\n如果系统关掉以后完全不知道“团队里曾经有谁”,那就很难继续做长期协作。\n\n### 4. 没有邮箱,靠共享变量直接喊话\n\n教学上不建议一开始就这么做。\n\n因为它会把“队友通信”和“进程内部细节”绑得太死。\n\n## 学完这一章,你应该真正掌握什么\n\n学完以后,你应该能独立说清下面几件事:\n\n1. teammate 的核心不是“多一个模型调用”,而是“多一个长期存在的执行者”。\n2. 团队系统至少需要“名册 + 邮箱 + 独立循环”。\n3. 每个队友都应该有自己的 `messages` 和自己的 inbox。\n4. subagent 和 teammate 的根本区别在生命周期,而不是名字。\n\n如果这 4 点已经稳了,说明你已经真正理解了“多 agent 团队”是怎么从单 agent 演化出来的。\n\n## 下一章学什么\n\n这一章解决的是:\n\n> 团队成员如何长期存在、互相发消息。\n\n下一章 `s16` 要解决的是:\n\n> 当消息不再只是自由聊天,而要变成可追踪、可批准、可拒绝的协作流程时,该怎么设计。\n\n也就是从“有团队”继续走向“团队协议”。\n"
+ },
+ {
+ "version": "s16",
+ "slug": "s16-team-protocols",
+ "locale": "zh",
+ "title": "s16: Team Protocols (团队协议)",
+ "kind": "chapter",
+ "filename": "s16-team-protocols.md",
+ "content": "# s16: Team Protocols (团队协议)\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > [ s16 ] > s17 > s18 > s19`\n\n> *有了邮箱以后,团队已经能说话;有了协议以后,团队才开始会“按规矩协作”。*\n\n## 这一章要解决什么问题\n\n`s15` 已经让队友之间可以互相发消息。\n\n但如果所有事情都只靠自由文本,会有两个明显问题:\n\n- 某些动作必须明确批准或拒绝,不能只靠一句模糊回复\n- 一旦多个请求同时存在,系统很难知道“这条回复对应哪一件事”\n\n最典型的两个场景就是:\n\n1. 队友要不要优雅关机\n2. 某个高风险计划要不要先审批\n\n这两件事看起来不同,但结构其实一样:\n\n```text\n一方发请求\n另一方明确回复\n双方都能用同一个 request_id 对上号\n```\n\n所以这一章要加的,不是更多自由聊天,而是:\n\n**一层结构化协议。**\n\n## 建议联读\n\n- 如果你开始把普通消息和协议请求混掉,先回 [`glossary.md`](./glossary.md) 和 [`entity-map.md`](./entity-map.md)。\n- 如果你准备继续读 `s17` 和 `s18`,建议先看 [`team-task-lane-model.md`](./team-task-lane-model.md),这样后面自治认领和 worktree 车道不会一下子缠在一起。\n- 如果你想重新确认协议请求最终怎样回流到主系统,可以配合看 [`s00b-one-request-lifecycle.md`](./s00b-one-request-lifecycle.md)。\n\n## 先把几个词讲明白\n\n### 什么是协议\n\n协议可以简单理解成:\n\n> 双方提前约定好“消息长什么样、收到以后怎么处理”。\n\n### 什么是 request_id\n\n`request_id` 就是请求编号。\n\n它的作用是:\n\n- 某个请求发出去以后有一个唯一身份\n- 之后的批准、拒绝、超时都能准确指向这一个请求\n\n### 什么是请求-响应模式\n\n这个词听起来像高级概念,其实很简单:\n\n```text\n请求方:我发起一件事\n响应方:我明确回答同意还是不同意\n```\n\n本章做的,就是把这种模式从“口头表达”升级成“结构化数据”。\n\n## 最小心智模型\n\n从教学角度,你可以先把协议看成两层:\n\n```text\n1. 协议消息\n2. 请求追踪表\n```\n\n### 协议消息\n\n```python\n{\n \"type\": \"shutdown_request\",\n \"from\": \"lead\",\n \"to\": \"alice\",\n \"request_id\": \"req_001\",\n \"payload\": {},\n}\n```\n\n### 请求追踪表\n\n```python\nrequests = {\n \"req_001\": {\n \"kind\": \"shutdown\",\n \"status\": \"pending\",\n }\n}\n```\n\n只要这两层都存在,系统就能同时回答:\n\n- 现在发生了什么\n- 这件事目前走到哪一步\n\n## 关键数据结构\n\n### 1. ProtocolEnvelope\n\n```python\nmessage = {\n \"type\": \"shutdown_request\",\n \"from\": \"lead\",\n \"to\": \"alice\",\n \"request_id\": \"req_001\",\n \"payload\": {},\n \"timestamp\": 1710000000.0,\n}\n```\n\n它比普通消息多出来的关键字段就是:\n\n- `type`\n- `request_id`\n- `payload`\n\n### 2. RequestRecord\n\n```python\nrequest = {\n \"request_id\": \"req_001\",\n \"kind\": \"shutdown\",\n \"from\": \"lead\",\n \"to\": \"alice\",\n \"status\": \"pending\",\n}\n```\n\n它负责记录:\n\n- 这是哪种请求\n- 谁发给谁\n- 当前状态是什么\n\n如果你想把教学版再往真实系统推进一步,建议不要只放在内存字典里,而是直接落盘:\n\n```text\n.team/requests/\n req_001.json\n req_002.json\n```\n\n这样系统就能做到:\n\n- 请求状态可恢复\n- 协议过程可检查\n- 即使主循环继续往前,请求记录也不会丢\n\n### 3. 状态机\n\n本章里的状态机非常简单:\n\n```text\npending -> approved\npending -> rejected\npending -> expired\n```\n\n这里再次提醒读者:\n\n`状态机` 的意思不是复杂理论, \n只是“状态之间如何变化的一张规则表”。\n\n## 最小实现\n\n### 协议 1:优雅关机\n\n“优雅关机”的意思不是直接把线程硬砍掉。 \n而是:\n\n1. 先发关机请求\n2. 队友明确回复同意或拒绝\n3. 
如果同意,先收尾,再退出\n\n发请求:\n\n```python\ndef request_shutdown(target: str):\n request_id = new_id()\n requests[request_id] = {\n \"kind\": \"shutdown\",\n \"target\": target,\n \"status\": \"pending\",\n }\n bus.send(\n \"lead\",\n target,\n msg_type=\"shutdown_request\",\n extra={\"request_id\": request_id},\n content=\"Please shut down gracefully.\",\n )\n```\n\n收响应:\n\n```python\ndef handle_shutdown_response(request_id: str, approve: bool):\n record = requests[request_id]\n record[\"status\"] = \"approved\" if approve else \"rejected\"\n```\n\n### 协议 2:计划审批\n\n这其实还是同一个请求-响应模板。\n\n比如某个队友想做高风险改动,可以先提计划:\n\n```python\ndef submit_plan(name: str, plan_text: str):\n request_id = new_id()\n requests[request_id] = {\n \"kind\": \"plan_approval\",\n \"from\": name,\n \"status\": \"pending\",\n \"plan\": plan_text,\n }\n bus.send(\n name,\n \"lead\",\n msg_type=\"plan_approval\",\n extra={\"request_id\": request_id, \"plan\": plan_text},\n content=\"Requesting review.\",\n )\n```\n\n领导审批:\n\n```python\ndef review_plan(request_id: str, approve: bool, feedback: str = \"\"):\n record = requests[request_id]\n record[\"status\"] = \"approved\" if approve else \"rejected\"\n bus.send(\n \"lead\",\n record[\"from\"],\n msg_type=\"plan_approval_response\",\n extra={\"request_id\": request_id, \"approve\": approve},\n content=feedback,\n )\n```\n\n看到这里,读者应该开始意识到:\n\n**本章最重要的不是“关机”或“计划”本身,而是同一个协议模板可以反复复用。**\n\n## 协议请求不是普通消息\n\n这一点一定要讲透。\n\n邮箱里虽然都叫“消息”,但 `s16` 以后其实已经分成两类:\n\n### 1. 普通消息\n\n适合:\n\n- 讨论\n- 提醒\n- 补充说明\n\n### 2. 协议消息\n\n适合:\n\n- 审批\n- 关机\n- 交接\n- 签收\n\n它至少要带:\n\n- `type`\n- `request_id`\n- `from`\n- `to`\n- `payload`\n\n最简单的记法是:\n\n- 普通消息解决“说了什么”\n- 协议消息解决“这件事走到哪一步了”\n\n## 如何接到团队系统里\n\n这章真正补上的,不只是两个新工具名,而是一条新的协作回路:\n\n```text\n某个队友 / lead 发起请求\n ->\n写入 RequestRecord\n ->\n把 ProtocolEnvelope 投递进对方 inbox\n ->\n对方下一轮 drain inbox\n ->\n按 request_id 更新请求状态\n ->\n必要时再回一条 response\n ->\n请求方根据 approved / rejected 继续后续动作\n```\n\n你可以把它理解成:\n\n- `s15` 给了团队“邮箱”\n- `s16` 现在给邮箱里的某些消息加上“编号 + 状态机 + 回执”\n\n如果少了这条结构化回路,团队虽然能沟通,但无法稳定协作。\n\n## MessageEnvelope、ProtocolEnvelope、RequestRecord、TaskRecord 的边界\n\n这 4 个对象很容易一起打结。最稳的记法是:\n\n| 对象 | 它回答什么问题 | 典型字段 |\n|---|---|---|\n| `MessageEnvelope` | 谁跟谁说了什么 | `from` / `to` / `content` |\n| `ProtocolEnvelope` | 这是不是一条结构化请求或响应 | `type` / `request_id` / `payload` |\n| `RequestRecord` | 这件协作流程现在走到哪一步 | `kind` / `status` / `from` / `to` |\n| `TaskRecord` | 真正的工作项是什么、谁在做、还卡着谁 | `subject` / `status` / `blockedBy` / `owner` |\n\n一定要牢牢记住:\n\n- 协议请求不是任务本身\n- 请求状态表也不是任务板\n- 协议只负责“协作流程”\n- 任务系统才负责“真正的工作推进”\n\n## 这一章的教学边界\n\n教学版先只讲 2 类协议就够了:\n\n- `shutdown`\n- `plan_approval`\n\n因为这两类已经足够把下面几件事讲清楚:\n\n- 什么是结构化消息\n- 什么是 request_id\n- 为什么要有请求状态表\n- 为什么协议不是自由文本\n\n等这套模板学稳以后,你完全可以再扩展:\n\n- 任务认领协议\n- 交接协议\n- 结果签收协议\n\n但这些都应该建立在本章的统一模板之上。\n\n## 初学者最容易犯的错\n\n### 1. 没有 `request_id`\n\n没有编号,多个请求同时存在时很快就会乱。\n\n### 2. 收到请求以后只回一句自然语言\n\n例如:\n\n```text\n好的,我知道了\n```\n\n人类可能看得懂,但系统很难稳定处理。\n\n### 3. 没有请求状态表\n\n如果系统不记录 `pending` / `approved` / `rejected`,协议其实就没有真正落地。\n\n### 4. 把协议消息和普通消息混成一种结构\n\n这样后面一多,处理逻辑会越来越混。\n\n## 学完这一章,你应该真正掌握什么\n\n学完以后,你应该能独立复述下面几件事:\n\n1. 团队协议的核心,是“请求-响应 + request_id + 状态表”。\n2. 协议消息和普通聊天消息不是一回事。\n3. 关机协议和计划审批虽然业务不同,但底层模板可以复用。\n4. 团队一旦进入结构化协作,就要靠协议,而不是只靠自然语言。\n\n如果这 4 点已经非常稳定,说明这一章真正学到了。\n\n## 下一章学什么\n\n这一章解决的是:\n\n> 团队如何按规则协作。\n\n下一章 `s17` 要解决的是:\n\n> 如果没有人每次都手动派活,队友能不能在空闲时自己找任务、自己恢复工作。\n\n也就是从“协议化协作”继续走向“自治行为”。\n"
+ },
+ {
+ "version": "s17",
+ "slug": "s17-autonomous-agents",
+ "locale": "zh",
+ "title": "s17: Autonomous Agents (自治智能体)",
+ "kind": "chapter",
+ "filename": "s17-autonomous-agents.md",
+ "content": "# s17: Autonomous Agents (自治智能体)\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > [ s17 ] > s18 > s19`\n\n> *一个团队真正开始“自己运转”,不是因为 agent 数量变多,而是因为空闲的队友会自己去找下一份工作。*\n\n## 这一章要解决什么问题\n\n到了 `s16`,团队已经有:\n\n- 持久队友\n- 邮箱\n- 协议\n- 任务板\n\n但还有一个明显瓶颈:\n\n**很多事情仍然要靠 lead 手动分配。**\n\n例如任务板上已经有 10 条可做任务,如果还要 lead 一个个点名:\n\n- Alice 做 1\n- Bob 做 2\n- Charlie 做 3\n\n那团队规模一大,lead 就会变成瓶颈。\n\n所以这一章要解决的核心问题是:\n\n**让空闲队友自己扫描任务板,找到可做的任务并认领。**\n\n## 建议联读\n\n- 如果你开始把 teammate、task、runtime slot 三层一起讲糊,先回 [`team-task-lane-model.md`](./team-task-lane-model.md)。\n- 如果你读到“auto-claim”时开始疑惑“活着的执行槽位”到底放在哪,继续看 [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)。\n- 如果你开始忘记“长期队友”和“一次性 subagent”最根本的区别,回看 [`entity-map.md`](./entity-map.md)。\n\n## 先解释几个名词\n\n### 什么叫自治\n\n这里的自治,不是完全没人管。\n\n这里说的自治是:\n\n> 在提前给定规则的前提下,队友可以自己决定下一步接哪份工作。\n\n### 什么叫认领\n\n认领,就是把一条原本没人负责的任务,标记成“现在由我负责”。\n\n### 什么叫空闲阶段\n\n空闲阶段不是关机,也不是消失。\n\n它表示:\n\n> 这个队友当前手头没有活,但仍然活着,随时准备接新活。\n\n## 最小心智模型\n\n最清楚的理解方式,是把每个队友想成在两个阶段之间切换:\n\n```text\nWORK\n |\n | 当前轮工作做完,或者主动进入 idle\n v\nIDLE\n |\n +-- 看邮箱,有新消息 -> 回到 WORK\n |\n +-- 看任务板,有 ready task -> 认领 -> 回到 WORK\n |\n +-- 长时间什么都没有 -> shutdown\n```\n\n这里的关键不是“让它永远不停想”,而是:\n\n**空闲时,按规则检查两类新输入:邮箱和任务板。**\n\n## 关键数据结构\n\n### 1. Claimable Predicate\n\n和 `s12` 一样,这里最重要的是:\n\n**什么任务算“当前这个队友可以安全认领”的任务。**\n\n在当前教学代码里,判定已经不是单纯看 `pending`,而是:\n\n```python\ndef is_claimable_task(task: dict, role: str | None = None) -> bool:\n return (\n task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")\n and _task_allows_role(task, role)\n )\n```\n\n这 4 个条件缺一不可:\n\n- 任务还没开始\n- 还没人认领\n- 没有前置阻塞\n- 当前队友角色满足认领策略\n\n最后一条很关键。\n\n因为现在任务可以带:\n\n- `claim_role`\n- `required_role`\n\n例如:\n\n```python\ntask = {\n \"id\": 7,\n \"subject\": \"Implement login page\",\n \"status\": \"pending\",\n \"owner\": \"\",\n \"blockedBy\": [],\n \"claim_role\": \"frontend\",\n}\n```\n\n这表示:\n\n> 这条任务不是“谁空着谁就拿”,而是要先过角色条件。\n\n### 2. 认领后的任务记录\n\n一旦认领成功,任务记录至少会发生这些变化:\n\n```python\n{\n \"id\": 7,\n \"owner\": \"alice\",\n \"status\": \"in_progress\",\n \"claimed_at\": 1710000000.0,\n \"claim_source\": \"auto\",\n}\n```\n\n这里新增的两个字段很值得单独记住:\n\n- `claimed_at`:什么时候被认领\n- `claim_source`:这次认领是 `auto` 还是 `manual`\n\n因为到这一步,系统开始不只是知道“任务现在有人做了”,还开始知道:\n\n- 这是谁拿走的\n- 是主动扫描拿走,还是手动点名拿走\n\n### 3. Claim Event Log\n\n除了回写任务文件,这章还会把认领动作追加到:\n\n```text\n.tasks/claim_events.jsonl\n```\n\n每条事件大致长这样:\n\n```python\n{\n \"event\": \"task.claimed\",\n \"task_id\": 7,\n \"owner\": \"alice\",\n \"role\": \"frontend\",\n \"source\": \"auto\",\n \"ts\": 1710000000.0,\n}\n```\n\n为什么这层日志重要?\n\n因为它回答的是“自治系统刚刚做了什么”。\n\n只看最终任务文件,你知道的是:\n\n- 现在是谁 owner\n\n而看事件日志,你才能知道:\n\n- 它是什么时候被拿走的\n- 是谁拿走的\n- 是空闲时自动拿走,还是人工调用 `claim_task`\n\n### 4. Durable Request Record\n\n这章虽然重点是自治,但它**不能从 `s16` 退回到“协议请求只放内存里”**。\n\n所以当前代码里仍然保留了持久化请求记录:\n\n```text\n.team/requests/{request_id}.json\n```\n\n它保存的是:\n\n- shutdown request\n- plan approval request\n- 对应的状态更新\n\n这层边界很重要,因为自治队友并不是在“脱离协议系统另起炉灶”,而是:\n\n> 在已有团队协议之上,额外获得“空闲时自己找活”的能力。\n\n### 5. 身份块\n\n当上下文被压缩后,队友有时会“忘记自己是谁”。\n\n最小补法是重新注入一段身份提示:\n\n```python\nidentity = {\n \"role\": \"user\",\n \"content\": \"You are 'alice', role: frontend, team: default. Continue your work. \",\n}\n```\n\n当前实现里还会同时补一条很短的确认语:\n\n```python\n{\"role\": \"assistant\", \"content\": \"I am alice. 
Continuing.\"}\n```\n\n这样做的目的不是好看,而是为了让恢复后的下一轮继续知道:\n\n- 我是谁\n- 我的角色是什么\n- 我属于哪个团队\n\n## 最小实现\n\n### 第一步:让队友拥有 `WORK -> IDLE` 的循环\n\n```python\nwhile True:\n run_work_phase(...)\n should_resume = run_idle_phase(...)\n if not should_resume:\n break\n```\n\n### 第二步:在 IDLE 里先看邮箱\n\n```python\ndef idle_phase(name: str, messages: list) -> bool:\n inbox = bus.read_inbox(name)\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": json.dumps(inbox),\n })\n return True\n```\n\n这一步的意思是:\n\n如果有人明确找我,那我优先处理“明确发给我的工作”。\n\n### 第三步:如果邮箱没消息,再按“当前角色”扫描可认领任务\n\n```python\n unclaimed = scan_unclaimed_tasks(role)\n if unclaimed:\n task = unclaimed[0]\n claim_result = claim_task(\n task[\"id\"],\n name,\n role=role,\n source=\"auto\",\n )\n```\n\n这里当前代码有两个很关键的升级:\n\n- `scan_unclaimed_tasks(role)` 不是无差别扫任务,而是带着角色过滤\n- `claim_task(..., source=\"auto\")` 会把“这次是自治认领”显式写进任务与事件日志\n\n也就是说,自治不是“空闲了就乱抢一条”,而是:\n\n> 按当前队友的角色、任务状态和阻塞关系,挑出一条真正允许它接手的工作。\n\n### 第四步:认领后先补身份,再把任务提示塞回主循环\n\n```python\n ensure_identity_context(messages, name, role, team_name)\n messages.append({\n \"role\": \"user\",\n \"content\": f\"Task #{task['id']}: {task['subject']} \",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": f\"{claim_result}. Working on it.\",\n })\n return True\n```\n\n这一步非常关键。\n\n因为“认领成功”本身还不等于“队友真的能顺利继续”。\n\n还必须把两件事接回上下文里:\n\n- 身份上下文\n- 新任务提示\n\n只有这样,下一轮 `WORK` 才不是无头苍蝇,而是:\n\n> 带着明确身份和明确任务恢复工作。\n\n### 第五步:长时间没事就退出\n\n```python\n time.sleep(POLL_INTERVAL)\n ...\n return False\n```\n\n为什么需要这个退出路径?\n\n因为空闲队友不一定要永远占着资源。 \n教学版先做“空闲一段时间后关闭”就够了。\n\n## 为什么认领必须是原子动作\n\n“原子”这个词第一次看到可能不熟。\n\n这里它的意思是:\n\n> 认领这一步要么完整成功,要么不发生,不能一半成功一半失败。\n\n为什么?\n\n因为两个队友可能同时扫描到同一个可做任务。\n\n如果没有锁,就可能发生:\n\n- Alice 看见任务 3 没主人\n- Bob 也看见任务 3 没主人\n- 两人都把自己写成 owner\n\n所以最小教学版也应该加一个认领锁:\n\n```python\nwith claim_lock:\n task = load(task_id)\n if task[\"owner\"]:\n return \"already claimed\"\n task[\"owner\"] = name\n task[\"status\"] = \"in_progress\"\n save(task)\n```\n\n## 身份重注入为什么重要\n\n这是这章里一个很容易被忽视,但很关键的点。\n\n当上下文压缩发生以后,队友可能丢掉这些关键信息:\n\n- 我是谁\n- 我的角色是什么\n- 我属于哪个团队\n\n如果没有这些信息,队友后续行为很容易漂。\n\n所以一个很实用的做法是:\n\n如果发现 messages 的开头已经没有身份块,就把身份块重新插回去。\n\n这里你可以把它理解成一条恢复规则:\n\n> 任何一次从 idle 恢复、或任何一次压缩后恢复,只要身份上下文可能变薄,就先补身份,再继续工作。\n\n## 为什么 s17 不能从 s16 退回“内存协议”\n\n这是一个很容易被漏讲,但其实非常重要的点。\n\n很多人一看到“自治”,就容易只盯:\n\n- idle\n- auto-claim\n- 轮询\n\n然后忘了 `s16` 已经建立过的另一条主线:\n\n- 请求必须可追踪\n- 协议状态必须可恢复\n\n所以现在教学代码里,像:\n\n- shutdown request\n- plan approval\n\n仍然会写进:\n\n```text\n.team/requests/{request_id}.json\n```\n\n也就是说,`s17` 不是推翻 `s16`,而是在 `s16` 上继续加一条新能力:\n\n```text\n协议系统继续存在\n +\n自治扫描与认领开始存在\n```\n\n这两条线一起存在,团队才会像一个真正的平台,而不是一堆各自乱跑的 worker。\n\n## 如何接到前面几章里\n\n这一章其实是前面几章第一次真正“串起来”的地方:\n\n- `s12` 提供任务板\n- `s15` 提供持久队友\n- `s16` 提供结构化协议\n- `s17` 则让队友在没有明确点名时,也能自己找活\n\n所以你可以把 `s17` 理解成:\n\n**从“被动协作”升级到“主动协作”。**\n\n## 自治的是“长期队友”,不是“一次性 subagent”\n\n这层边界如果不讲清,读者很容易把 `s04` 和 `s17` 混掉。\n\n`s17` 里的自治执行者,仍然是 `s15` 那种长期队友:\n\n- 有名字\n- 有角色\n- 有邮箱\n- 有 idle 阶段\n- 可以反复接活\n\n它不是那种:\n\n- 接一条子任务\n- 做完返回摘要\n- 然后立刻消失\n\n的一次性 subagent。\n\n同样地,这里认领的也是:\n\n- `s12` 里的工作图任务\n\n而不是:\n\n- `s13` 里的后台执行槽位\n\n所以这章其实是在两条已存在的主线上再往前推一步:\n\n- 长期队友\n- 工作图任务\n\n再把它们用“自治认领”连接起来。\n\n如果你开始把下面这些词混在一起:\n\n- teammate\n- protocol request\n- task\n- runtime task\n\n建议回看:\n\n- [`team-task-lane-model.md`](./team-task-lane-model.md)\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)\n\n## 初学者最容易犯的错\n\n### 1. 只看 `pending`,不看 `blockedBy`\n\n如果一个任务虽然是 `pending`,但前置任务还没完成,它就不应该被认领。\n\n### 2. 
只看状态,不看 `claim_role` / `required_role`\n\n这会让错误的队友接走错误的任务。\n\n教学版虽然简单,但从这一章开始,已经应该明确告诉读者:\n\n- 并不是所有 ready task 都适合所有队友\n- 角色条件本身也是 claim policy 的一部分\n\n### 3. 没有认领锁\n\n这会直接导致重复抢同一条任务。\n\n### 4. 空闲阶段只轮询任务板,不看邮箱\n\n这样队友会错过别人明确发给它的消息。\n\n### 5. 认领了任务,但没有写 claim event\n\n这样最后你只能看到“任务现在被谁做”,却看不到:\n\n- 它是什么时候被拿走的\n- 是自动认领还是手动认领\n\n### 6. 队友永远不退出\n\n教学版里,长时间无事可做时退出是合理的。 \n否则读者会更难理解资源何时释放。\n\n### 7. 上下文压缩后不重注入身份\n\n这很容易让队友后面的行为越来越不像“它本来的角色”。\n\n## 教学边界\n\n这一章先只把自治主线讲清楚:\n\n**空闲检查 -> 安全认领 -> 恢复工作。**\n\n只要这条链路稳了,读者就已经真正理解了“自治”是什么。\n\n更细的 claim policy、公平调度、事件驱动唤醒、长期保活,都应该建立在这条最小自治链之后,而不是抢在前面。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s17_autonomous_agents.py\n```\n\n可以试试这些任务:\n\n1. 先建几条 ready task,再生成两个队友,观察它们是否会自动分工。\n2. 建几条被阻塞的任务,确认队友不会错误认领。\n3. 让某个队友进入 idle,再发一条消息给它,观察它是否会重新被唤醒。\n\n这一章要建立的核心心智是:\n\n**自治不是让 agent 乱跑,而是让它在清晰规则下自己接住下一份工作。**\n"
+ },
+ {
+ "version": "s18",
+ "slug": "s18-worktree-task-isolation",
+ "locale": "zh",
+ "title": "s18: Worktree + Task Isolation (Worktree 任务隔离)",
+ "kind": "chapter",
+ "filename": "s18-worktree-task-isolation.md",
+ "content": "# s18: Worktree + Task Isolation (Worktree 任务隔离)\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > [ s18 ] > s19`\n\n> *任务板解决“做什么”,worktree 解决“在哪做而不互相踩到”。*\n\n## 这一章要解决什么问题\n\n到 `s17` 为止,系统已经可以:\n\n- 拆任务\n- 认领任务\n- 让多个 agent 并行推进不同工作\n\n但如果所有人都在同一个工作目录里改文件,很快就会出现这些问题:\n\n- 两个任务同时改同一个文件\n- 一个任务还没做完,另一个任务的修改已经把目录污染了\n- 想单独回看某个任务的改动范围时,很难分清\n\n也就是说,任务系统已经回答了“谁做什么”,却还没有回答:\n\n**每个任务应该在哪个独立工作空间里执行。**\n\n这就是 worktree 要解决的问题。\n\n## 建议联读\n\n- 如果你开始把 task、runtime slot、worktree lane 三层混成一个词,先看 [`team-task-lane-model.md`](./team-task-lane-model.md)。\n- 如果你想确认 worktree 记录和任务记录分别该保存哪些字段,回看 [`data-structures.md`](./data-structures.md)。\n- 如果你想从“参考仓库主干”角度确认这一章为什么必须晚于 tasks / teams,再看 [`s00e-reference-module-map.md`](./s00e-reference-module-map.md)。\n\n## 先解释几个名词\n\n### 什么是 worktree\n\n如果你熟悉 git,可以把 worktree 理解成:\n\n> 同一个仓库的另一个独立检出目录。\n\n如果你还不熟悉 git,也可以先把它理解成:\n\n> 一条属于某个任务的独立工作车道。\n\n### 什么叫隔离执行\n\n隔离执行就是:\n\n> 任务 A 在自己的目录里跑,任务 B 在自己的目录里跑,彼此默认不共享未提交改动。\n\n### 什么叫绑定\n\n绑定的意思是:\n\n> 把某个任务 ID 和某个 worktree 记录明确关联起来。\n\n## 最小心智模型\n\n最容易理解的方式,是把这一章拆成两张表:\n\n```text\n任务板\n 负责回答:做什么、谁在做、状态如何\n\nworktree 注册表\n 负责回答:在哪做、目录在哪、对应哪个任务\n```\n\n两者通过 `task_id` 连起来:\n\n```text\n.tasks/task_12.json\n {\n \"id\": 12,\n \"subject\": \"Refactor auth flow\",\n \"status\": \"in_progress\",\n \"worktree\": \"auth-refactor\"\n }\n\n.worktrees/index.json\n {\n \"worktrees\": [\n {\n \"name\": \"auth-refactor\",\n \"path\": \".worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\"\n }\n ]\n }\n```\n\n看懂这两条记录,这一章的主线就已经抓住了:\n\n**任务记录工作目标,worktree 记录执行车道。**\n\n## 关键数据结构\n\n### 1. TaskRecord 不再只记录 `worktree`\n\n到当前教学代码这一步,任务记录里和车道相关的字段已经不只一个:\n\n```python\ntask = {\n \"id\": 12,\n \"subject\": \"Refactor auth flow\",\n \"status\": \"in_progress\",\n \"owner\": \"alice\",\n \"worktree\": \"auth-refactor\",\n \"worktree_state\": \"active\",\n \"last_worktree\": \"auth-refactor\",\n \"closeout\": None,\n}\n```\n\n这 4 个字段分别回答不同问题:\n\n- `worktree`:当前还绑定着哪条车道\n- `worktree_state`:这条绑定现在是 `active`、`kept`、`removed` 还是 `unbound`\n- `last_worktree`:最近一次用过哪条车道\n- `closeout`:最后一次收尾动作是什么\n\n为什么要拆这么细?\n\n因为到多 agent 并行阶段,系统已经不只需要知道“现在在哪做”,还需要知道:\n\n- 这条车道现在是不是还活着\n- 它最后是保留还是回收\n- 之后如果恢复或排查,应该看哪条历史车道\n\n### 2. WorktreeRecord 不只是路径映射\n\n```python\nworktree = {\n \"name\": \"auth-refactor\",\n \"path\": \".worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\",\n \"last_entered_at\": 1710000000.0,\n \"last_command_at\": 1710000012.0,\n \"last_command_preview\": \"pytest tests/auth -q\",\n \"closeout\": None,\n}\n```\n\n这里也要特别注意:\n\nworktree 记录回答的不只是“目录在哪”,还开始回答:\n\n- 最近什么时候进入过\n- 最近跑过什么命令\n- 最后是怎么收尾的\n\n这就是为什么这章讲的是:\n\n**可观察的执行车道**\n\n而不只是“多开一个目录”。\n\n### 3. CloseoutRecord\n\n这一章在当前代码里,一个完整的收尾记录大致是:\n\n```python\ncloseout = {\n \"action\": \"keep\",\n \"reason\": \"Need follow-up review\",\n \"at\": 1710000100.0,\n}\n```\n\n这层记录很重要,因为它把“结尾到底发生了什么”显式写出来,而不是靠人猜:\n\n- 是保留目录,方便继续追看\n- 还是回收目录,表示这条执行车道已经结束\n\n### 4. EventRecord\n\n```python\nevent = {\n \"event\": \"worktree.closeout.keep\",\n \"task_id\": 12,\n \"worktree\": \"auth-refactor\",\n \"reason\": \"Need follow-up review\",\n \"ts\": 1710000100.0,\n}\n```\n\n为什么还要事件记录?\n\n因为 worktree 的生命周期经常跨很多步:\n\n- 创建\n- 进入\n- 运行命令\n- 保留\n- 删除\n- 删除失败\n\n有显式事件日志,会比只看当前状态更容易排查问题。\n\n## 最小实现\n\n### 第一步:先有任务,再有 worktree\n\n不要先开目录再回头补任务。\n\n更清楚的顺序是:\n\n1. 先创建任务\n2. 
再为这个任务分配 worktree\n\n```python\ntask = tasks.create(\"Refactor auth flow\")\nworktrees.create(\"auth-refactor\", task_id=task[\"id\"])\n```\n\n### 第二步:创建 worktree 并写入注册表\n\n```python\ndef create(self, name: str, task_id: int):\n path = self.root / \".worktrees\" / name\n branch = f\"wt/{name}\"\n\n run_git([\"worktree\", \"add\", \"-b\", branch, str(path), \"HEAD\"])\n\n record = {\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"task_id\": task_id,\n \"status\": \"active\",\n }\n self.index[\"worktrees\"].append(record)\n self._save_index()\n```\n\n### 第三步:同时更新任务记录,不只是写一个 `worktree`\n\n```python\ndef bind_worktree(task_id: int, name: str):\n task = tasks.load(task_id)\n task[\"worktree\"] = name\n task[\"last_worktree\"] = name\n task[\"worktree_state\"] = \"active\"\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n tasks.save(task)\n```\n\n为什么这一步很关键?\n\n因为如果只更新 worktree 注册表,不更新任务记录,系统就无法从任务板一眼看出“这个任务在哪个隔离目录里做”。\n\n### 第四步:显式进入车道,再在对应目录里执行命令\n\n当前代码里,进入和运行已经拆成两步:\n\n```python\nworktree_enter(\"auth-refactor\")\nworktree_run(\"auth-refactor\", \"pytest tests/auth -q\")\n```\n\n对应到底层,大致就是:\n\n```python\ndef enter(self, name: str):\n self._update_entry(name, last_entered_at=time.time())\n self.events.emit(\"worktree.enter\", ...)\n\ndef run(self, name: str, command: str):\n subprocess.run(command, cwd=worktree_path, ...)\n```\n\n```python\nsubprocess.run(command, cwd=worktree_path, ...)\n```\n\n这一行看起来普通,但它正是隔离的核心:\n\n**同一个命令,在不同 `cwd` 里执行,影响范围就不一样。**\n\n为什么还要单独补一个 `worktree_enter`?\n\n因为教学上你要让读者看见:\n\n- “分配车道”是一回事\n- “真正进入并开始在这条车道里工作”是另一回事\n\n这层边界一清楚,后面的观察字段才有意义:\n\n- `last_entered_at`\n- `last_command_at`\n- `last_command_preview`\n\n### 第五步:收尾时显式走 `worktree_closeout`\n\n不要让收尾是隐式的。\n\n当前更清楚的教学接口不是“分散记两个命令”,而是统一成一个 closeout 动作:\n\n```python\nworktree_closeout(\n name=\"auth-refactor\",\n action=\"keep\", # or \"remove\"\n reason=\"Need follow-up review\",\n complete_task=False,\n)\n```\n\n这样读者会更容易理解:\n\n- 收尾一定要选动作\n- 收尾可以带原因\n- 收尾会同时回写任务记录、车道记录和事件日志\n\n当然,底层仍然保留:\n\n- `worktree_keep(name)`\n- `worktree_remove(name, reason=..., complete_task=True)`\n\n但教学主线最好先把:\n\n> `keep` 和 `remove` 看成同一个 closeout 决策的两个分支\n\n这样读者心智会更顺。\n\n## 为什么 `worktree_state` 和 `status` 要分开\n\n这也是一个很容易被忽略的细点。\n\n很多初学者会想:\n\n> “任务有 `status` 了,为什么还要 `worktree_state`?”\n\n因为这两个状态根本不是一层东西:\n\n- 任务 `status` 回答:这件工作现在是 `pending`、`in_progress` 还是 `completed`\n- `worktree_state` 回答:这条执行车道现在是 `active`、`kept`、`removed` 还是 `unbound`\n\n举个最典型的例子:\n\n```text\n任务已经 completed\n 但 worktree 仍然 kept\n```\n\n这完全可能,而且很常见。 \n比如你已经做完了,但还想保留目录给 reviewer 看。\n\n所以:\n\n**任务状态和车道状态不能混成一个字段。**\n\n## 为什么 worktree 不是“只是一个 git 小技巧”\n\n很多初学者第一次看到这一章,会觉得:\n\n> “这不就是多开几个目录吗?”\n\n这句话只说对了一半。\n\n真正关键的不只是“多开目录”,而是:\n\n**把任务和执行目录做显式绑定,让并行工作有清楚的边界。**\n\n如果没有这层绑定,系统仍然不知道:\n\n- 哪个目录属于哪个任务\n- 收尾时该完成哪条任务\n- 崩溃后该恢复哪条关系\n\n## 如何接到前面章节里\n\n这章和前面几章是强耦合的:\n\n- `s12` 提供任务 ID\n- `s15-s17` 提供队友和认领机制\n- `s18` 则给这些任务提供独立执行车道\n\n把三者连起来看,会变成:\n\n```text\n任务被创建\n ->\n队友认领任务\n ->\n系统为任务分配 worktree\n ->\n命令在对应目录里执行\n ->\n任务完成时决定保留还是删除 worktree\n```\n\n这条链一旦建立,多 agent 并行工作就会清楚很多。\n\n## worktree 不是任务本身,而是任务的执行车道\n\n这句话值得单独再说一次。\n\n很多读者第一次学到这里时,会把这两个词混着用:\n\n- task\n- worktree\n\n但它们回答的其实不是同一个问题:\n\n- task:做什么\n- worktree:在哪做\n\n所以更完整、也更不容易混的表达方式是:\n\n- 工作图任务\n- worktree 执行车道\n\n如果你开始分不清:\n\n- 任务\n- 运行时任务\n- worktree\n\n建议回看:\n\n- [`team-task-lane-model.md`](./team-task-lane-model.md)\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)\n- [`entity-map.md`](./entity-map.md)\n\n## 初学者最容易犯的错\n\n### 1. 
有 worktree 注册表,但任务记录里没有 `worktree`\n\n这样任务板就丢掉了最重要的一条执行信息。\n\n### 2. 有任务 ID,但命令仍然在主目录执行\n\n如果 `cwd` 没切过去,worktree 形同虚设。\n\n### 3. 只会 `worktree_remove`,不会解释 closeout 的含义\n\n这样读者最后只记住“删目录”这个动作,却不知道系统真正想表达的是:\n\n- 保留\n- 回收\n- 为什么这么做\n- 是否同时完结对应任务\n\n### 4. 删除 worktree 前不看未提交改动\n\n这是最危险的一类错误。\n\n教学版也应该至少先建立一个原则:\n\n**删除前先检查是否有脏改动。**\n\n### 5. 没有 `worktree_state` / `closeout` 这类显式收尾状态\n\n这样系统就会只剩下“现在目录还在不在”,而没有:\n\n- 这条车道最后怎么收尾\n- 是主动保留还是主动删除\n\n### 6. 把 worktree 当成长期垃圾堆\n\n如果从不清理,目录会越来越多,状态越来越乱。\n\n### 7. 没有事件日志\n\n一旦创建失败、删除失败或任务关系错乱,没有事件日志会很难排查。\n\n## 教学边界\n\n这章先要讲透的不是所有 worktree 运维细节,而是主干分工:\n\n- task 记录“做什么”\n- worktree 记录“在哪做”\n- enter / execute / closeout 串起这条隔离执行车道\n\n只要这条主干清楚,教学目标就已经达成。\n\n崩溃恢复、删除安全检查、全局缓存区、非 git 回退这些,都应该放在这条主干之后。\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s18_worktree_task_isolation.py\n```\n\n可以试试这些任务:\n\n1. 为两个不同任务各建一个 worktree,观察任务板和注册表的对应关系。\n2. 分别在两个 worktree 里运行 `git status`,感受目录隔离。\n3. 删除一个 worktree,并确认对应任务是否被正确收尾。\n\n读完这一章,你应该能自己说清楚这句话:\n\n**任务系统管“做什么”,worktree 系统管“在哪做且互不干扰”。**\n"
+ },
+ {
+ "version": "s19",
+ "slug": "s19-mcp-plugin",
"locale": "zh",
- "title": "s12: Worktree + Task Isolation (Worktree 任务隔离)",
- "content": "# s12: Worktree + Task Isolation (Worktree 任务隔离)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"各干各的目录, 互不干扰\"* -- 任务管目标, worktree 管目录, 按 ID 绑定。\n\n## 问题\n\n到 s11, Agent 已经能自主认领和完成任务。但所有任务共享一个目录。两个 Agent 同时重构不同模块 -- A 改 `config.py`, B 也改 `config.py`, 未提交的改动互相污染, 谁也没法干净回滚。\n\n任务板管 \"做什么\" 但不管 \"在哪做\"。解法: 给每个任务一个独立的 git worktree 目录, 用任务 ID 把两边关联起来。\n\n## 解决方案\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## 工作原理\n\n1. **创建任务。** 先把目标持久化。\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **创建 worktree 并绑定任务。** 传入 `task_id` 自动将任务推进到 `in_progress`。\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\n绑定同时写入两侧状态:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **在 worktree 中执行命令。** `cwd` 指向隔离目录。\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **收尾。** 两种选择:\n - `worktree_keep(name)` -- 保留目录供后续使用。\n - `worktree_remove(name, complete_task=True)` -- 删除目录, 完成绑定任务, 发出事件。一个调用搞定拆除 + 完成。\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **事件流。** 每个生命周期步骤写入 `.worktrees/events.jsonl`:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\n事件类型: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。\n\n崩溃后从 `.tasks/` + `.worktrees/index.json` 重建现场。会话记忆是易失的; 磁盘状态是持久的。\n\n## 相对 s11 的变更\n\n| 组件 | 之前 (s11) | 之后 (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| 协调 | 任务板 (owner/status) | 任务板 + worktree 显式绑定 |\n| 执行范围 | 共享目录 | 每个任务独立目录 |\n| 可恢复性 | 仅任务状态 | 任务状态 + worktree 索引 |\n| 收尾 | 任务完成 | 任务完成 + 显式 keep/remove |\n| 生命周期可见性 | 隐式日志 | `.worktrees/events.jsonl` 显式事件流 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. 
`Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n"
+ "title": "s19: MCP & Plugin System (MCP 与插件系统)",
+ "kind": "chapter",
+ "filename": "s19-mcp-plugin.md",
+ "content": "# s19: MCP & Plugin System (MCP 与插件系统)\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > [ s19 ]`\n\n> *工具不必都写死在主程序里。外部进程也可以把能力接进你的 agent。*\n\n## 这一章到底在讲什么\n\n前面所有章节里,工具基本都写在你自己的 Python 代码里。\n\n这当然是最适合教学的起点。\n\n但真实系统走到一定阶段以后,会很自然地遇到这个需求:\n\n> “能不能让外部程序也把工具接进来,而不用每次都改主程序?”\n\n这就是 MCP 要解决的问题。\n\n## 先用最简单的话解释 MCP\n\n你可以先把 MCP 理解成:\n\n**一套让 agent 和外部工具程序对话的统一协议。**\n\n在教学版里,不必一开始就背很多协议细节。 \n你只要先抓住这条主线:\n\n1. 启动一个外部工具服务进程\n2. 问它“你有哪些工具”\n3. 当模型要用它的工具时,把请求转发给它\n4. 再把结果带回 agent 主循环\n\n这已经够理解 80% 的核心机制了。\n\n## 为什么这一章放在最后\n\n因为 MCP 不是主循环的起点,而是主循环稳定之后的扩展层。\n\n如果你还没真正理解:\n\n- agent loop\n- tool call\n- permission\n- task\n- worktree\n\n那 MCP 只会看起来像又一套复杂接口。\n\n但当你已经有了前面的心智,再看 MCP,你会发现它本质上只是:\n\n**把“工具来源”从“本地硬编码”升级成“外部可插拔”。**\n\n## 建议联读\n\n- 如果你只把 MCP 理解成“远程 tools”,先看 [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md),把 tools、resources、prompts、plugin 中介层一起放回平台边界里。\n- 如果你想确认外部能力为什么仍然要回到同一条执行面,回看 [`s02b-tool-execution-runtime.md`](./s02b-tool-execution-runtime.md)。\n- 如果你开始把“query 控制平面”和“外部能力路由”完全分开理解,建议配合看 [`s00a-query-control-plane.md`](./s00a-query-control-plane.md)。\n\n## 最小心智模型\n\n```text\nLLM\n |\n | asks to call a tool\n v\nAgent tool router\n |\n +-- native tool -> 本地 Python handler\n |\n +-- MCP tool -> 外部 MCP server\n |\n v\n return result\n```\n\n## 最小系统里最重要的三件事\n\n### 1. 有一个 MCP client\n\n它负责:\n\n- 启动外部进程\n- 发送请求\n- 接收响应\n\n### 2. 有一个工具名前缀规则\n\n这是为了避免命名冲突。\n\n最常见的做法是:\n\n```text\nmcp__{server}__{tool}\n```\n\n比如:\n\n```text\nmcp__postgres__query\nmcp__browser__open_tab\n```\n\n这样一眼就知道:\n\n- 这是 MCP 工具\n- 它来自哪个 server\n- 它原始工具名是什么\n\n### 3. 有一个统一路由器\n\n路由器只做一件事:\n\n- 如果是本地工具,就交给本地 handler\n- 如果是 MCP 工具,就交给 MCP client\n\n## Plugin 又是什么\n\n如果 MCP 解决的是“外部工具怎么通信”, \n那 plugin 解决的是“这些外部工具配置怎么被发现”。\n\n最小 plugin 可以非常简单:\n\n```text\n.claude-plugin/\n plugin.json\n```\n\n里面写:\n\n- 插件名\n- 版本\n- 它提供哪些 MCP server\n- 每个 server 的启动命令是什么\n\n## 最小配置长什么样\n\n```json\n{\n \"name\": \"my-db-tools\",\n \"version\": \"1.0.0\",\n \"mcpServers\": {\n \"postgres\": {\n \"command\": \"npx\",\n \"args\": [\"-y\", \"@modelcontextprotocol/server-postgres\"]\n }\n }\n}\n```\n\n这个配置并不复杂。\n\n它本质上只是在告诉主程序:\n\n> “如果你想接这个 server,就用这条命令把它拉起来。”\n\n## 最小实现步骤\n\n### 第一步:写一个 `MCPClient`\n\n它至少要有三个能力:\n\n- `connect()`\n- `list_tools()`\n- `call_tool()`\n\n### 第二步:把外部工具标准化成 agent 能看懂的工具定义\n\n也就是说,把 MCP server 暴露的工具,转成 agent 工具池里的统一格式。\n\n### 第三步:加前缀\n\n这样主程序就能区分:\n\n- 本地工具\n- 外部工具\n\n### 第四步:写一个 router\n\n```python\nif tool_name.startswith(\"mcp__\"):\n return mcp_router.call(tool_name, arguments)\nelse:\n return native_handler(arguments)\n```\n\n### 第五步:仍然走同一条权限管道\n\n这是非常关键的一点:\n\n**MCP 工具虽然来自外部,但不能绕开 permission。**\n\n不然你等于在系统边上开了个安全后门。\n\n如果你想把这一层再收得更稳,最好再把结果也标准化回同一条总线:\n\n```python\n{\n \"source\": \"mcp\",\n \"server\": \"figma\",\n \"tool\": \"inspect\",\n \"status\": \"ok\",\n \"preview\": \"...\",\n}\n```\n\n这表示:\n\n- 路由前要过共享权限闸门\n- 路由后不论本地还是远程,结果都要转成主循环看得懂的统一格式\n\n## 如何接到整个系统里\n\n如果你读到这里还觉得 MCP 像“外挂”,通常是因为没有把它放回整条主回路里。\n\n更完整的接法应该看成:\n\n```text\n启动时\n ->\nPluginLoader 找到 manifest\n ->\n得到 server 配置\n ->\nMCP client 连接 server\n ->\nlist_tools 并标准化名字\n ->\n和 native tools 一起合并进同一个工具池\n\n运行时\n ->\nLLM 产出 tool_use\n ->\n统一权限闸门\n ->\nnative route 或 mcp route\n ->\n结果标准化\n ->\ntool_result 回到同一个主循环\n```\n\n这段流程里最关键的不是“外部”两个字,而是:\n\n**进入方式不同,但进入后必须回到同一条控制面和执行面。**\n\n## Plugin、MCP Server、MCP Tool 不要混成一层\n\n这是初学者最容易在本章里打结的地方。\n\n可以直接按下面三层记:\n\n| 层级 | 它是什么 | 它负责什么 |\n|---|---|---|\n| plugin manifest | 一份配置声明 | 告诉系统要发现和启动哪些 server |\n| MCP server 
| 一个外部进程 / 连接对象 | 对外暴露一组能力 |\n| MCP tool | server 暴露的一项具体调用能力 | 真正被模型点名调用 |\n\n换成一句最短的话说:\n\n- plugin 负责“发现”\n- server 负责“连接”\n- tool 负责“调用”\n\n只要这三层还分得清,MCP 这章的主体心智就不会乱。\n\n## 这一章最关键的数据结构\n\n### 1. server 配置\n\n```python\n{\n \"command\": \"npx\",\n \"args\": [\"-y\", \"...\"],\n \"env\": {}\n}\n```\n\n### 2. 标准化后的工具定义\n\n```python\n{\n \"name\": \"mcp__postgres__query\",\n \"description\": \"Run a SQL query\",\n \"input_schema\": {...}\n}\n```\n\n### 3. client 注册表\n\n```python\nclients = {\n \"postgres\": mcp_client_instance\n}\n```\n\n## 初学者最容易被带偏的地方\n\n### 1. 一上来讲太多协议细节\n\n这章最容易失控。\n\n因为一旦开始讲完整协议生态,很快会出现:\n\n- transports\n- auth\n- resources\n- prompts\n- streaming\n- connection recovery\n\n这些都存在,但不该挡住主线。\n\n主线只有一句话:\n\n**外部工具也能像本地工具一样接进 agent。**\n\n### 2. 把 MCP 当成一套完全不同的工具系统\n\n不是。\n\n它最终仍然应该汇入你原来的工具体系:\n\n- 一样要注册\n- 一样要出现在工具池里\n- 一样要过权限\n- 一样要返回 `tool_result`\n\n### 3. 忽略命名与路由\n\n如果没有统一前缀和统一路由,系统会很快乱掉。\n\n## 教学边界\n\n这一章正文先停在 `tools-first` 是对的。\n\n因为教学主线最需要先讲清的是:\n\n- 外部能力怎样被发现\n- 怎样被统一命名和路由\n- 怎样继续经过同一条权限与 `tool_result` 回流\n\n只要这一层已经成立,读者就已经真正理解了:\n\n**MCP / plugin 不是外挂,而是接回同一控制面的外部能力入口。**\n\ntransport、认证、resources、prompts、插件生命周期这些更大范围的内容,应该放到平台桥接资料里继续展开。\n\n## 正文先停在 tools-first,平台层再看桥接文档\n\n这一章的正文故意停在“外部工具如何接进 agent”这一层。 \n这是教学上的刻意取舍,不是缺失。\n\n如果你准备继续补平台边界,再去看:\n\n- [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md)\n\n那篇会把 MCP 再往上补成一张平台地图,包括:\n\n- server 配置作用域\n- transport 类型\n- 连接状态:`connected / pending / needs-auth / failed / disabled`\n- tools 之外的 `resources / prompts / elicitation`\n- auth 该放在哪一层理解\n\n这样安排的好处是:\n\n- 正文不失焦\n- 读者又不会误以为 MCP 只有一个 `list_tools + call_tool`\n\n## 这一章和全仓库的关系\n\n如果说前 18 章都在教你把系统内部搭起来, \n那 `s19` 在教你:\n\n**如何把系统向外打开。**\n\n从这里开始,工具不再只来自你手写的 Python 文件, \n还可以来自别的进程、别的系统、别的服务。\n\n这就是为什么它适合作为最后一章。\n\n## 学完这章后,你应该能回答\n\n- MCP 的核心到底是什么?\n- 为什么它应该放在整个学习路径的最后?\n- 为什么 MCP 工具也必须走同一条权限与路由逻辑?\n- plugin 和 MCP 分别解决什么问题?\n\n---\n\n**一句话记住:MCP 的本质,不是协议名词堆砌,而是把外部工具安全、统一地接进你的 agent。**\n"
+ },
+ {
+ "version": null,
+ "slug": "s19a-mcp-capability-layers",
+ "locale": "zh",
+ "title": "s19a: MCP Capability Layers (MCP 能力层地图)",
+ "kind": "bridge",
+ "filename": "s19a-mcp-capability-layers.md",
+ "content": "# s19a: MCP Capability Layers (MCP 能力层地图)\n\n> `s19` 的主线仍然应该坚持“先做 tools-first”。 \n> 这篇桥接文档负责补上另一层心智:\n>\n> **MCP 不只是外部工具接入,它是一组能力层。**\n\n## 建议怎么联读\n\n如果你希望 MCP 这块既不学偏,也不学浅,推荐这样看:\n\n- 先看 [`s19-mcp-plugin.md`](./s19-mcp-plugin.md),先把 tools-first 主线走通。\n- 再看 [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md),确认外部能力最后怎样接回统一工具总线。\n- 如果状态结构开始混,再对照 [`data-structures.md`](./data-structures.md)。\n- 如果概念边界开始混,再回 [`glossary.md`](./glossary.md) 和 [`entity-map.md`](./entity-map.md)。\n\n## 为什么要单独补这一篇\n\n如果你是为了教学,从 0 到 1 手搓一个类似系统,那么 `s19` 主线先只讲外部工具,这是对的。\n\n因为最容易理解的入口就是:\n\n- 连接一个外部 server\n- 拿到工具列表\n- 调用工具\n- 把结果带回 agent\n\n但如果你想把系统做到接近 95%-99% 的还原度,你迟早会遇到这些问题:\n\n- server 是用 stdio、http、sse 还是 ws 连接?\n- 为什么有些 server 是 connected,有些是 pending,有些是 needs-auth?\n- tools 之外,resources 和 prompts 是什么位置?\n- elicitation 为什么会变成一类特殊交互?\n- OAuth / XAA 这种认证流程该放在哪一层理解?\n\n这时候如果没有一张“能力层地图”,MCP 就会越学越散。\n\n## 先解释几个名词\n\n### 什么是能力层\n\n能力层,就是把一个复杂系统拆成几层职责清楚的面。\n\n这里的意思是:\n\n> 不要把所有 MCP 细节混成一团,而要知道每一层到底解决什么问题。\n\n### 什么是 transport\n\n`transport` 可以理解成“连接通道”。\n\n比如:\n\n- stdio\n- http\n- sse\n- websocket\n\n### 什么是 elicitation\n\n这个词比较生。\n\n你可以先把它理解成:\n\n> 外部 MCP server 反过来向用户请求额外输入的一种交互。\n\n也就是说,不再只是 agent 主动调工具,而是 server 也能说:\n\n“我还需要你给我一点信息,我才能继续。”\n\n## 最小心智模型\n\n先把 MCP 画成 6 层:\n\n```text\n1. Config Layer\n server 配置长什么样\n\n2. Transport Layer\n 用什么通道连 server\n\n3. Connection State Layer\n 现在是 connected / pending / failed / needs-auth\n\n4. Capability Layer\n tools / resources / prompts / elicitation\n\n5. Auth Layer\n 是否需要认证,认证状态如何\n\n6. Router Integration Layer\n 如何接回 tool router / permission / notifications\n```\n\n最重要的一点是:\n\n**tools 只是其中一层,不是全部。**\n\n## 为什么正文仍然应该坚持 tools-first\n\n这点非常重要。\n\n虽然 MCP 平台本身有多层能力,但正文主线仍然应该这样安排:\n\n### 第一步:先教外部 tools\n\n因为它和前面的主线最自然衔接:\n\n- 本地工具\n- 外部工具\n- 同一条 router\n\n### 第二步:再告诉读者还有其他能力层\n\n例如:\n\n- resources\n- prompts\n- elicitation\n- auth\n\n### 第三步:再决定是否继续实现\n\n这才符合你的教学目标:\n\n**先做出类似系统,再补平台层高级能力。**\n\n## 关键数据结构\n\n### 1. ScopedMcpServerConfig\n\n最小教学版建议至少让读者看到这个概念:\n\n```python\nconfig = {\n \"name\": \"postgres\",\n \"type\": \"stdio\",\n \"command\": \"npx\",\n \"args\": [\"-y\", \"...\"],\n \"scope\": \"project\",\n}\n```\n\n这里的 `scope` 很重要。\n\n因为 server 配置不一定都来自同一个地方。\n\n### 2. MCP Connection State\n\n```python\nserver_state = {\n \"name\": \"postgres\",\n \"status\": \"connected\", # pending / failed / needs-auth / disabled\n \"config\": {...},\n}\n```\n\n### 3. MCPToolSpec\n\n```python\ntool = {\n \"name\": \"mcp__postgres__query\",\n \"description\": \"...\",\n \"input_schema\": {...},\n}\n```\n\n### 4. ElicitationRequest\n\n```python\nrequest = {\n \"server_name\": \"some-server\",\n \"message\": \"Please provide additional input\",\n \"requested_schema\": {...},\n}\n```\n\n这一步不是要求你主线立刻实现它,而是要让读者知道:\n\n**MCP 不一定永远只是“模型调工具”。**\n\n## 一张更完整但仍然清楚的图\n\n```text\nMCP Config\n |\n v\nTransport\n |\n v\nConnection State\n |\n +-- connected\n +-- pending\n +-- needs-auth\n +-- failed\n |\n v\nCapabilities\n +-- tools\n +-- resources\n +-- prompts\n +-- elicitation\n |\n v\nRouter / Permission / Notification Integration\n```\n\n## Auth 为什么不要在主线里讲太多\n\n这也是教学取舍里很重要的一点。\n\n认证是真实系统里确实存在的能力层。 \n但如果正文一开始就掉进 OAuth/XAA 流程,初学者会立刻丢主线。\n\n所以更好的讲法是:\n\n- 先告诉读者:有 auth layer\n- 再告诉读者:connected / needs-auth 是不同连接状态\n- 只有做平台层进阶时,再详细展开认证流程\n\n这就既没有幻觉,也没有把人带偏。\n\n## 它和 `s19`、`s02a` 的关系\n\n- `s19` 正文继续负责 tools-first 教学\n- 这篇负责补清平台层地图\n- `s02a` 的 Tool Control Plane 则解释 MCP 最终怎么接回统一工具总线\n\n三者合在一起,读者才会真正知道:\n\n**MCP 是外部能力平台,而 tools 只是它最先进入主线的那个切面。**\n\n## 初学者最容易犯的错\n\n### 1. 
把 MCP 只理解成“外部工具目录”\n\n这会让后面遇到 auth / resources / prompts / elicitation 时很困惑。\n\n### 2. 一上来就沉迷 transport 和 OAuth 细节\n\n这样会直接打断主线。\n\n### 3. 让 MCP 工具绕过 permission\n\n这会在系统边上开一个很危险的后门。\n\n### 4. 不区分 server 配置、连接状态、能力暴露\n\n这三层一混,平台层就会越学越乱。\n\n## 教学边界\n\n这篇最重要的,不是把 MCP 所有外设细节都讲完,而是先守住四层边界:\n\n- server 配置\n- 连接状态\n- capability 暴露\n- permission / routing 接入点\n\n只要这四层不混,你就已经能自己手搓一个接近真实系统主脉络的外部能力入口。 \n认证状态机、resource/prompt 接入、server 回问和重连策略,都属于后续平台扩展。\n\n## 一句话记住\n\n**`s19` 主线应该先教“外部工具接入”,而平台层还需要额外理解 MCP 的能力层地图。**\n"
+ },
+ {
+ "version": null,
+ "slug": "teaching-scope",
+ "locale": "zh",
+ "title": "Teaching Scope (教学范围说明)",
+ "kind": "bridge",
+ "filename": "teaching-scope.md",
+ "content": "# Teaching Scope (教学范围说明)\n\n> 这份文档不是讲某一章,而是说明整个教学仓库到底要教什么、不教什么,以及每一章应该怎么写才不会把读者带偏。\n\n## 这份仓库的目标\n\n这不是一份“逐行对照某份源码”的注释仓库。\n\n这份仓库真正的目标是:\n\n**教开发者从 0 到 1 手搓一个结构完整、高保真的 coding agent harness。**\n\n这里强调 3 件事:\n\n1. 读者真的能自己实现出来。\n2. 读者能抓住系统主脉络,而不是淹没在边角细节里。\n3. 读者对关键机制的理解足够高保真,不会学到不存在的机制。\n\n## 什么必须讲清楚\n\n主线章节必须优先讲清下面这些内容:\n\n- 整个系统有哪些核心模块\n- 模块之间如何协作\n- 每个模块解决什么问题\n- 关键状态保存在哪里\n- 关键数据结构长什么样\n- 主循环如何把这些机制接进来\n\n如果一个章节讲完以后,读者还不知道“这个机制到底放在系统哪一层、保存了哪些状态、什么时候被调用”,那这章就还没讲透。\n\n## 什么不要占主线篇幅\n\n下面这些内容,不是完全不能提,而是**不应该占用主线正文的大量篇幅**:\n\n- 打包、编译、发布流程\n- 跨平台兼容胶水\n- 遥测、企业策略、账号体系\n- 与教学主线无关的历史兼容分支\n- 只对特定产品环境有意义的接线细节\n- 某份上游源码里的函数名、文件名、行号级对照\n\n这些内容最多作为:\n\n- 维护者备注\n- 附录\n- 桥接资料里的平台扩展说明\n\n而不应该成为初学者第一次学习时的主线。\n\n## 真正的“高保真”是什么意思\n\n教学仓库追求的高保真,不是所有边角细节都 1:1。\n\n这里的高保真,是指这些东西要尽量贴近真实系统主干:\n\n- 核心运行模式\n- 主要模块边界\n- 关键数据结构\n- 模块之间的协作方式\n- 关键状态转换\n\n换句话说:\n\n**主干尽量高保真,外围细节可以做教学取舍。**\n\n## 面向谁来写\n\n本仓库默认读者不是“已经做过复杂 agent 平台的人”。\n\n更合理的默认读者应该是:\n\n- 会一点编程\n- 能读懂基本 Python\n- 但没有系统实现过 agent\n\n所以写作时要假设:\n\n- 很多术语是第一次见\n- 很多系统设计名词不能直接甩出来不解释\n- 同一个概念不能分散在五个地方才拼得完整\n\n## 每一章的推荐结构\n\n主线章节尽量遵守这条顺序:\n\n1. `这一章要解决什么问题`\n2. `先解释几个名词`\n3. `最小心智模型`\n4. `关键数据结构`\n5. `最小实现`\n6. `如何接到主循环里`\n7. `初学者最容易犯的错`\n8. `教学边界`\n\n这条顺序的价值在于:\n\n- 先让读者知道为什么需要这个机制\n- 再让读者知道这个机制到底是什么\n- 然后马上看到它怎么落地\n\n这里把最后一节写成 `教学边界`,而不是“继续补一大串外围复杂度清单”,是因为教学仓库更应该先帮读者守住:\n\n- 这一章先学到哪里就够了\n- 哪些复杂度现在不要一起拖进来\n- 读者真正该自己手搓出来的最小正确版本是什么\n\n## 术语使用规则\n\n只要出现这些类型的词,就应该解释:\n\n- 软件设计模式\n- 数据结构名词\n- 并发与进程相关名词\n- 协议与网络相关名词\n- 初学者不熟悉的工程术语\n\n例如:\n\n- 状态机\n- 调度器\n- 队列\n- worktree\n- DAG\n- 协议 envelope\n\n不要只给名字,不给解释。\n\n## “最小正确版本”原则\n\n很多真实机制都很复杂。\n\n但教学版不应该一开始就把所有分支一起讲。\n\n更好的顺序是:\n\n1. 先给出一个最小但正确的版本\n2. 解释它已经解决了哪部分核心问题\n3. 再讲如果继续迭代应该补什么\n\n例如:\n\n- 权限系统先做 `deny -> mode -> allow -> ask`\n- 错误恢复先做 3 条主恢复路径\n- 任务系统先做任务记录、依赖、解锁\n- 团队协议先做 request/response + request_id\n\n## 文档和代码要一起维护,而不是各讲各的\n\n如果正文和本地 `agents/*.py` 没有对齐,读者一打开代码就会重新混乱。\n\n所以维护者重写章节时,应该同步检查三件事:\n\n1. 这章正文里的关键状态,代码里是否真有对应结构\n2. 这章正文里的主回路,代码里是否真有对应入口函数\n3. 这章正文里强调的“教学边界”,代码里是否也没有提前塞进过多外层复杂度\n\n最稳的做法是让每章都能对应到:\n\n- 1 个主文件\n- 1 组关键状态结构\n- 1 条最值得先看的执行路径\n\n如果维护者需要一份“按章节读本仓库代码”的地图,建议配合看:\n\n- [`s00f-code-reading-order.md`](./s00f-code-reading-order.md)\n\n## 维护者重写时的检查清单\n\n如果你在重写某一章,可以用下面这份清单自检:\n\n- 这章第一屏有没有明确说明“为什么需要它”\n- 是否先解释了新名词,再使用新名词\n- 是否给出了最小心智模型图或流程\n- 是否明确列出关键数据结构\n- 是否说明了它如何接进主循环\n- 是否区分了“核心机制”和“产品化外围细节”\n- 是否列出了初学者最容易混淆的点\n- 是否避免制造源码里并不存在的幻觉机制\n\n## 维护者如何使用“逆向源码”\n\n逆向得到的源码,在这套仓库里应当只扮演一个角色:\n\n**维护者的校准参考。**\n\n它的用途是:\n\n- 校验主干机制有没有讲错\n- 校验关键状态和模块边界有没有遗漏\n- 校验教学实现有没有偏离到错误方向\n\n它不应该成为读者理解正文的前提。\n\n正文应该做到:\n\n> 即使读者完全不看那份源码,也能把核心系统自己做出来。\n\n## 这份教学仓库应该追求什么分数\n\n如果满分是 150 分,一个接近满分的教学仓库应同时做到:\n\n- 主线清楚\n- 章节顺序合理\n- 新名词解释完整\n- 数据结构清晰\n- 机制边界准确\n- 例子可运行\n- 升级路径自然\n\n真正决定分数高低的,不是“提到了多少细节”,而是:\n\n**提到的关键细节是否真的讲透,没提的非关键细节是否真的可以安全省略。**\n"
+ },
+ {
+ "version": null,
+ "slug": "team-task-lane-model",
+ "locale": "zh",
+ "title": "Team Task Lane Model (队友-任务-车道模型)",
+ "kind": "bridge",
+ "filename": "team-task-lane-model.md",
+ "content": "# Team Task Lane Model (队友-任务-车道模型)\n\n> 到了 `s15-s18`,读者最容易混掉的,不是某个函数名,而是:\n>\n> **系统里到底是谁在工作、谁在协调、谁在记录目标、谁在提供执行目录。**\n\n## 这篇桥接文档解决什么问题\n\n如果你一路从 `s15` 看到 `s18`,脑子里很容易把下面这些词混在一起:\n\n- teammate\n- protocol request\n- task\n- runtime task\n- worktree\n\n它们都和“工作推进”有关。 \n但它们不是同一层。\n\n如果这层边界不单独讲清,后面读者会经常出现这些困惑:\n\n- 队友是不是任务本身?\n- `request_id` 和 `task_id` 有什么区别?\n- worktree 是不是后台任务的一种?\n- 一个任务完成了,为什么 worktree 还能保留?\n\n这篇就是专门用来把这几层拆开的。\n\n## 建议怎么联读\n\n最推荐的读法是:\n\n1. 先看 [`s15-agent-teams.md`](./s15-agent-teams.md),确认长期队友在讲什么。\n2. 再看 [`s16-team-protocols.md`](./s16-team-protocols.md),确认请求-响应协议在讲什么。\n3. 再看 [`s17-autonomous-agents.md`](./s17-autonomous-agents.md),确认自治认领在讲什么。\n4. 最后看 [`s18-worktree-task-isolation.md`](./s18-worktree-task-isolation.md),确认隔离执行车道在讲什么。\n\n如果你开始混:\n\n- 回 [`entity-map.md`](./entity-map.md) 看模块边界。\n- 回 [`data-structures.md`](./data-structures.md) 看记录结构。\n- 回 [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) 看“目标任务”和“运行时执行槽位”的差别。\n\n## 先给结论\n\n先记住这一组最重要的区分:\n\n```text\nteammate\n = 谁在长期参与协作\n\nprotocol request\n = 团队内部一次需要被追踪的协调请求\n\ntask\n = 要做什么\n\nruntime task / execution slot\n = 现在有什么执行单元正在跑\n\nworktree\n = 在哪做,而且不和别人互相踩目录\n```\n\n这五层里,最容易混的是最后三层:\n\n- `task`\n- `runtime task`\n- `worktree`\n\n所以你必须反复问自己:\n\n- 这是“目标”吗?\n- 这是“执行中的东西”吗?\n- 这是“执行目录”吗?\n\n## 一张最小清晰图\n\n```text\nTeam Layer\n teammate: alice (frontend)\n teammate: bob (backend)\n\nProtocol Layer\n request_id=req_01\n kind=plan_approval\n status=pending\n\nWork Graph Layer\n task_id=12\n subject=\"Implement login page\"\n owner=\"alice\"\n status=\"in_progress\"\n\nRuntime Layer\n runtime_id=rt_01\n type=in_process_teammate\n status=running\n\nExecution Lane Layer\n worktree=login-page\n path=.worktrees/login-page\n status=active\n```\n\n你可以看到:\n\n- `alice` 不是任务\n- `request_id` 不是任务\n- `runtime_id` 也不是任务\n- `worktree` 更不是任务\n\n真正表达“这件工作本身”的,只有 `task_id=12` 那层。\n\n## 1. Teammate:谁在长期协作\n\n这是 `s15` 开始建立的层。\n\n它回答的是:\n\n- 这个长期 worker 叫什么\n- 它是什么角色\n- 它当前是 working、idle 还是 shutdown\n- 它有没有独立 inbox\n\n最小例子:\n\n```python\nmember = {\n \"name\": \"alice\",\n \"role\": \"frontend\",\n \"status\": \"idle\",\n}\n```\n\n这层的核心不是“又多开一个 agent”。\n\n而是:\n\n> 系统开始有长期存在、可重复接活、可被点名协作的身份。\n\n## 2. Protocol Request:谁在协调什么\n\n这是 `s16` 建立的层。\n\n它回答的是:\n\n- 有谁向谁发起了一个需要追踪的请求\n- 这条请求是什么类型\n- 它现在是 pending、approved 还是 rejected\n\n最小例子:\n\n```python\nrequest = {\n \"request_id\": \"a1b2c3d4\",\n \"kind\": \"plan_approval\",\n \"from\": \"alice\",\n \"to\": \"lead\",\n \"status\": \"pending\",\n}\n```\n\n这一层不要和普通聊天混。\n\n因为它不是“发一条消息就算完”,而是:\n\n> 一条可以被继续更新、继续审核、继续恢复的协调记录。\n\n## 3. Task:要做什么\n\n这是 `s12` 的工作图任务,也是 `s17` 自治认领的对象。\n\n它回答的是:\n\n- 目标是什么\n- 谁负责\n- 是否有阻塞\n- 当前进度如何\n\n最小例子:\n\n```python\ntask = {\n \"id\": 12,\n \"subject\": \"Implement login page\",\n \"status\": \"in_progress\",\n \"owner\": \"alice\",\n \"blockedBy\": [],\n}\n```\n\n这层的关键词是:\n\n**目标**\n\n不是目录,不是协议,不是进程。\n\n## 4. Runtime Task / Execution Slot:现在有什么执行单元在跑\n\n这一层在 `s13` 的桥接文档里已经单独解释过,但到了 `s15-s18` 必须再提醒一次。\n\n比如:\n\n- 一个后台 shell 正在跑\n- 一个长期 teammate 正在工作\n- 一个 monitor 正在观察外部状态\n\n这些都更像:\n\n> 正在运行的执行槽位\n\n而不是“任务目标本身”。\n\n最小例子:\n\n```python\nruntime = {\n \"id\": \"rt_01\",\n \"type\": \"in_process_teammate\",\n \"status\": \"running\",\n \"work_graph_task_id\": 12,\n}\n```\n\n这里最重要的边界是:\n\n- 一个任务可以派生多个 runtime task\n- 一个 runtime task 通常只是“如何执行”的一个实例\n\n## 5. 
Worktree:在哪做\n\n这是 `s18` 建立的执行车道层。\n\n它回答的是:\n\n- 这份工作在哪个独立目录里做\n- 这条目录车道对应哪个任务\n- 这条车道现在是 active、kept 还是 removed\n\n最小例子:\n\n```python\nworktree = {\n \"name\": \"login-page\",\n \"path\": \".worktrees/login-page\",\n \"task_id\": 12,\n \"status\": \"active\",\n}\n```\n\n这层的关键词是:\n\n**执行边界**\n\n它不是工作目标本身,而是:\n\n> 让这份工作在独立目录里推进的执行车道。\n\n## 这五层怎么连起来\n\n你可以把后段章节连成下面这条链:\n\n```text\nteammate\n 通过 protocol request 协调\n 认领 task\n 作为一个 runtime execution slot 持续运行\n 在某条 worktree lane 里改代码\n```\n\n如果写得更具体一点,会变成:\n\n```text\nalice (teammate)\n ->\n收到或发起一个 request_id\n ->\n认领 task #12\n ->\n开始作为执行单元推进工作\n ->\n进入 worktree \"login-page\"\n ->\n在 .worktrees/login-page 里运行命令和改文件\n```\n\n## 一个最典型的混淆例子\n\n很多读者会把这句话说成:\n\n> “alice 就是在做 login-page 这个 worktree 任务。”\n\n这句话把三层东西混成了一句:\n\n- `alice`:队友\n- `login-page`:worktree\n- “任务”:工作图任务\n\n更准确的说法应该是:\n\n> `alice` 认领了 `task #12`,并在 `login-page` 这条 worktree 车道里推进它。\n\n一旦你能稳定地这样表述,后面几章就不容易乱。\n\n## 初学者最容易犯的错\n\n### 1. 把 teammate 和 task 混成一个对象\n\n队友是执行者,任务是目标。\n\n### 2. 把 `request_id` 和 `task_id` 混成一个 ID\n\n一个负责协调,一个负责工作目标,不是同一层。\n\n### 3. 把 runtime slot 当成 durable task\n\n运行时执行单元会结束,但 durable task 还可能继续存在。\n\n### 4. 把 worktree 当成任务本身\n\nworktree 只是执行目录边界,不是任务目标。\n\n### 5. 只会讲“系统能并行”,却说不清每层对象各自负责什么\n\n这是最常见也最危险的模糊表达。\n\n真正清楚的教学,不是说“这里好多 agent 很厉害”,而是能把下面这句话讲稳:\n\n> 队友负责长期协作,请求负责协调流程,任务负责表达目标,运行时槽位负责承载执行,worktree 负责隔离执行目录。\n\n## 读完这篇你应该能自己说清楚\n\n至少能完整说出下面这两句话:\n\n1. `s17` 的自治认领,认领的是 `s12` 的工作图任务,不是 `s13` 的运行时槽位。\n2. `s18` 的 worktree,绑定的是任务的执行车道,而不是把任务本身变成目录。\n\n如果这两句你已经能稳定说清,`s15-s18` 这一大段主线就基本不会再拧巴了。\n"
+ },
+ {
+ "version": null,
+ "slug": "data-structures",
+ "locale": "ja",
+ "title": "Core Data Structures (主要データ構造マップ)",
+ "kind": "bridge",
+ "filename": "data-structures.md",
+ "content": "# Core Data Structures (主要データ構造マップ)\n\n> agent 学習でいちばん迷いやすいのは、機能の多さそのものではなく、 \n> **「今の状態がどの record に入っているのか」が見えなくなること**です。 \n> この文書は、主線章と bridge doc に繰り返し出てくる record をひとつの地図として並べ直し、 \n> 読者が system 全体を「機能一覧」ではなく「状態の配置図」として理解できるようにするための資料です。\n\n## どう使うか\n\nこの資料は辞書というより、`state map` として使ってください。\n\n- 単語の意味が怪しくなったら [`glossary.md`](./glossary.md) へ戻る\n- object 同士の境界が混ざったら [`entity-map.md`](./entity-map.md) を開く\n- `TaskRecord` と `RuntimeTaskState` が混ざったら [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) を読む\n- MCP で tools 以外の layer が混ざったら [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md) を併読する\n\n## 最初にこの 2 本だけは覚える\n\n### 原則 1: 内容状態と制御状態を分ける\n\n内容状態とは、system が「何を扱っているか」を表す状態です。\n\n例:\n\n- `messages`\n- `tool_result`\n- memory の本文\n- task の title や description\n\n制御状態とは、system が「次にどう進むか」を表す状態です。\n\n例:\n\n- `turn_count`\n- `transition`\n- `has_attempted_compact`\n- `max_output_tokens_override`\n- `pending_classifier_check`\n\nこの 2 つを混ぜると、読者はすぐに次の疑問で詰まります。\n\n- なぜ `messages` だけでは足りないのか\n- なぜ control plane が必要なのか\n- なぜ recovery や compact が別 state を持つのか\n\n### 原則 2: durable state と runtime state を分ける\n\n`durable state` は、session をまたいでも残す価値がある状態です。\n\n例:\n\n- task\n- memory\n- schedule\n- team roster\n\n`runtime state` は、system が動いている間だけ意味を持つ状態です。\n\n例:\n\n- 現在の permission decision\n- 今走っている runtime task\n- active MCP connection\n- 今回の query の continuation reason\n\nこの区別が曖昧だと、task・runtime slot・notification・schedule・worktree が全部同じ層に見えてしまいます。\n\n## 1. Query と会話制御の状態\n\nこの層の核心は:\n\n> 会話内容を持つ record と、query の進行理由を持つ record は別物である\n\nです。\n\n### `Message`\n\n役割:\n\n- user と assistant の会話履歴を持つ\n- tool 呼び出し前後の往復も保存する\n\n最小形:\n\n```python\nmessage = {\n \"role\": \"user\" | \"assistant\",\n \"content\": \"...\",\n}\n```\n\nagent が tool を使い始めると、`content` は単なる文字列では足りなくなり、次のような block list になることがあります。\n\n- text block\n- `tool_use`\n- `tool_result`\n\nこの record の本質は、**会話内容の記録**です。 \n「なぜ次ターンへ進んだか」は `Message` の責務ではありません。\n\n関連章:\n\n- `s01`\n- `s02`\n- `s06`\n- `s10`\n\n### `NormalizedMessage`\n\n役割:\n\n- さまざまな内部 message を、model API に渡せる統一形式へ揃える\n\n最小形:\n\n```python\nmessage = {\n \"role\": \"user\" | \"assistant\",\n \"content\": [\n {\"type\": \"text\", \"text\": \"...\"},\n ],\n}\n```\n\n`Message` と `NormalizedMessage` の違い:\n\n- `Message`: system 内部の履歴 record に近い\n- `NormalizedMessage`: model 呼び出し直前の入力形式に近い\n\nつまり、前者は「何を覚えているか」、後者は「何を送るか」です。\n\n関連章:\n\n- `s10`\n- [`s10a-message-prompt-pipeline.md`](./s10a-message-prompt-pipeline.md)\n\n### `CompactSummary`\n\n役割:\n\n- context が長くなり過ぎたとき、古い会話を要約へ置き換える\n\n最小形:\n\n```python\nsummary = {\n \"task_overview\": \"...\",\n \"current_state\": \"...\",\n \"key_decisions\": [\"...\"],\n \"next_steps\": [\"...\"],\n}\n```\n\n重要なのは、compact が「ログ削除」ではないことです。 \ncompact summary は次の query 継続に必要な最小構造を残す record です。\n\n最低でも次の 4 つは落とさないようにします。\n\n- task の大枠\n- ここまで終わったこと\n- 重要な判断\n- 次にやるべきこと\n\n関連章:\n\n- `s06`\n- `s11`\n\n### `SystemPromptBlock`\n\n役割:\n\n- system prompt を section 単位で管理する\n\n最小形:\n\n```python\nblock = {\n \"text\": \"...\",\n \"cache_scope\": None,\n}\n```\n\nこの record を持つ意味:\n\n- prompt を一枚岩の巨大文字列にしない\n- どの section が何の役割か説明できる\n- 後から block 単位で差し替えや検査ができる\n\n`cache_scope` は最初は不要でも構いません。 \nただ、「この block は比較的安定」「この block は毎ターン変わる」という発想は早めに持っておくと、system prompt の理解が崩れにくくなります。\n\n関連章:\n\n- `s10`\n- [`s10a-message-prompt-pipeline.md`](./s10a-message-prompt-pipeline.md)\n\n### `PromptParts`\n\n役割:\n\n- system prompt を最終連結する前に、構成 source ごとに分けて持つ\n\n最小形:\n\n```python\nparts = {\n \"core\": \"...\",\n \"tools\": \"...\",\n \"skills\": 
\"...\",\n \"memory\": \"...\",\n \"dynamic\": \"...\",\n}\n```\n\nこの record は、読者に次のことを教えます。\n\n- prompt は「書かれている」のではなく「組み立てられている」\n- stable policy と volatile runtime data は同じ section ではない\n- input source ごとに責務を分けた方が debug しやすい\n\n関連章:\n\n- `s10`\n\n### `QueryParams`\n\n役割:\n\n- query 開始時点で外部から受け取る入口入力\n\n最小形:\n\n```python\nparams = {\n \"messages\": [...],\n \"system_prompt\": \"...\",\n \"user_context\": {...},\n \"system_context\": {...},\n \"tool_use_context\": {...},\n \"fallback_model\": None,\n \"max_output_tokens_override\": None,\n \"max_turns\": None,\n}\n```\n\nここで大切なのは:\n\n- これは query の**入口入力**である\n- query の途中でどんどん変わる内部状態とは別である\n\nつまり `QueryParams` は「入る前に決まっているもの」、`QueryState` は「入ってから変わるもの」です。\n\n関連章:\n\n- [`s00a-query-control-plane.md`](./s00a-query-control-plane.md)\n\n### `QueryState`\n\n役割:\n\n- 1 本の query が複数ターンにわたって進む間の制御状態を持つ\n\n最小形:\n\n```python\nstate = {\n \"messages\": [...],\n \"tool_use_context\": {...},\n \"turn_count\": 1,\n \"max_output_tokens_recovery_count\": 0,\n \"has_attempted_reactive_compact\": False,\n \"max_output_tokens_override\": None,\n \"pending_tool_use_summary\": None,\n \"stop_hook_active\": False,\n \"transition\": None,\n}\n```\n\nこの record に入るものの共通点:\n\n- 対話内容そのものではない\n- 「次をどう続けるか」を決める情報である\n\n初心者がよく詰まる点:\n\n- `messages` が入っているので「全部 conversation state に見える」\n- しかし `turn_count` や `transition` は会話ではなく control state\n\nこの record を理解できると、\n\n- recovery\n- compact\n- hook continuation\n- token budget continuation\n\nがすべて「同じ query を継続する理由の差分」として読めるようになります。\n\n関連章:\n\n- [`s00a-query-control-plane.md`](./s00a-query-control-plane.md)\n- `s11`\n\n### `TransitionReason`\n\n役割:\n\n- 前ターンが終わらず、次ターンへ続いた理由を明示する\n\n最小形:\n\n```python\ntransition = {\n \"reason\": \"next_turn\",\n}\n```\n\nより実用的には次のような値が入ります。\n\n- `next_turn`\n- `tool_result_continuation`\n- `reactive_compact_retry`\n- `max_output_tokens_recovery`\n- `stop_hook_continuation`\n\nこれを別 record として持つ利点:\n\n- log が読みやすい\n- test が書きやすい\n- recovery の分岐理由を説明しやすい\n\nつまりこれは「高度な最適化」ではなく、 \n**継続理由を見える状態へ変えるための最小構造**です。\n\n関連章:\n\n- [`s00a-query-control-plane.md`](./s00a-query-control-plane.md)\n- `s11`\n\n## 2. 
Tool 実行・権限・hook の状態\n\nこの層の核心は:\n\n> tool は `name -> handler` だけで完結せず、その前後に permission / runtime / hook の状態が存在する\n\nです。\n\n### `ToolSpec`\n\n役割:\n\n- model に「どんな tool があり、どんな入力を受け取るか」を見せる\n\n最小形:\n\n```python\ntool = {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {...},\n}\n```\n\nこれは execution 実装そのものではありません。 \nあくまで **model に見せる contract** です。\n\n関連章:\n\n- `s02`\n- `s19`\n\n### `ToolDispatchMap`\n\n役割:\n\n- tool 名を実際の handler 関数へ引く\n\n最小形:\n\n```python\ndispatch = {\n \"read_file\": run_read_file,\n \"write_file\": run_write_file,\n}\n```\n\nこの record の仕事は単純です。\n\n- 正しい handler を見つける\n\nただし実システムではこれだけで足りません。 \n本当に難しいのは:\n\n- いつ実行するか\n- 並列にしてよいか\n- permission を通すか\n- 結果をどう loop へ戻すか\n\nです。\n\n関連章:\n\n- `s02`\n- [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md)\n\n### `ToolUseContext`\n\n役割:\n\n- tool が共有状態へ触るための窓口を持つ\n\n最小形:\n\n```python\ncontext = {\n \"workspace\": \"...\",\n \"permission_system\": perms,\n \"notifications\": queue,\n \"memory_store\": memory,\n}\n```\n\nこの record がないと、各 tool が勝手に global state を触り始め、system 全体の境界が崩れます。\n\nつまり `ToolUseContext` は、\n\n> tool が system とどこで接続するか\n\nを見える形にするための record です。\n\n関連章:\n\n- `s02`\n- `s07`\n- `s09`\n- `s13`\n\n### `ToolResultEnvelope`\n\n役割:\n\n- tool 実行結果を loop が扱える統一形式で包む\n\n最小形:\n\n```python\nresult = {\n \"tool_use_id\": \"toolu_123\",\n \"content\": \"...\",\n}\n```\n\n大切なのは、tool 結果が「ただの文字列」ではないことです。 \n最低でも:\n\n- どの tool call に対する結果か\n- loop にどう書き戻すか\n\nを持たせる必要があります。\n\n関連章:\n\n- `s02`\n\n### `PermissionRule`\n\n役割:\n\n- 特定 tool / path / content に対する allow / deny / ask 条件を表す\n\n最小形:\n\n```python\nrule = {\n \"tool\": \"bash\",\n \"behavior\": \"deny\",\n \"path\": None,\n \"content\": \"sudo *\",\n}\n```\n\nこの record があることで、permission system は次を言えるようになります。\n\n- どの tool に対する rule か\n- 何にマッチしたら発火するか\n- 発火後に何を返すか\n\n関連章:\n\n- `s07`\n\n### `PermissionDecision`\n\n役割:\n\n- 今回の tool 実行に対する permission 結果を表す\n\n最小形:\n\n```python\ndecision = {\n \"behavior\": \"allow\" | \"deny\" | \"ask\",\n \"reason\": \"...\",\n}\n```\n\nこれを独立 record にする意味:\n\n- deny 理由を model が見える\n- ask を loop に戻して次アクションを組み立てられる\n- log や UI にも同じ object を流せる\n\n関連章:\n\n- `s07`\n\n### `HookEvent`\n\n役割:\n\n- pre_tool / post_tool / on_error などの lifecycle event を統一形で渡す\n\n最小形:\n\n```python\nevent = {\n \"kind\": \"post_tool\",\n \"tool_name\": \"edit_file\",\n \"input\": {...},\n \"result\": \"...\",\n \"error\": None,\n \"duration_ms\": 42,\n}\n```\n\nhook が安定して増やせるかどうかは、この record の形が揃っているかに大きく依存します。\n\nもし毎回適当な文字列だけを hook に渡すと:\n\n- audit hook\n- metrics hook\n- policy hook\n\nのたびに payload 形式がばらけます。\n\n関連章:\n\n- `s08`\n\n### `ToolExecutionBatch`\n\n役割:\n\n- 同じ execution lane でまとめて調度してよい tool block の束を表す\n\n最小形:\n\n```python\nbatch = {\n \"is_concurrency_safe\": True,\n \"blocks\": [tool_use_1, tool_use_2],\n}\n```\n\nこの record を導入すると、読者は:\n\n- tool を常に 1 個ずつ実行する必要はない\n- ただし何でも並列にしてよいわけでもない\n\nという 2 本の境界を同時に理解しやすくなります。\n\n関連章:\n\n- [`s02b-tool-execution-runtime.md`](./s02b-tool-execution-runtime.md)\n\n### `TrackedTool`\n\n役割:\n\n- 各 tool の lifecycle を個別に追う\n\n最小形:\n\n```python\ntracked = {\n \"id\": \"toolu_01\",\n \"name\": \"read_file\",\n \"status\": \"queued\",\n \"is_concurrency_safe\": True,\n \"pending_progress\": [],\n \"results\": [],\n \"context_modifiers\": [],\n}\n```\n\nこれがあると runtime は次のことを説明できます。\n\n- 何が待機中か\n- 何が実行中か\n- 何が progress を出したか\n- 何が完了したか\n\n関連章:\n\n- [`s02b-tool-execution-runtime.md`](./s02b-tool-execution-runtime.md)\n\n### `queued_context_modifiers`\n\n役割:\n\n- 並列 tool が生んだ共有 state 
変更を、先に queue し、後で安定順に merge する\n\n最小形:\n\n```python\nqueued = {\n \"toolu_01\": [modifier_a],\n \"toolu_02\": [modifier_b],\n}\n```\n\nここで守りたい境界:\n\n- 並列実行してよい\n- しかし共有 state を完了順でそのまま書き換えてよいとは限らない\n\nこの record は、parallel execution と stable merge を切り分けるための最小構造です。\n\n関連章:\n\n- [`s02b-tool-execution-runtime.md`](./s02b-tool-execution-runtime.md)\n\n## 3. Skill・memory・prompt source の状態\n\nこの層の核心は:\n\n> model input の材料は、その場でひとつの文字列に溶けているのではなく、複数の source record として存在する\n\nです。\n\n### `SkillRegistry`\n\n役割:\n\n- 利用可能な skill の索引を持つ\n\n最小形:\n\n```python\nregistry = [\n {\"name\": \"agent-browser\", \"path\": \"...\", \"description\": \"...\"},\n]\n```\n\nこれは「何があるか」を示す record であり、skill 本文そのものではありません。\n\n関連章:\n\n- `s05`\n\n### `SkillContent`\n\n役割:\n\n- 実際に読み込んだ skill の本文や補助資料を持つ\n\n最小形:\n\n```python\nskill = {\n \"name\": \"agent-browser\",\n \"body\": \"...markdown...\",\n}\n```\n\n`SkillRegistry` と `SkillContent` を分ける理由:\n\n- registry は discovery 用\n- content は injection 用\n\nつまり「見つける record」と「使う record」を分けるためです。\n\n関連章:\n\n- `s05`\n\n### `MemoryEntry`\n\n役割:\n\n- 長期に残すべき事実を 1 件ずつ持つ\n\n最小形:\n\n```python\nentry = {\n \"key\": \"package_manager_preference\",\n \"value\": \"pnpm\",\n \"scope\": \"user\",\n \"reason\": \"user explicit preference\",\n}\n```\n\nmemory の重要境界:\n\n- 会話全文を残す record ではない\n- durable fact を残す record である\n\n関連章:\n\n- `s09`\n\n### `MemoryWriteCandidate`\n\n役割:\n\n- 今回のターンから「long-term memory に昇格させる候補」を一時的に保持する\n\n最小形:\n\n```python\ncandidate = {\n \"fact\": \"Use pnpm by default\",\n \"scope\": \"user\",\n \"confidence\": \"high\",\n}\n```\n\n教学 repo では必須ではありません。 \nただし reader が「memory はいつ書くのか」で混乱しやすい場合、この record を挟むと\n\n- その場の conversation detail\n- durable fact candidate\n- 実際に保存された memory\n\nの 3 層を分けやすくなります。\n\n関連章:\n\n- `s09`\n\n## 4. Todo・task・runtime・team の状態\n\nこの層が一番混ざりやすいです。 \n理由は、全部が「仕事っぽい object」に見えるからです。\n\n### `TodoItem`\n\n役割:\n\n- 今の session 内での短期的な進行メモ\n\n最小形:\n\n```python\ntodo = {\n \"content\": \"Inspect auth tests\",\n \"status\": \"pending\",\n}\n```\n\nこれは durable work graph ではありません。 \n今ターンの認知負荷を軽くするための session-local 補助構造です。\n\n関連章:\n\n- `s03`\n\n### `PlanState`\n\n役割:\n\n- 複数の `TodoItem` と current focus をまとめる\n\n最小形:\n\n```python\nplan = {\n \"todos\": [...],\n \"current_focus\": \"Inspect auth tests\",\n}\n```\n\nこれも基本は session-local です。 \n`TaskRecord` と違って、再起動しても必ず復元したい durable board とは限りません。\n\n関連章:\n\n- `s03`\n\n### `TaskRecord`\n\n役割:\n\n- durable work goal を表す\n\n最小形:\n\n```python\ntask = {\n \"id\": \"task-auth-migrate\",\n \"title\": \"Migrate auth layer\",\n \"status\": \"pending\",\n \"dependencies\": [],\n}\n```\n\nこの record が持つべき心智:\n\n- 何を達成したいか\n- 依存関係は何か\n- 今どの状態か\n\nここで大切なのは、**task は goal node であって、今まさに走っている process ではない**ことです。\n\n関連章:\n\n- `s12`\n\n### `RuntimeTaskState`\n\n役割:\n\n- いま動いている 1 回の execution slot を表す\n\n最小形:\n\n```python\nruntime_task = {\n \"id\": \"rt_42\",\n \"task_id\": \"task-auth-migrate\",\n \"status\": \"running\",\n \"preview\": \"...\",\n \"output_file\": \".runtime-tasks/rt_42.log\",\n}\n```\n\n`TaskRecord` との違い:\n\n- `TaskRecord`: 何を達成するか\n- `RuntimeTaskState`: その goal に向かう今回の実行は今どうなっているか\n\n関連章:\n\n- `s13`\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)\n\n### `NotificationRecord`\n\n役割:\n\n- background 実行や外部 capability から main loop へ戻る preview を持つ\n\n最小形:\n\n```python\nnote = {\n \"source\": \"runtime_task\",\n \"task_id\": \"rt_42\",\n \"preview\": \"3 tests failing...\",\n}\n```\n\nこの record は全文ログの保存先ではありません。 \n役割は:\n\n- main loop に「戻ってきた事実」を知らせる\n- prompt space を全文ログで埋めない\n\nことです。\n\n関連章:\n\n- 
`s13`\n\n### `ScheduleRecord`\n\n役割:\n\n- いつ何を trigger するかを表す\n\n最小形:\n\n```python\nschedule = {\n \"name\": \"nightly-health-check\",\n \"cron\": \"0 2 * * *\",\n \"task_template\": \"repo_health_check\",\n}\n```\n\n重要な境界:\n\n- `ScheduleRecord` は時間規則\n- `TaskRecord` は work goal\n- `RuntimeTaskState` は live execution\n\nこの 3 つを一緒にしないことが `s14` の核心です。\n\n関連章:\n\n- `s14`\n\n### `TeamMember`\n\n役割:\n\n- 長期に存在する teammate の身元を表す\n\n最小形:\n\n```python\nmember = {\n \"name\": \"alice\",\n \"role\": \"test-specialist\",\n \"status\": \"working\",\n}\n```\n\n`TeamMember` は task ではありません。 \n「誰が長く system 内に存在しているか」を表す actor record です。\n\n関連章:\n\n- `s15`\n\n### `TeamConfig`\n\n役割:\n\n- team roster 全体をまとめる\n\n最小形:\n\n```python\nconfig = {\n \"team_name\": \"default\",\n \"members\": [member1, member2],\n}\n```\n\nこの record を durable に持つことで、\n\n- team に誰がいるか\n- 役割が何か\n- 次回起動時に何を復元するか\n\nが見えるようになります。\n\n関連章:\n\n- `s15`\n\n### `MessageEnvelope`\n\n役割:\n\n- teammate 間の message を、本文とメタ情報込みで包む\n\n最小形:\n\n```python\nenvelope = {\n \"type\": \"message\",\n \"from\": \"lead\",\n \"to\": \"alice\",\n \"content\": \"Review retry tests\",\n \"timestamp\": 1710000000.0,\n}\n```\n\n`envelope` を使う理由:\n\n- 誰から誰へ送ったか分かる\n- 普通の会話と protocol request を区別しやすい\n- mailbox を durable channel として扱える\n\n関連章:\n\n- `s15`\n- `s16`\n\n### `RequestRecord`\n\n役割:\n\n- approval や shutdown のような構造化 protocol state を持つ\n\n最小形:\n\n```python\nrequest = {\n \"request_id\": \"req_91\",\n \"kind\": \"plan_approval\",\n \"status\": \"pending\",\n \"payload\": {...},\n}\n```\n\nこれを別 record にすることで、\n\n- ただの chat message\n- 追跡可能な coordination request\n\nを明確に分けられます。\n\n関連章:\n\n- `s16`\n\n### `ClaimPolicy`\n\n役割:\n\n- autonomous worker が何を self-claim してよいかを表す\n\n最小形:\n\n```python\npolicy = {\n \"role\": \"test-specialist\",\n \"may_claim\": [\"retry-related\"],\n}\n```\n\nこの record がないと autonomy は「空いている worker が勝手に全部取りに行く」設計になりやすく、 \nrace condition と重複実行を呼び込みます。\n\n関連章:\n\n- `s17`\n\n### `WorktreeRecord`\n\n役割:\n\n- isolated execution lane を表す\n\n最小形:\n\n```python\nworktree = {\n \"path\": \".worktrees/wt-auth-migrate\",\n \"task_id\": \"task-auth-migrate\",\n \"status\": \"active\",\n}\n```\n\nこの record の核心:\n\n- task は goal\n- runtime slot は live execution\n- worktree は「どこで走るか」の lane\n\n関連章:\n\n- `s18`\n\n## 5. 
MCP・plugin・外部 capability の状態\n\nこの層の核心は:\n\n> 外部 capability も「ただの tool list」ではなく、接続状態と routing を持つ platform object である\n\nです。\n\n### `MCPServerConfig`\n\n役割:\n\n- 外部 server の設定を表す\n\n最小形:\n\n```python\nconfig = {\n \"name\": \"figma\",\n \"transport\": \"stdio\",\n \"command\": \"...\",\n}\n```\n\nこれは capability そのものではなく、接続の入口設定です。\n\n関連章:\n\n- `s19`\n\n### `ConnectionState`\n\n役割:\n\n- remote capability の現在状態を表す\n\n最小形:\n\n```python\nstate = {\n \"status\": \"connected\",\n \"needs_auth\": False,\n \"last_error\": None,\n}\n```\n\nこの record が必要な理由:\n\n- 外部 capability は常に使えるとは限らない\n- 問題が tool schema なのか connection なのか区別する必要がある\n\n関連章:\n\n- `s19`\n- [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md)\n\n### `CapabilityRoute`\n\n役割:\n\n- native tool / plugin / MCP server のどこへ解決されたかを表す\n\n最小形:\n\n```python\nroute = {\n \"source\": \"mcp\",\n \"target\": \"figma.inspect\",\n}\n```\n\nこの record があると、\n\n- 発見\n- routing\n- permission\n- 実行\n- result normalization\n\nが同じ capability bus 上で説明できます。\n\n関連章:\n\n- `s19`\n\n## 最後に、特に混同しやすい組み合わせ\n\n### `TodoItem` vs `TaskRecord`\n\n- `TodoItem`: 今 session で何を見るか\n- `TaskRecord`: durable work goal と dependency をどう持つか\n\n### `TaskRecord` vs `RuntimeTaskState`\n\n- `TaskRecord`: 何を達成したいか\n- `RuntimeTaskState`: 今回の実行は今どう進んでいるか\n\n### `RuntimeTaskState` vs `ScheduleRecord`\n\n- `RuntimeTaskState`: live execution\n- `ScheduleRecord`: いつ trigger するか\n\n### `SubagentContext` vs `TeamMember`\n\n- `SubagentContext`: 一回きりの delegation branch\n- `TeamMember`: 長期に残る actor identity\n\n### `TeamMember` vs `RequestRecord`\n\n- `TeamMember`: 誰が存在するか\n- `RequestRecord`: どんな coordination request が進行中か\n\n### `TaskRecord` vs `WorktreeRecord`\n\n- `TaskRecord`: 何をやるか\n- `WorktreeRecord`: どこでやるか\n\n### `ToolSpec` vs `CapabilityRoute`\n\n- `ToolSpec`: model に見せる contract\n- `CapabilityRoute`: 実際にどこへ routing するか\n\n## 読み終えたら言えるべきこと\n\n少なくとも次の 3 文を、自分の言葉で説明できる状態を目指してください。\n\n1. `messages` は内容状態であり、`transition` は制御状態である。\n2. `TaskRecord` は goal node であり、`RuntimeTaskState` は live execution slot である。\n3. `TeamMember`、`RequestRecord`、`WorktreeRecord` は全部「仕事っぽい」が、それぞれ actor、protocol、lane という別層の object である。\n\n## 一文で覚える\n\n**どの record が内容を持ち、どの record が流れを持ち、どれが durable でどれが runtime かを分けられれば、agent system の複雑さは急に読める形になります。**\n"
+ },
+ {
+ "version": null,
+ "slug": "entity-map",
+ "locale": "ja",
+ "title": "エンティティ地図",
+ "kind": "bridge",
+ "filename": "entity-map.md",
+ "content": "# エンティティ地図\n\n> この文書は「単語が似て見えるが、同じものではない」という混乱をほどくための地図です。\n\n## 何を分けるための文書か\n\n- [`glossary.md`](./glossary.md) は「この言葉は何か」を説明します\n- [`data-structures.md`](./data-structures.md) は「コードではどんな形か」を説明します\n- この文書は「どの層に属するか」を分けます\n\n## まず層を見る\n\n```text\nconversation layer\n - message\n - prompt block\n - reminder\n\naction layer\n - tool call\n - tool result\n - hook event\n\nwork layer\n - work-graph task\n - runtime task\n - protocol request\n\nexecution layer\n - subagent\n - teammate\n - worktree lane\n\nplatform layer\n - MCP server\n - memory record\n - capability router\n```\n\n## 混同しやすい組\n\n### `Message` vs `PromptBlock`\n\n| エンティティ | 何か | 何ではないか |\n|---|---|---|\n| `Message` | 会話履歴の内容 | 安定した system rule ではない |\n| `PromptBlock` | system instruction の断片 | 直近の会話イベントではない |\n\n### `Todo / Plan` vs `Task`\n\n| エンティティ | 何か | 何ではないか |\n|---|---|---|\n| `todo / plan` | セッション内の進行ガイド | durable work graph ではない |\n| `task` | durable な work node | その場の思いつきではない |\n\n### `Work-Graph Task` vs `RuntimeTaskState`\n\n| エンティティ | 何か | 何ではないか |\n|---|---|---|\n| work-graph task | 仕事目標と依存関係の node | 今動いている executor ではない |\n| runtime task | live execution slot | durable dependency node ではない |\n\n### `Subagent` vs `Teammate`\n\n| エンティティ | 何か | 何ではないか |\n|---|---|---|\n| subagent | 一回きりの委譲 worker | 長期に存在する team member ではない |\n| teammate | identity を持つ persistent collaborator | 使い捨て summary worker ではない |\n\n### `ProtocolRequest` vs normal message\n\n| エンティティ | 何か | 何ではないか |\n|---|---|---|\n| normal message | 自由文のやり取り | 追跡可能な approval workflow ではない |\n| protocol request | `request_id` を持つ構造化要求 | 雑談テキストではない |\n\n### `Task` vs `Worktree`\n\n| エンティティ | 何か | 何ではないか |\n|---|---|---|\n| task | 何をするか | ディレクトリではない |\n| worktree | どこで分離実行するか | 仕事目標そのものではない |\n\n### `Memory` vs `CLAUDE.md`\n\n| エンティティ | 何か | 何ではないか |\n|---|---|---|\n| memory | 後の session でも価値がある事実 | project rule file ではない |\n| `CLAUDE.md` | 安定した local rule / instruction surface | user 固有の long-term fact store ではない |\n\n### `MCPServer` vs `MCPTool`\n\n| エンティティ | 何か | 何ではないか |\n|---|---|---|\n| MCP server | 外部 capability provider | 1 個の tool 定義ではない |\n| MCP tool | server が公開する 1 つの capability | 接続面全体ではない |\n\n## 速見表\n\n| エンティティ | 主な役割 | 典型的な置き場 |\n|---|---|---|\n| `Message` | 会話履歴 | `messages[]` |\n| `PromptParts` | 入力 assembly の断片 | prompt builder |\n| `PermissionRule` | 実行可否の判断 | settings / session state |\n| `HookEvent` | lifecycle extension point | hook layer |\n| `MemoryEntry` | durable fact | memory store |\n| `TaskRecord` | durable work goal | task board |\n| `RuntimeTaskState` | live execution slot | runtime manager |\n| `TeamMember` | persistent actor | team config |\n| `MessageEnvelope` | teammate 間の構造化 message | inbox |\n| `RequestRecord` | protocol workflow state | request tracker |\n| `WorktreeRecord` | isolated execution lane | worktree index |\n| `MCPServerConfig` | 外部 capability provider 設定 | plugin / settings |\n\n## 一文で覚える\n\n**システムが複雑になるほど、単語を増やすことよりも、境界を混ぜないことの方が重要です。**\n"
+ },
+ {
+ "version": null,
+ "slug": "glossary",
+ "locale": "ja",
+ "title": "用語集",
+ "kind": "bridge",
+ "filename": "glossary.md",
+ "content": "# 用語集\n\n> この用語集は、教材主線で特に重要で、初学者が混ぜやすい言葉だけを集めたものです。 \n> 何となく見覚えはあるのに、「結局これは何を指すのか」が言えなくなったら、まずここへ戻ってください。\n\n## いっしょに見ると整理しやすい文書\n\n- [`entity-map.md`](./entity-map.md): それぞれの言葉がどの層に属するかを見る\n- [`data-structures.md`](./data-structures.md): 実際にどんな record 形へ落ちるかを見る\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md): `task` という語が 2 種類に分かれ始めたときに戻る\n- [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md): MCP が tool list だけに見えなくなったときに戻る\n\n## Agent\n\nこの教材での `agent` は、\n\n> 入力を読み、判断し、必要なら tool を呼び出して仕事を進める model\n\nを指します。\n\n簡単に言えば、\n\n- model が考える\n- harness が作業環境を与える\n\nという分担の、考える側です。\n\n## Harness\n\n`harness` は agent の周囲に置く作業環境です。\n\nたとえば次を含みます。\n\n- tools\n- filesystem\n- permission system\n- prompt assembly\n- memory\n- task runtime\n\nmodel そのものは harness ではありません。 \nharness そのものも model ではありません。\n\n## Agent Loop\n\n`agent loop` は agent system の主循環です。\n\n最小形は次の 5 手順です。\n\n1. 現在の context を model に渡す\n2. response が普通の返答か tool_use かを見る\n3. tool を実行する\n4. result を context に戻す\n5. 次の turn へ続くか止まるかを決める\n\nこの loop がなければ、system は単発の chat で終わります。\n\n## Message / `messages[]`\n\n`message` は 1 件の message、`messages[]` はその一覧です。\n\n多くの章では次を含みます。\n\n- user message\n- assistant message\n- tool_result\n\nこれは agent の main working memory にあたります。 \nただし permanent memory ではありません。\n\n## Tool\n\n`tool` は model が要求できる動作です。\n\nたとえば、\n\n- file を読む\n- file を書く\n- shell command を走らせる\n- text を検索する\n\nなどです。\n\n重要なのは、\n\n> model が直接 OS command を叩くのではなく、tool 名と引数を宣言し、実際の実行は harness 側の code が行う\n\nという点です。\n\n## Tool Schema\n\n`tool schema` は tool の使い方を model に説明する構造です。\n\n普通は次を含みます。\n\n- tool 名\n- 何をするか\n- 必要な parameter\n- parameter の型\n\n初心者向けに言えば、tool の説明書です。\n\n## Dispatch Map\n\n`dispatch map` は、\n\n> tool 名から実際の handler 関数へつなぐ対応表\n\nです。\n\nたとえば次のような形です。\n\n```python\n{\n \"read_file\": read_file_handler,\n \"write_file\": write_file_handler,\n \"bash\": bash_handler,\n}\n```\n\n## Stop Reason\n\n`stop_reason` は、model のこの turn がなぜ止まったかを示す理由です。\n\n代表例:\n\n- `end_turn`: 返答を終えた\n- `tool_use`: tool を要求した\n- `max_tokens`: 出力が token 上限で切れた\n\nmain loop はこの値を見て次の動きを決めます。\n\n## Context\n\n`context` は model が今見えている情報全体です。\n\nふつうは次を含みます。\n\n- `messages`\n- system prompt\n- dynamic reminder\n- tool_result\n\ncontext は permanent storage ではなく、\n\n> 今この turn の机の上に出ている情報\n\nと考えると分かりやすいです。\n\n## Compact / Compaction\n\n`compact` は active context を縮めることです。\n\n狙いは、\n\n- 本当に必要な流れを残す\n- 重複や雑音を削る\n- 後続 turn のための space を作る\n\nことです。\n\n大事なのは「削ること」そのものではなく、\n\n**次の turn に必要な構造を保ったまま薄くすること**\n\nです。\n\n## Subagent\n\n`subagent` は親 agent から切り出された、一回限りの delegated worker です。\n\n価値は次です。\n\n- 親 context を汚さずに subtask を処理できる\n- 結果だけを summary として返せる\n\n`teammate` とは違い、長く system に残る actor ではありません。\n\n## Fork\n\nこの教材での `fork` は、\n\n> 子 agent を空白から始めるのではなく、親の context を引き継いで始める方式\n\nを指します。\n\nsubtask が親の議論背景を理解している必要があるときに使います。\n\n## Permission\n\n`permission` は、\n\n> model が要求した操作を実行してよいか判定する層\n\nです。\n\n良い permission system は少なくとも次を分けます。\n\n- すぐ拒否すべきもの\n- 自動許可してよいもの\n- user に確認すべきもの\n\n## Permission Mode\n\n`permission mode` は permission system の動作方針です。\n\n例:\n\n- `default`\n- `plan`\n- `auto`\n\nつまり個々の request の判定規則ではなく、\n\n> 判定の全体方針\n\nです。\n\n## Hook\n\n`hook` は主 loop を書き換えずに、特定の timing で追加動作を差し込む拡張点です。\n\nたとえば、\n\n- tool 実行前に検査する\n- tool 実行後に監査 log を書く\n\nのようなことを行えます。\n\n## Memory\n\n`memory` は session をまたいで残す価値のある情報です。\n\n向いているもの:\n\n- user の長期的 preference\n- 何度も再登場する重要事実\n- 将来の session でも役に立つ feedback\n\n向いていないもの:\n\n- その場限りの冗長な chat 履歴\n- すぐ再導出できる一時情報\n\n## System Prompt\n\n`system prompt` は system-level の 
instruction surface です。\n\nここでは model に対して、\n\n- あなたは何者か\n- 何を守るべきか\n- どのように協力すべきか\n\nを与えます。\n\n普通の user message より安定して効く層です。\n\n## System Reminder\n\n`system reminder` は毎 turn 動的に差し込まれる短い補助情報です。\n\nたとえば、\n\n- current working directory\n- 現在日付\n- この turn だけ必要な補足\n\nなどです。\n\nstable な system prompt とは役割が違います。\n\n## Query\n\nこの教材での `query` は、\n\n> 1 つの user request を完了させるまで続く多 turn の処理全体\n\nを指します。\n\n単発の 1 回応答ではなく、\n\n- model 呼び出し\n- tool 実行\n- continuation\n- recovery\n\nを含んだまとまりです。\n\n## Transition Reason\n\n`transition reason` は、\n\n> なぜこの system が次の turn へ続いたのか\n\nを説明する理由です。\n\nこれが見えるようになると、\n\n- 普通の tool continuation\n- retry\n- compact 後の再開\n- recovery path\n\nを混ぜずに見られるようになります。\n\n## Task\n\n`task` は durable work graph の中にある仕事目標です。\n\nふつう次を持ちます。\n\n- subject\n- status\n- owner\n- dependency\n\nここでの task は「いま実行中の command」ではなく、\n\n> system が長く持ち続ける work goal\n\nです。\n\n## Dependency Graph\n\n`dependency graph` は task 間の依存関係です。\n\nたとえば、\n\n- A が終わってから B\n- C と D は並行可\n- E は C と D の両方待ち\n\nのような関係を表します。\n\nこれにより system は、\n\n- 今できる task\n- まだ blocked な task\n- 並行可能な task\n\nを判断できます。\n\n## Runtime Task / Runtime Slot\n\n`runtime task` または `runtime slot` は、\n\n> いま実行中、待機中、または直前まで動いていた live execution unit\n\nを指します。\n\n例:\n\n- background の `pytest`\n- 走っている teammate\n- monitor process\n\n`task` との違いはここです。\n\n- `task`: goal\n- `runtime slot`: live execution\n\n## Teammate\n\n`teammate` は multi-agent system 内で長く存在する collaborator です。\n\n`subagent` との違い:\n\n- `subagent`: 一回限りの委譲 worker\n- `teammate`: 長く残り、繰り返し仕事を受ける actor\n\n## Protocol\n\n`protocol` は、事前に決めた協調ルールです。\n\n答える内容は次です。\n\n- message はどんな shape か\n- response はどう返すか\n- approve / reject / expire をどう記録するか\n\nteam 章では多くの場合、\n\n```text\nrequest -> response -> status update\n```\n\nという骨格で現れます。\n\n## Envelope\n\n`envelope` は、\n\n> 本文に加えてメタデータも一緒に包んだ構造化 record\n\nです。\n\nたとえば message 本文に加えて、\n\n- `from`\n- `to`\n- `request_id`\n- `timestamp`\n\nを一緒に持つものです。\n\n## State Machine\n\n`state machine` は難しい理論名に見えますが、ここでは\n\n> 状態がどう変化してよいかを書いた規則表\n\nです。\n\nたとえば、\n\n```text\npending -> approved\npending -> rejected\npending -> expired\n```\n\nだけでも最小の state machine です。\n\n## Router\n\n`router` は分配器です。\n\n役割は、\n\n- request がどの種類かを見る\n- 正しい処理経路へ送る\n\nことです。\n\ntool system では、\n\n- local handler\n- MCP client\n- plugin bridge\n\nのどこへ送るかを決める層として現れます。\n\n## Control Plane\n\n`control plane` は、\n\n> 自分で本仕事をするというより、誰がどう実行するかを調整する層\n\nです。\n\nたとえば、\n\n- permission 判定\n- prompt assembly\n- continuation 理由\n- lane 選択\n\nなどがここに寄ります。\n\n初見では怖く見えるかもしれませんが、この教材ではまず\n\n> 実作業そのものではなく、作業の進め方を調整する層\n\nと覚えれば十分です。\n\n## Capability\n\n`capability` は能力項目です。\n\nMCP の文脈では、capability は tool だけではありません。\n\nたとえば、\n\n- tools\n- resources\n- prompts\n- elicitation\n\nのように複数層があります。\n\n## Worktree\n\n`worktree` は同じ repository の別 working copy です。\n\nこの教材では、\n\n> task ごとに割り当てる isolated execution directory\n\nとして使います。\n\n価値は次です。\n\n- 並行作業が互いの未コミット変更を汚染しない\n- task と execution lane の対応が見える\n- review や closeout がしやすい\n\n## MCP\n\n`MCP` は Model Context Protocol です。\n\nこの教材では単なる remote tool list より広く、\n\n> 外部 capability を統一的に接続する surface\n\nとして扱います。\n\nつまり「外部 tool を呼べる」だけではなく、\n\n- connection\n- auth\n- resources\n- prompts\n- capability routing\n\nまで含む層です。\n"
+ },
+ {
+ "version": null,
+ "slug": "s00-architecture-overview",
+ "locale": "ja",
+ "title": "s00: アーキテクチャ全体図",
+ "kind": "bridge",
+ "filename": "s00-architecture-overview.md",
+ "content": "# s00: アーキテクチャ全体図\n\n> この章は教材全体の地図です。 \n> 「結局この repository は何を教えようとしていて、なぜこの順番で章が並んでいるのか」を先に掴みたいなら、まずここから読むのがいちばん安全です。\n\n## 先に結論\n\nこの教材の章順は妥当です。\n\n大事なのは章数の多さではありません。 \n大事なのは、初学者が無理なく積み上がる順番で system を育てていることです。\n\n全体は次の 4 段階に分かれています。\n\n1. まず本当に動く単一 agent を作る\n2. その上に安全性、拡張点、memory、prompt、recovery を足す\n3. 会話中の一時的 progress を durable work system へ押し上げる\n4. 最後に teams、protocols、autonomy、worktree、MCP / plugin へ広げる\n\nこの順番が自然なのは、学習者が最初に固めるべき主線がたった 1 本だからです。\n\n```text\nuser input\n ->\nmodel reasoning\n ->\ntool execution\n ->\nresult write-back\n ->\nnext turn or finish\n```\n\nこの主線がまだ曖昧なまま後段の mechanism を積むと、\n\n- permission\n- hook\n- memory\n- MCP\n- worktree\n\nのような言葉が全部ばらばらの trivia に見えてしまいます。\n\n## この教材が再構成したいもの\n\nこの教材の目標は、どこかの production code を逐行でなぞることではありません。\n\n本当に再構成したいのは次の部分です。\n\n- 主要 module は何か\n- module 同士がどう協調するか\n- 各 module の責務は何か\n- 重要 state がどこに住むか\n- 1 つの request が system の中をどう流れるか\n\nつまり狙っているのは、\n\n**設計主脈への高い忠実度であって、周辺実装の 1:1 再現ではありません。**\n\nこれはとても重要です。\n\nもしあなたが本当に知りたいのが、\n\n> 0 から自分で高完成度の coding agent harness を作れるようになること\n\nなら、優先して掴むべきなのは次です。\n\n- agent loop\n- tools\n- planning\n- context management\n- permissions\n- hooks\n- memory\n- prompt assembly\n- tasks\n- teams\n- isolated execution lanes\n- external capability routing\n\n逆に、最初の主線に持ち込まなくてよいものもあります。\n\n- packaging / release\n- cross-platform compatibility の細かな枝\n- enterprise wiring\n- telemetry\n- 歴史的 compatibility layer\n- product 固有の naming accident\n\nこれらが存在しうること自体は否定しません。 \nただし 0-to-1 教学の中心に置くべきではありません。\n\n## 読むときの 3 つの原則\n\n### 1. まず最小で正しい版を学ぶ\n\nたとえば subagent なら、最初に必要なのはこれだけです。\n\n- 親 agent が subtask を切る\n- 子 agent が自分の `messages` を持つ\n- 子 agent が summary を返す\n\nこれだけで、\n\n**親 context を汚さずに探索作業を切り出せる**\n\nという核心は学べます。\n\nそのあとでようやく、\n\n- 親 context を引き継ぐ fork\n- 独立 permission\n- background 実行\n- worktree 隔離\n\nを足せばよいです。\n\n### 2. 新しい語は使う前に意味を固める\n\nこの教材では次のような語が頻繁に出ます。\n\n- state machine\n- dispatch map\n- dependency graph\n- worktree\n- protocol envelope\n- capability\n- control plane\n\n意味が曖昧なまま先へ進むと、後ろの章で一気に詰まります。\n\nそのときは無理に本文を読み切ろうとせず、次の文書へ戻ってください。\n\n- [`glossary.md`](./glossary.md)\n- [`entity-map.md`](./entity-map.md)\n- [`data-structures.md`](./data-structures.md)\n\n### 3. 
周辺の複雑さを主線へ持ち込みすぎない\n\n良い教材は「全部話す教材」ではありません。\n\n良い教材は、\n\n- 核心は完全に話す\n- 周辺で重く複雑なものは後ろへ回す\n\nという構造を持っています。\n\nだからこの repository では、あえて主線の外に置いている内容があります。\n\n- packaging / release\n- enterprise policy glue\n- telemetry\n- client integration の細部\n- 逐行の逆向き比較 trivia\n\n## 先に開いておくと楽な補助文書\n\n主線 chapter と一緒に、次の文書を補助地図として持っておくと理解が安定します。\n\n| 文書 | 用途 |\n|---|---|\n| [`teaching-scope.md`](./teaching-scope.md) | 何を教え、何を意図的に省くかを見る |\n| [`data-structures.md`](./data-structures.md) | system 全体の重要 record を一か所で見る |\n| [`s00f-code-reading-order.md`](./s00f-code-reading-order.md) | chapter order と local code reading order をそろえる |\n\nさらに、後半で mechanism 間のつながりが曖昧になったら、次の bridge docs が効きます。\n\n| 文書 | 補うもの |\n|---|---|\n| [`s00d-chapter-order-rationale.md`](./s00d-chapter-order-rationale.md) | なぜ今の順番で学ぶのか |\n| [`s00e-reference-module-map.md`](./s00e-reference-module-map.md) | 参照 repository の高信号 module 群と教材章の対応 |\n| [`s00a-query-control-plane.md`](./s00a-query-control-plane.md) | 高完成度 system に loop 以外の control plane が必要になる理由 |\n| [`s00b-one-request-lifecycle.md`](./s00b-one-request-lifecycle.md) | 1 request が system 全体をどう流れるか |\n| [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md) | tool layer が単なる `tool_name -> handler` で終わらない理由 |\n| [`s10a-message-prompt-pipeline.md`](./s10a-message-prompt-pipeline.md) | message / prompt / memory がどこで合流するか |\n| [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) | durable task と live runtime slot の違い |\n| [`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md) | MCP を capability bus として見るための地図 |\n| [`entity-map.md`](./entity-map.md) | entity の境界を徹底的に分ける |\n\n## 4 段階の学習パス\n\n### Stage 1: Core Single-Agent (`s01-s06`)\n\nここでの目標は、\n\n**まず本当に役に立つ単一 agent を作ること**\n\nです。\n\n| 章 | 学ぶもの | 解く問題 |\n|---|---|---|\n| `s01` | Agent Loop | loop がなければ agent にならない |\n| `s02` | Tool Use | model を「話すだけ」から「実際に動く」へ変える |\n| `s03` | Todo / Planning | multi-step work が漂わないようにする |\n| `s04` | Subagent | 探索作業で親 context を汚さない |\n| `s05` | Skills | 必要な知識だけ後から載せる |\n| `s06` | Context Compact | 会話が長くなっても主線を保つ |\n\n### Stage 2: Hardening (`s07-s11`)\n\nここでの目標は、\n\n**動くだけの agent を、安全で拡張可能な agent へ押し上げること**\n\nです。\n\n| 章 | 学ぶもの | 解く問題 |\n|---|---|---|\n| `s07` | Permission System | 危険な操作を gate の後ろへ置く |\n| `s08` | Hook System | loop 本体を書き換えず周辺拡張する |\n| `s09` | Memory System | 本当に価値ある情報だけを跨 session で残す |\n| `s10` | System Prompt | stable rule と runtime input を組み立てる |\n| `s11` | Error Recovery | 失敗後も stop 一択にしない |\n\n### Stage 3: Runtime Work (`s12-s14`)\n\nここでの目標は、\n\n**session 中の計画を durable work graph と runtime execution に分けること**\n\nです。\n\n| 章 | 学ぶもの | 解く問題 |\n|---|---|---|\n| `s12` | Task System | work goal を disk 上に持つ |\n| `s13` | Background Tasks | 遅い command が前景思考を止めないようにする |\n| `s14` | Cron Scheduler | 時間そのものを trigger にする |\n\n### Stage 4: Platform (`s15-s19`)\n\nここでの目標は、\n\n**single-agent harness を協調 platform へ広げること**\n\nです。\n\n| 章 | 学ぶもの | 解く問題 |\n|---|---|---|\n| `s15` | Agent Teams | persistent teammate を持つ |\n| `s16` | Team Protocols | 協調を自由文から structured flow へ上げる |\n| `s17` | Autonomous Agents | idle teammate が自分で次の work を取れるようにする |\n| `s18` | Worktree Isolation | 並行 task が同じ directory を踏み荒らさないようにする |\n| `s19` | MCP & Plugin | 外部 capability を統一 surface で扱う |\n\n## 各章が system に足す中核構造\n\n読者が中盤で混乱しやすいのは、\n\n- 今の章は何を増やしているのか\n- その state は system のどこに属するのか\n\nが曖昧になるからです。\n\nそこで各章を「新しく足す構造」で見直すとこうなります。\n\n| 章 | 中核構造 | 学習後に言えるべきこと |\n|---|---|---|\n| `s01` | `LoopState` | 最小の agent loop を自分で書ける |\n| `s02` | `ToolSpec` / dispatch map | model の意図を安定して実行へ落とせる |\n| `s03` | `TodoItem` 
/ `PlanState` | 現在の progress を外部 state として持てる |\n| `s04` | `SubagentContext` | 親 context を汚さず委譲できる |\n| `s05` | `SkillRegistry` | 必要な knowledge を必要な時だけ注入できる |\n| `s06` | compaction records | 長い対話でも主線を保てる |\n| `s07` | `PermissionDecision` | 実行を gate の後ろへ置ける |\n| `s08` | hook events | loop を壊さず extension を追加できる |\n| `s09` | memory records | 跨 session で残すべき情報を選別できる |\n| `s10` | prompt parts | 入力を section 単位で組み立てられる |\n| `s11` | recovery state / transition reason | なぜ続行するのかを state として説明できる |\n| `s12` | `TaskRecord` | durable work graph を作れる |\n| `s13` | `RuntimeTaskState` | live execution と work goal を分けて見られる |\n| `s14` | `ScheduleRecord` | time-based trigger を足せる |\n| `s15` | `TeamMember` | persistent actor を持てる |\n| `s16` | `ProtocolEnvelope` / `RequestRecord` | structured coordination を作れる |\n| `s17` | `ClaimPolicy` / autonomy state | 自治的な claim / resume を説明できる |\n| `s18` | `WorktreeRecord` / `TaskBinding` | 並行 execution lane を分離できる |\n| `s19` | `MCPServerConfig` / capability route | native / plugin / MCP を同じ外側境界で見られる |\n\n## system 全体を 3 層で見る\n\n全体を最も簡単に捉えるなら、次の 3 層に分けてください。\n\n```text\n1. Main Loop\n user input を受け、model を呼び、結果に応じて続く\n\n2. Control / Context Layer\n permission、hook、memory、prompt、recovery が loop を支える\n\n3. Work / Platform Layer\n tasks、teams、runtime slots、worktrees、MCP が大きな作業面を作る\n```\n\n図で見るとこうです。\n\n```text\nUser\n |\n v\nmessages[]\n |\n v\n+-------------------------+\n| Agent Loop (s01) |\n| 1. 入力を組み立てる |\n| 2. model を呼ぶ |\n| 3. stop_reason を見る |\n| 4. tool を実行する |\n| 5. result を write-back |\n| 6. 次 turn を決める |\n+-------------------------+\n |\n +------------------------------+\n | |\n v v\nTool / Control Plane Context / State Layer\n(s02, s07, s08, s19) (s03, s06, s09, s10, s11)\n | |\n v v\nTasks / Teams / Worktree / Runtime (s12-s18)\n```\n\nここで大切なのは、system 全体を 1 本の巨大な file や 1 つの class として捉えないことです。\n\n**chapter order とは、system をどの層の順で理解すると最も心智負荷が低いかを表したもの**\n\nです。\n\n## この章を読み終えたら何が言えるべきか\n\nこの章のゴールは、個々の API を覚えることではありません。\n\n読み終えた時点で、少なくとも次の 3 文を自分の言葉で言える状態を目指してください。\n\n1. この教材は production implementation の周辺 detail ではなく、agent harness の主設計を教えている\n2. chapter order は `single agent -> hardening -> runtime work -> platform` の 4 段階で意味がある\n3. 後ろの章の mechanism は前の章の上に自然に積み上がるので、順番を大きく崩すと学習心智が乱れる\n\n## 一文で覚える\n\n**良い章順とは、機能一覧ではなく、前の層から次の層が自然に育つ学習経路です。**\n"
+ },
+ {
+ "version": null,
+ "slug": "s00a-query-control-plane",
+ "locale": "ja",
+ "title": "s00a: Query Control Plane",
+ "kind": "bridge",
+ "filename": "s00a-query-control-plane.md",
+ "content": "# s00a: Query Control Plane\n\n> これは主線章ではなく橋渡し文書です。 \n> ここで答えたいのは次の問いです。\n>\n> **なぜ高完成度の agent は `messages[]` と `while True` だけでは足りないのか。**\n\n## なぜこの文書が必要か\n\n`s01` では最小の loop を学びます。\n\n```text\nユーザー入力\n ->\nモデル応答\n ->\ntool_use があれば実行\n ->\ntool_result を戻す\n ->\n次ターン\n```\n\nこれは正しい出発点です。\n\nただし実システムが成長すると、支えるのは loop 本体だけではなく:\n\n- 今どの turn か\n- なぜ続行したのか\n- compact を試したか\n- token recovery 中か\n- hook が終了条件に影響しているか\n\nといった **query 制御状態** です。\n\nこの層を明示しないと、動く demo は作れても、高完成度 harness へ育てにくくなります。\n\n## まず用語を分ける\n\n### Query\n\nここでの `query` は database query ではありません。\n\n意味は:\n\n> 1つのユーザー要求を完了するまで続く、多ターンの処理全体\n\nです。\n\n### Control Plane\n\n`control plane` は:\n\n> 実際の業務動作をする層ではなく、流れをどう進めるかを管理する層\n\nです。\n\nここでは:\n\n- model 応答や tool result は内容\n- 「次に続けるか」「なぜ続けるか」は control plane\n\nと考えると分かりやすいです。\n\n### Transition Reason\n\n`transition reason` は:\n\n> 前のターンが終わらず、次ターンへ進んだ理由\n\nです。\n\nたとえば:\n\n- tool が終わった\n- 出力が切れて続きを書く必要がある\n- compact 後に再実行する\n- hook が続行を要求した\n\nなどがあります。\n\n## 最小の心智モデル\n\n```text\n1. 入力層\n - messages\n - system prompt\n - runtime context\n\n2. 制御層\n - query state\n - turn count\n - transition reason\n - compact / recovery flags\n\n3. 実行層\n - model call\n - tool execution\n - write-back\n```\n\nこの層は loop を置き換えるためではありません。\n\n**小さな loop を、分岐と状態を扱える system に育てるため**にあります。\n\n## なぜ `messages[]` だけでは足りないか\n\n最小 demo では、多くのことを `messages[]` に押し込めても動きます。\n\nしかし次の情報は会話内容ではなく制御状態です。\n\n- reactive compact を既に試したか\n- 出力続行を何回したか\n- 今回の続行が tool によるものか recovery によるものか\n- 今だけ output budget を変えているか\n\nこれらを全部 `messages[]` に混ぜると、状態の境界が崩れます。\n\n## 主要なデータ構造\n\n### `QueryParams`\n\nquery に入るときの外部入力です。\n\n```python\nparams = {\n \"messages\": [...],\n \"system_prompt\": \"...\",\n \"user_context\": {...},\n \"system_context\": {...},\n \"tool_use_context\": {...},\n \"max_output_tokens_override\": None,\n \"max_turns\": None,\n}\n```\n\nこれは「入口で既に分かっているもの」です。\n\n### `QueryState`\n\nquery の途中で変わり続ける制御状態です。\n\n```python\nstate = {\n \"messages\": [...],\n \"tool_use_context\": {...},\n \"turn_count\": 1,\n \"continuation_count\": 0,\n \"has_attempted_compact\": False,\n \"max_output_tokens_override\": None,\n \"stop_hook_active\": False,\n \"transition\": None,\n}\n```\n\n重要なのは:\n\n- 内容状態と制御状態を分ける\n- どの continue site も同じ state を更新する\n\nことです。\n\n### `TransitionReason`\n\n続行理由は文字列でも enum でもよいですが、明示する方がよいです。\n\n```python\nTRANSITIONS = (\n \"tool_result_continuation\",\n \"max_tokens_recovery\",\n \"compact_retry\",\n \"stop_hook_continuation\",\n)\n```\n\nこれで:\n\n- log\n- test\n- debug\n- 教材説明\n\nがずっと分かりやすくなります。\n\n## 最小実装の流れ\n\n### 1. 外部入力と内部状態を分ける\n\n```python\ndef query(params):\n state = {\n \"messages\": params[\"messages\"],\n \"tool_use_context\": params[\"tool_use_context\"],\n \"turn_count\": 1,\n \"continuation_count\": 0,\n \"has_attempted_compact\": False,\n \"transition\": None,\n }\n```\n\n### 2. 各ターンで state を読んで実行する\n\n```python\nwhile True:\n response = call_model(...)\n```\n\n### 3. 続行時は必ず state に理由を書き戻す\n\n```python\nif response.stop_reason == \"tool_use\":\n state[\"messages\"] = append_tool_results(...)\n state[\"transition\"] = \"tool_result_continuation\"\n state[\"turn_count\"] += 1\n continue\n```\n\n大事なのは:\n\n**ただ `continue` するのではなく、なぜ `continue` したかを状態に残すこと**\n\nです。\n\n## 初学者が混ぜやすいもの\n\n### 1. 会話内容と制御状態\n\n- `messages` は内容\n- `turn_count` や `transition` は制御\n\n### 2. Loop と Control Plane\n\n- loop は反復の骨格\n- control plane はその反復を管理する層\n\n### 3. 
Prompt assembly と query state\n\n- prompt assembly は「このターンに model へ何を渡すか」\n- query state は「この query が今どういう状態か」\n\n## 一文で覚える\n\n**高完成度の agent では、会話内容を持つ層と、続行理由を持つ層を分けた瞬間に system の見通しが良くなります。**\n"
+ },
+ {
+ "version": null,
+ "slug": "s00b-one-request-lifecycle",
+ "locale": "ja",
+ "title": "s00b: 1 リクエストのライフサイクル",
+ "kind": "bridge",
+ "filename": "s00b-one-request-lifecycle.md",
+ "content": "# s00b: 1 リクエストのライフサイクル\n\n> これは橋渡し文書です。 \n> 章ごとの説明を、1本の実行の流れとしてつなぎ直します。\n>\n> 問いたいのは次です。\n>\n> **ユーザーの一言が system に入ってから、どう流れ、どこで状態が変わり、どう loop に戻るのか。**\n\n## なぜ必要か\n\n章を順に読むと、個別の仕組みは理解できます。\n\n- `s01`: loop\n- `s02`: tools\n- `s07`: permissions\n- `s09`: memory\n- `s12-s19`: tasks / teams / worktree / MCP\n\nしかし実装段階では、次の疑問で詰まりやすいです。\n\n- 先に走るのは prompt か memory か\n- tool 実行前に permissions と hooks はどこへ入るのか\n- task、runtime task、teammate、worktree はどの段で関わるのか\n\nこの文書はその縦の流れをまとめます。\n\n## まず全体図\n\n```text\nユーザー要求\n |\n v\nQuery State 初期化\n |\n v\nsystem prompt / messages / reminders を組み立てる\n |\n v\nモデル呼び出し\n |\n +-- 普通の応答 --------------------------> 今回の request は終了\n |\n +-- tool_use\n |\n v\n Tool Router\n |\n +-- permission gate\n +-- hook interception\n +-- native tool / task / teammate / MCP\n |\n v\n 実行結果\n |\n +-- task / runtime / memory / worktree 状態を書き換える場合がある\n |\n v\n tool_result を messages へ write-back\n |\n v\n Query State 更新\n |\n v\n 次ターン\n```\n\n## 第 1 段: Query State を作る\n\nユーザーが:\n\n```text\ntests/test_auth.py の失敗を直して、原因も説明して\n```\n\nと言ったとき、最初に起きるのは shell 実行ではありません。\n\nまず「今回の request の状態」が作られます。\n\n```python\nquery_state = {\n \"messages\": [{\"role\": \"user\", \"content\": user_text}],\n \"turn_count\": 1,\n \"transition\": None,\n \"tool_use_context\": {...},\n}\n```\n\nポイントは:\n\n**1 リクエスト = 1 API call ではなく、複数ターンにまたがる処理**\n\nということです。\n\n## 第 2 段: モデル入力を組み立てる\n\n実システムは、生の `messages` だけをそのまま送らないことが多いです。\n\n組み立てる対象はたとえば:\n\n- system prompt blocks\n- normalized messages\n- memory section\n- reminders\n- tool list\n\nつまりモデルが実際に見るのは:\n\n```text\nsystem prompt\n+ normalized messages\n+ optional memory / reminders / attachments\n+ tools\n```\n\nここで大事なのは:\n\n**system prompt は入力全体ではなく、その一部**\n\nだということです。\n\n## 第 3 段: モデルは 2 種類の出力を返す\n\n### 1. 普通の回答\n\n結論や説明だけを返し、今回の request が終わる場合です。\n\n### 2. 動作意図\n\ntool call です。\n\n例:\n\n```text\nread_file(...)\nbash(...)\ntodo_write(...)\nagent(...)\nmcp__server__tool(...)\n```\n\nここで system が受け取るのは単なる文章ではなく:\n\n> モデルが「現実の動作を起こしたい」という意図\n\nです。\n\n## 第 4 段: Tool Router が受け取る\n\n`tool_use` が出たら、次は tool control plane の責任です。\n\n最低でも次を決めます。\n\n1. これはどの tool か\n2. どの handler / capability へ送るか\n3. 実行前に permission が必要か\n4. hook が割り込むか\n5. 
どの共有状態へアクセスするか\n\n## 第 5 段: Permission が gate をかける\n\n危険な動作は、そのまま実行されるべきではありません。\n\nたとえば:\n\n- file write\n- bash\n- 外部 service 呼び出し\n- worktree の削除\n\nここで system は:\n\n```text\ndeny\n -> mode\n -> allow\n -> ask\n```\n\nのような判断経路を持ちます。\n\npermission が扱うのは:\n\n> この動作を起こしてよいか\n\nです。\n\n## 第 6 段: Hook が周辺ロジックを足す\n\nhook は permission とは別です。\n\nhook は:\n\n- 実行前の補助チェック\n- 実行後の記録\n- 補助メッセージの注入\n\nなど、loop の周辺で side effect を足します。\n\nつまり:\n\n- permission は gate\n- hook は extension\n\nです。\n\n## 第 7 段: 実行結果が状態を変える\n\ntool は text だけを返すとは限りません。\n\n実行によって:\n\n- task board が更新される\n- runtime task が生成される\n- memory 候補が増える\n- worktree lane が作られる\n- teammate へ request が飛ぶ\n- MCP resource / tool result が返る\n\nといった状態変化が起きます。\n\nここでの大原則は:\n\n**tool result は内容を返すだけでなく、system state を進める**\n\nということです。\n\n## 第 8 段: tool_result を loop へ戻す\n\n最後に system は結果を `messages` へ戻します。\n\n```python\nmessages.append({\n \"role\": \"user\",\n \"content\": [\n {\"type\": \"tool_result\", ...}\n ],\n})\n```\n\nそして query state を更新し:\n\n- `turn_count`\n- `transition`\n- compact / recovery flags\n\nなどを整えて、次ターンへ進みます。\n\n## 後半章はどこで関わるか\n\n| 仕組み | 1 request の中での役割 |\n|---|---|\n| `s09` memory | 入力 assembly の一部になる |\n| `s10` prompt pipeline | 各 source を 1 つの model input へ組む |\n| `s12` task | durable work goal を持つ |\n| `s13` runtime task | 今動いている execution slot を持つ |\n| `s15-s17` teammate / protocol / autonomy | request を actor 間で回す |\n| `s18` worktree | 実行ディレクトリを分離する |\n| `s19` MCP | 外部 capability provider と接続する |\n\n## 一文で覚える\n\n**1 request の本体は「モデルを 1 回呼ぶこと」ではなく、「入力を組み、動作を実行し、結果を state に戻し、必要なら次ターンへ続けること」です。**\n"
+ },
+ {
+ "version": null,
+ "slug": "s00c-query-transition-model",
+ "locale": "ja",
+ "title": "s00c: Query Transition Model",
+ "kind": "bridge",
+ "filename": "s00c-query-transition-model.md",
+ "content": "# s00c: Query Transition Model\n\n> この bridge doc は次の一点を解くためのものです。\n>\n> **高完成度の agent では、なぜ query が次の turn へ続くのかを明示しなければならないのか。**\n\n## なぜこの資料が必要か\n\n主線では次を順に学びます。\n\n- `s01`: 最小 loop\n- `s06`: context compact\n- `s11`: error recovery\n\n流れ自体は正しいです。\n\nただし、章ごとに別々に読むと多くの読者は次のように理解しがちです。\n\n> 「とにかく `continue` したから次へ進む」\n\nこれは toy demo なら動きます。\n\nしかし高完成度システムではすぐに破綻します。\n\nなぜなら query が継続する理由は複数あり、それぞれ本質が違うからです。\n\n- tool が終わり、その結果を model に戻す\n- 出力が token 上限で切れて続きが必要\n- compact 後に再試行する\n- transport error の後で backoff して再試行する\n- stop hook がまだ終わるなと指示する\n- budget policy がまだ継続を許している\n\nこれら全部を曖昧な `continue` に潰すと、すぐに次が悪化します。\n\n- log が読みにくくなる\n- test が書きにくくなる\n- 学習者の心智モデルが濁る\n\n## まず用語\n\n### transition とは\n\nここでの `transition` は:\n\n> 前の turn が次の turn へ移った理由\n\nを指します。\n\nmessage 内容そのものではなく、制御上の原因です。\n\n### continuation とは\n\ncontinuation は:\n\n> この query がまだ終わっておらず、先へ進むべき状態\n\nのことです。\n\nただし continuation は一種類ではありません。\n\n### query boundary とは\n\nquery boundary は turn と次の turn の境目です。\n\nこの境界を越えるたびに、システムは次を知っているべきです。\n\n- なぜ続くのか\n- 続く前にどの state を変えたのか\n- 次の turn がその変更をどう解釈するのか\n\n## 最小の心智モデル\n\nquery を一本の直線だと思わないでください。\n\nより実像に近い理解は次です。\n\n```text\n1 本の query\n = 明示された continuation reason を持つ\n state transition の連鎖\n```\n\n例えば:\n\n```text\nuser input\n ->\nmodel emits tool_use\n ->\ntool finishes\n ->\ntool_result_continuation\n ->\nmodel output is truncated\n ->\nmax_tokens_recovery\n ->\ncompact_retry\n ->\nfinal completion\n```\n\n重要なのは:\n\n> システムは while loop を漫然と回しているのではなく、\n> 明示された transition reason の列で進んでいる\n\nということです。\n\n## 主要 record\n\n### 1. query state の `transition`\n\n教材版でも次のような field は明示しておくべきです。\n\n```python\nstate = {\n \"messages\": [...],\n \"turn_count\": 3,\n \"continuation_count\": 1,\n \"has_attempted_compact\": False,\n \"transition\": None,\n}\n```\n\nこの field は飾りではありません。\n\nこれによって:\n\n- この turn がなぜ存在するか\n- log がどう説明すべきか\n- test がどの path を assert すべきか\n\nが明確になります。\n\n### 2. `TransitionReason`\n\n教材版の最小集合は次の程度で十分です。\n\n```python\nTRANSITIONS = (\n \"tool_result_continuation\",\n \"max_tokens_recovery\",\n \"compact_retry\",\n \"transport_retry\",\n \"stop_hook_continuation\",\n \"budget_continuation\",\n)\n```\n\nこれらは同じではありません。\n\n- `tool_result_continuation`\n は通常の主線継続\n- `max_tokens_recovery`\n は切れた出力の回復継続\n- `compact_retry`\n は context 再構成後の継続\n- `transport_retry`\n は基盤失敗後の再試行継続\n- `stop_hook_continuation`\n は外部制御による継続\n- `budget_continuation`\n は budget policy による継続\n\n### 3. 
continuation budget\n\n高完成度システムは単に続行するだけではなく、続行回数を制御します。\n\n```python\nstate = {\n \"max_output_tokens_recovery_count\": 2,\n \"has_attempted_reactive_compact\": True,\n}\n```\n\n本質は:\n\n> continuation は無限の抜け道ではなく、制御された資源\n\nという点です。\n\n## 最小実装の進め方\n\n### Step 1: continue site を明示する\n\n初心者の loop はよくこうなります。\n\n```python\ncontinue\n```\n\n教材版は一歩進めます。\n\n```python\nstate[\"transition\"] = \"tool_result_continuation\"\ncontinue\n```\n\n### Step 2: continuation と state patch を対にする\n\n```python\nif response.stop_reason == \"tool_use\":\n state[\"messages\"] = append_tool_results(...)\n state[\"turn_count\"] += 1\n state[\"transition\"] = \"tool_result_continuation\"\n continue\n\nif response.stop_reason == \"max_tokens\":\n state[\"messages\"].append({\n \"role\": \"user\",\n \"content\": CONTINUE_MESSAGE,\n })\n state[\"max_output_tokens_recovery_count\"] += 1\n state[\"transition\"] = \"max_tokens_recovery\"\n continue\n```\n\n大事なのは「1 行増えた」ことではありません。\n\n大事なのは:\n\n> 続行する前に、理由と state mutation を必ず知っている\n\nことです。\n\n### Step 3: 通常継続と recovery 継続を分ける\n\n```python\nif should_retry_transport(error):\n time.sleep(backoff(...))\n state[\"transition\"] = \"transport_retry\"\n continue\n\nif should_recompact(error):\n state[\"messages\"] = compact_messages(state[\"messages\"])\n state[\"transition\"] = \"compact_retry\"\n continue\n```\n\nここまで来ると `continue` は曖昧な動作ではなく、型付きの control transition になります。\n\n## 何を test すべきか\n\n教材 repo では少なくとも次を test しやすくしておくべきです。\n\n- tool result が `tool_result_continuation` を書く\n- truncated output が `max_tokens_recovery` を書く\n- compact retry が古い reason を黙って使い回さない\n- transport retry が通常 turn に見えない\n\nこれが test しづらいなら、まだ model が暗黙的すぎます。\n\n## 何を教えすぎないか\n\nvendor 固有の transport detail や細かすぎる enum を全部教える必要はありません。\n\n教材 repo で本当に必要なのは次です。\n\n> 1 本の query は明示された transition の連鎖であり、\n> 各 transition は reason・state patch・budget rule を持つ\n\nここが分かれば、開発者は高完成度 agent を 0 から組み直せます。\n"
+ },
+ {
+ "version": null,
+ "slug": "s00d-chapter-order-rationale",
+ "locale": "ja",
+ "title": "s00d: Chapter Order Rationale",
+ "kind": "bridge",
+ "filename": "s00d-chapter-order-rationale.md",
+ "content": "# s00d: Chapter Order Rationale\n\n> この資料は 1 つの仕組みを説明するためのものではありません。 \n> もっと基礎的な問いに答えるための資料です:\n>\n> **なぜこの教材は今の順序で教えるのか。なぜ source file の並びや機能の派手さ、実装難度の順ではないのか。**\n\n## 先に結論\n\n現在の `s01 -> s19` の順序は妥当です。\n\nこの順序の価値は、単に章数が多いことではなく、学習者が理解すべき依存順でシステムを育てていることです。\n\n1. 最小の agent loop を作る\n2. その loop の周囲に control plane と hardening を足す\n3. session 内 planning を durable work と runtime state へ広げる\n4. その後で teammate、isolation lane、external capability へ広げる\n\nつまりこの教材は:\n\n**mechanism の依存順**\n\nで構成されています。\n\n## 4 本の依存線\n\nこの教材は大きく 4 本の依存線で並んでいます。\n\n1. `core loop dependency`\n2. `control-plane dependency`\n3. `work-state dependency`\n4. `platform-boundary dependency`\n\n雑に言うと:\n\n```text\nまず agent を動かす\n -> 次に安全に動かす\n -> 次に長く動かす\n -> 最後に platform として動かす\n```\n\nこれが今の順序の核心です。\n\n## 全体の並び\n\n```text\ns01-s06\n 単一 agent の最小主線を作る\n\ns07-s11\n control plane と hardening を足す\n\ns12-s14\n durable work と runtime を作る\n\ns15-s19\n teammate・protocol・autonomy・worktree・external capability を足す\n```\n\n各段の終わりで、学習者は次のように言えるべきです。\n\n- `s06` の後: 「動く単一 agent harness を自力で作れる」\n- `s11` の後: 「それをより安全に、安定して、拡張しやすくできる」\n- `s14` の後: 「durable task、background runtime、time trigger を整理して説明できる」\n- `s19` の後: 「高完成度 agent platform の外周境界が見えている」\n\n## なぜ前半は今の順序で固定すべきか\n\n### `s01` は必ず最初\n\nここで定義されるのは:\n\n- 最小の入口\n- turn ごとの進み方\n- tool result がなぜ次の model call に戻るのか\n\nこれがないと、後ろの章はすべて空中に浮いた feature 説明になります。\n\n### `s02` は `s01` の直後でよい\n\ntool がない agent は、まだ「話しているだけ」で「作業している」状態ではありません。\n\n`s02` で初めて:\n\n- model が `tool_use` を出す\n- system が handler を選ぶ\n- tool が実行される\n- `tool_result` が loop に戻る\n\nという、harness の実在感が出ます。\n\n### `s03` は `s04` より前であるべき\n\n教育上ここは重要です。\n\n先に教えるべきなのは:\n\n- 現在の agent が自分の仕事をどう整理するか\n\nその後に教えるべきなのが:\n\n- どの仕事を subagent へ切り出すべきか\n\n`s04` を早くしすぎると、subagent が isolation mechanism ではなく逃げ道に見えてしまいます。\n\n### `s05` は `s06` の前で正しい\n\nこの 2 章は同じ問題の前半と後半です。\n\n- `s05`: そもそも不要な知識を context へ入れすぎない\n- `s06`: それでも残る context をどう compact するか\n\n先に膨張を減らし、その後で必要なものだけ compact する。 \nこの順序はとても自然です。\n\n## なぜ `s07-s11` は 1 つの hardening block なのか\n\nこの 5 章は別々に見えて、実は同じ問いに答えています:\n\n**loop はもう動く。では、それをどう安定した本当の system にするか。**\n\n### `s07` は `s08` より前で正しい\n\n先に必要なのは:\n\n- その action を実行してよいか\n- deny するか\n- user に ask するか\n\nという gate の考え方です。\n\nその後で:\n\n- loop の周囲に何を hook するか\n\nを教える方が自然です。\n\nつまり:\n\n**gate が先、extend が後**\n\nです。\n\n### `s09` は `s10` より前で正しい\n\n`s09` は:\n\n- durable information が何か\n- 何を long-term に残すべきか\n\nを教えます。\n\n`s10` は:\n\n- 複数の入力源をどう model input に組み立てるか\n\nを教えます。\n\nつまり:\n\n- memory は content source を定義する\n- prompt assembly は source たちの組み立て順を定義する\n\n逆にすると、prompt pipeline が不自然で謎の文字列操作に見えやすくなります。\n\n### `s11` はこの block の締めとして適切\n\nerror recovery は独立した機能ではありません。\n\nここで system は初めて:\n\n- なぜ continue するのか\n- なぜ retry するのか\n- なぜ stop するのか\n\nを明示する必要があります。\n\nそのためには、input path、tool path、state path、control path が先に見えている必要があります。\n\n## なぜ `s12-s14` は goal -> runtime -> schedule の順なのか\n\nここは順番を崩すと一気に混乱します。\n\n### `s12` は `s13` より先\n\n`s12` は:\n\n- 仕事そのものが何か\n- dependency がどう張られるか\n- downstream work がいつ unlock されるか\n\nを教えます。\n\n`s13` は:\n\n- 今まさに何が live execution として動いているか\n- background result がどこへ戻るか\n- runtime state がどう write-back されるか\n\nを教えます。\n\nつまり:\n\n- `task` は durable goal\n- `runtime task` は live execution slot\n\nです。\n\nここを逆にすると、この 2 つが一語の task に潰れてしまいます。\n\n### `s14` は `s13` の後であるべき\n\ncron は別種の task を増やす章ではありません。\n\n追加するのは:\n\n**time という start condition**\n\nです。\n\nだから自然な順序は:\n\n`durable task graph -> runtime slot -> schedule trigger`\n\nになります。\n\n## なぜ `s15-s19` は team -> protocol -> autonomy -> worktree -> 
capability bus なのか\n\n### `s15` で system 内に誰が持続するかを定義する\n\nprotocol や autonomy より前に必要なのは durable actor です。\n\n- teammate は誰か\n- どんな identity を持つか\n- どう持続するか\n\n### `s16` で actor 間の coordination rule を定義する\n\nprotocol は actor より先には来ません。\n\nprotocol は次を構造化するために存在します。\n\n- 誰が request するか\n- 誰が approve するか\n- 誰が respond するか\n- どう trace するか\n\n### `s17` はその後で初めて明確になる\n\nautonomy は曖昧に説明しやすい概念です。\n\nしかし本当に必要なのは:\n\n- persistent teammate がすでに存在する\n- structured coordination がすでに存在する\n\nという前提です。\n\nそうでないと autonomous claim は魔法っぽく見えてしまいます。\n\n### `s18` は `s19` より前がよい\n\nworktree isolation は local execution boundary の問題です。\n\n- 並列作業がどこで走るか\n- lane 同士をどう隔離するか\n\nこれを先に見せてから:\n\n- plugin\n- MCP server\n- external capability route\n\nへ進む方が、自作実装の足場が崩れません。\n\n### `s19` は最後で正しい\n\nここは platform の最外周です。\n\nlocal の:\n\n- actor\n- lane\n- durable task\n- runtime execution\n\nが見えた後で、ようやく:\n\n- external capability provider\n\nがきれいに入ってきます。\n\n## コースを悪くする 5 つの誤った並べ替え\n\n1. `s04` を `s03` より前に動かす \n local planning より先に delegation を教えてしまう。\n\n2. `s10` を `s09` より前に動かす \n input source の理解なしに prompt assembly を教えることになる。\n\n3. `s13` を `s12` より前に動かす \n durable goal と live runtime slot が混ざる。\n\n4. `s17` を `s15` や `s16` より前に動かす \n autonomy が曖昧な polling magic に見える。\n\n5. `s19` を `s18` より前に動かす \n local platform boundary より external capability が目立ってしまう。\n\n## Maintainer が順序変更前に確認すべきこと\n\n章を動かす前に次を確認するとよいです。\n\n1. 前提概念はすでに前で説明されているか\n2. この変更で別の層の概念同士が混ざらないか\n3. この章が主に追加するのは goal か、runtime state か、actor か、capability boundary か\n4. これを早めても、学習者は最小正解版をまだ自力で作れるか\n5. これは開発者理解のための変更か、それとも source file の順を真似ているだけか\n\n5 番目が後者なら、たいてい変更しない方がよいです。\n\n## 一文で残すなら\n\n**良い章順とは、mechanism の一覧ではなく、各章が前章から自然に伸びた次の層として見える並びです。**\n"
+ },
+ {
+ "version": null,
+ "slug": "s00e-reference-module-map",
+ "locale": "ja",
+ "title": "s00e: 参照リポジトリのモジュール対応表",
+ "kind": "bridge",
+ "filename": "s00e-reference-module-map.md",
+ "content": "# s00e: 参照リポジトリのモジュール対応表\n\n> これは保守者と本気で学ぶ読者向けの校正文書です。 \n> 逆向きソースを逐行で読ませるための資料ではありません。\n>\n> ここで答えたいのは、次の一点です。\n>\n> **参照リポジトリの高信号なモジュール群と現在の教材の章順を突き合わせると、今のカリキュラム順は本当に妥当なのか。**\n\n## 結論\n\n妥当です。\n\n現在の `s01 -> s19` の順序は大筋で正しく、単純に「ソースツリーの並び順」に合わせるより、実際の設計主幹に近いです。\n\n理由は単純です。\n\n- 参照リポジトリには表層のディレクトリがたくさんある\n- しかし本当に設計の重みを持つのは、制御・状態・タスク・チーム・worktree・外部 capability に関する一部のクラスタ\n- それらは現在の 4 段階の教材構成ときれいに対応している\n\nしたがって、すべきことは「教材をソース木順へ潰す」ことではありません。\n\nすべきことは:\n\n- 今の依存関係ベースの順序を維持する\n- 参照リポジトリとの対応を明文化する\n- 主線に不要な製品周辺の細部を入れ過ぎない\n\n## この比較で見た高信号クラスタ\n\n主に次のようなモジュール群を見ています。\n\n- `Tool.ts`\n- `state/AppStateStore.ts`\n- `coordinator/coordinatorMode.ts`\n- `memdir/*`\n- `services/SessionMemory/*`\n- `services/toolUseSummary/*`\n- `constants/prompts.ts`\n- `tasks/*`\n- `tools/TodoWriteTool/*`\n- `tools/AgentTool/*`\n- `tools/ScheduleCronTool/*`\n- `tools/EnterWorktreeTool/*`\n- `tools/ExitWorktreeTool/*`\n- `tools/MCPTool/*`\n- `services/mcp/*`\n- `plugins/*`\n- `hooks/toolPermission/*`\n\nこれだけで、設計主脈絡の整合性は十分に判断できます。\n\n## 対応関係\n\n| 参照リポジトリのクラスタ | 典型例 | 対応する教材章 | この配置が妥当な理由 |\n|---|---|---|---|\n| Query ループと制御状態 | `Tool.ts`、`AppStateStore.ts`、query / coordinator 状態 | `s00`、`s00a`、`s00b`、`s01`、`s11` | 実システムは `messages[] + while True` だけではない。教材が最小ループから始め、後で control plane を補う流れは正しい。 |\n| Tool routing と実行面 | `Tool.ts`、native tools、tool context、実行 helper | `s02`、`s02a`、`s02b` | 参照実装は tools を共有 execution plane として扱っている。教材の分け方は妥当。 |\n| セッション計画 | `TodoWriteTool` | `s03` | セッション内の進行整理は小さいが重要な層で、持続タスクより先に学ぶべき。 |\n| 一回きりの委譲 | `AgentTool` の最小部分 | `s04` | 参照実装の agent machinery は大きいが、教材がまず「新しい文脈 + サブタスク + 要約返却」を教えるのは正しい。 |\n| Skill の発見と読み込み | `DiscoverSkillsTool`、`skills/*`、関連 prompt | `s05` | skills は飾りではなく知識注入層なので、prompt の複雑化より前に置くのが自然。 |\n| Context 圧縮と collapse | `services/toolUseSummary/*`、`services/contextCollapse/*` | `s06` | 参照実装に明示的な compact 層がある以上、これを早めに学ぶ構成は正しい。 |\n| Permission gate | `types/permissions.ts`、`hooks/toolPermission/*` | `s07` | 実行可否は独立した gate であり、単なる hook ではない。 |\n| Hooks と周辺拡張 | `types/hooks.ts`、hook runner | `s08` | 参照実装でも gate と extend は分かれている。順序は現状のままでよい。 |\n| Durable memory | `memdir/*`、`services/SessionMemory/*` | `s09` | memory は「何でも残すノート」ではなく、選択的な跨セッション層として扱われている。 |\n| Prompt 組み立て | `constants/prompts.ts`、prompt sections | `s10`、`s10a` | 入力は複数 source の合成物であり、教材が pipeline として説明するのは正しい。 |\n| Recovery / continuation | query transition、retry、compact retry、token recovery | `s11`、`s00c` | 続行理由は実システムで明示的に存在するため、前段の層を理解した後に学ぶのが自然。 |\n| Durable work graph | task record、dependency unlock | `s12` | 会話内の plan と durable work graph を分けている点が妥当。 |\n| Live runtime task | `tasks/types.ts`、`LocalShellTask`、`LocalAgentTask`、`RemoteAgentTask` | `s13`、`s13a` | 参照実装の runtime task union は、`TaskRecord` と `RuntimeTaskState` を分けるべき強い根拠になる。 |\n| Scheduled trigger | `ScheduleCronTool/*`、`useScheduledTasks` | `s14` | scheduling は runtime work の上に乗る開始条件なので、この順序でよい。 |\n| Persistent teammate | `InProcessTeammateTask`、team tools、agent registry | `s15` | 一回限りの subagent から durable actor へ広がる流れが参照実装にある。 |\n| Structured protocol | send-message、request tracking、coordinator mode | `s16` | protocol は actor が先に存在して初めて意味を持つ。 |\n| Autonomous claim / resume | task claiming、async worker lifecycle、resume logic | `s17` | autonomy は actor と task と protocol の上に成り立つ。 |\n| Worktree lane | `EnterWorktreeTool`、`ExitWorktreeTool`、worktree helper | `s18` | worktree は単なる git 小技ではなく、実行レーンと closeout 状態の仕組み。 |\n| External capability bus | `MCPTool`、`services/mcp/*`、`plugins/*` | `s19`、`s19a` | 参照実装でも MCP / plugin は外側の platform 
boundary にある。最後に置くのが正しい。 |\n\n## 特に強く裏付けられた 5 点\n\n### 1. `s03` は `s12` より前でよい\n\n参照実装には:\n\n- セッション内の小さな計画\n- 持続する task / runtime machinery\n\nの両方があります。\n\nこれは同じものではありません。\n\n### 2. `s09` は `s10` より前でよい\n\nprompt assembly は memory を含む複数 source を組み立てます。\n\nしたがって:\n\n- 先に memory という source を理解する\n- その後で prompt pipeline を理解する\n\nの順が自然です。\n\n### 3. `s12` は `s13` より前でなければならない\n\n`tasks/types.ts` に見える runtime task union は非常に重要です。\n\nこれは:\n\n- durable な仕事目標\n- 今まさに動いている実行スロット\n\nが別物であることをはっきり示しています。\n\n### 4. `s15 -> s16 -> s17` の順は妥当\n\n参照実装でも:\n\n- actor\n- protocol\n- autonomy\n\nの順で積み上がっています。\n\n### 5. `s18` は `s19` より前でよい\n\nworktree はまずローカルな実行境界として理解されるべきです。\n\nそのあとで:\n\n- plugin\n- MCP server\n- 外部 capability provider\n\nへ広げる方が、心智がねじれません。\n\n## 教材主線に入れ過ぎない方がよいもの\n\n参照リポジトリに実在していても、主線へ入れ過ぎるべきではないものがあります。\n\n- CLI command 面の広がり\n- UI rendering の細部\n- telemetry / analytics 分岐\n- remote / enterprise の配線\n- compatibility layer\n- ファイル名や行番号レベルの trivia\n\nこれらは本番では意味があります。\n\nただし 0 から 1 の教材主線の中心ではありません。\n\n## 教材側が特に注意すべき点\n\n### 1. Subagent と Teammate を混ぜない\n\n参照実装の `AgentTool` は:\n\n- 一回きりの委譲\n- background worker\n- persistent teammate\n- worktree-isolated worker\n\nをまたいでいます。\n\nだからこそ教材では:\n\n- `s04`\n- `s15`\n- `s17`\n- `s18`\n\nに分けて段階的に教える方がよいです。\n\n### 2. Worktree を「git の小技」へ縮めない\n\n参照実装には keep / remove、resume、cleanup、dirty check があります。\n\n`s18` は今後も:\n\n- lane identity\n- task binding\n- closeout\n- cleanup\n\nを教える章として保つべきです。\n\n### 3. MCP を「外部 tool 一覧」へ縮めない\n\n参照実装には tools 以外にも:\n\n- resources\n- prompts\n- elicitation / connection state\n- plugin mediation\n\nがあります。\n\nしたがって `s19` は tools-first で入ってよいですが、capability bus という外側の境界も説明すべきです。\n\n## 最終判断\n\n参照リポジトリの高信号クラスタと照らす限り、現在の章順は妥当です。\n\n今後の大きな加点ポイントは、さらに大規模な並べ替えではなく:\n\n- bridge docs の充実\n- エンティティ境界の明確化\n- 多言語の整合\n- web 側での学習導線の明快さ\n\nにあります。\n\n## 一文で覚える\n\n**よい教材順は、ファイルが並んでいる順ではなく、学習者が依存関係に沿って実装を再構成できる順です。**\n"
+ },
+ {
+ "version": null,
+ "slug": "s00f-code-reading-order",
+ "locale": "ja",
+ "title": "s00f: このリポジトリのコード読解順",
+ "kind": "bridge",
+ "filename": "s00f-code-reading-order.md",
+ "content": "# s00f: このリポジトリのコード読解順\n\n> このページは「もっと多くコードを読め」という話ではありません。 \n> もっと狭い問題を解決します。\n>\n> **章順が安定したあと、このリポジトリのコードをどんな順で読めば心智モデルを崩さずに理解できるのか。**\n\n## 先に結論\n\n次の読み方は避けます。\n\n- いちばん長いファイルから読む\n- いちばん高度そうな章へ飛ぶ\n- 先に `web/` を開いて主線を逆算する\n- `agents/*.py` 全体を 1 つの平坦なソース群として眺める\n\n安定したルールは 1 つです。\n\n**コードもカリキュラムと同じ順番で読む。**\n\n各章ファイルの中では、毎回同じ順で読みます。\n\n1. 状態構造\n2. tool 定義や registry\n3. 1 ターンを進める関数\n4. CLI 入口は最後\n\n## なぜこのページが必要か\n\n読者が詰まるのは文章だけではありません。実際にコードを開いた瞬間に、間違った場所から読み始めてまた混ざることが多いからです。\n\n## どの agent ファイルでも同じテンプレートで読む\n\n### 1. まずファイル先頭\n\n最初に答えること:\n\n- この章は何を教えているか\n- まだ何を故意に教えていないか\n\n### 2. 状態構造や manager class\n\n優先して探すもの:\n\n- `LoopState`\n- `PlanningState`\n- `CompactState`\n- `TaskManager`\n- `BackgroundManager`\n- `TeammateManager`\n- `WorktreeManager`\n\n### 3. tool 一覧や registry\n\n優先して見る入口:\n\n- `TOOLS`\n- `TOOL_HANDLERS`\n- `build_tool_pool()`\n- 主要な `run_*`\n\n### 4. ターンを進める関数\n\nたとえば:\n\n- `run_one_turn(...)`\n- `agent_loop(...)`\n- 章固有の `handle_*`\n\n### 5. CLI 入口は最後\n\n`if __name__ == \"__main__\"` は大事ですが、最初に見る場所ではありません。\n\n## Stage 1: `s01-s06`\n\nこの段階は single-agent の背骨です。\n\n| 章 | ファイル | 先に見るもの | 次に見るもの | 次へ進む前に確認すること |\n|---|---|---|---|---|\n| `s01` | `agents/s01_agent_loop.py` | `LoopState` | `TOOLS` -> `run_one_turn()` -> `agent_loop()` | `messages -> model -> tool_result -> next turn` を追える |\n| `s02` | `agents/s02_tool_use.py` | `safe_path()` | handler 群 -> `TOOL_HANDLERS` -> `agent_loop()` | ループを変えずに tool が増える形が分かる |\n| `s03` | `agents/s03_todo_write.py` | planning state | todo 更新経路 -> `agent_loop()` | 会話内 plan の外化が分かる |\n| `s04` | `agents/s04_subagent.py` | `AgentTemplate` | `run_subagent()` -> 親 `agent_loop()` | 文脈隔離としての subagent が分かる |\n| `s05` | `agents/s05_skill_loading.py` | skill registry | registry 周り -> `agent_loop()` | discover light / load deep が分かる |\n| `s06` | `agents/s06_context_compact.py` | `CompactState` | compact 周辺 -> `agent_loop()` | compact の本質が分かる |\n\n## Stage 2: `s07-s11`\n\nここは control plane を固める段階です。\n\n| 章 | ファイル | 先に見るもの | 次に見るもの | 次へ進む前に確認すること |\n|---|---|---|---|---|\n| `s07` | `agents/s07_permission_system.py` | validator / manager | permission path -> `agent_loop()` | gate before execute |\n| `s08` | `agents/s08_hook_system.py` | `HookManager` | hook dispatch -> `agent_loop()` | 固定拡張点としての hook |\n| `s09` | `agents/s09_memory_system.py` | memory manager | save / prompt build -> `agent_loop()` | 長期情報層としての memory |\n| `s10` | `agents/s10_system_prompt.py` | `SystemPromptBuilder` | input build -> `agent_loop()` | pipeline としての prompt |\n| `s11` | `agents/s11_error_recovery.py` | compact / backoff helper | recovery 分岐 -> `agent_loop()` | 失敗後の続行 |\n\n## Stage 3: `s12-s14`\n\nここから harness は work runtime へ広がります。\n\n| 章 | ファイル | 先に見るもの | 次に見るもの | 次へ進む前に確認すること |\n|---|---|---|---|---|\n| `s12` | `agents/s12_task_system.py` | `TaskManager` | task create / unlock -> `agent_loop()` | durable goal |\n| `s13` | `agents/s13_background_tasks.py` | `NotificationQueue` / `BackgroundManager` | background registration -> `agent_loop()` | runtime slot |\n| `s14` | `agents/s14_cron_scheduler.py` | `CronLock` / `CronScheduler` | trigger path -> `agent_loop()` | 未来の開始条件 |\n\n## Stage 4: `s15-s19`\n\nここは platform 境界を作る段階です。\n\n| 章 | ファイル | 先に見るもの | 次に見るもの | 次へ進む前に確認すること |\n|---|---|---|---|---|\n| `s15` | `agents/s15_agent_teams.py` | `MessageBus` / `TeammateManager` | roster / inbox / loop -> `agent_loop()` | persistent teammate |\n| `s16` | `agents/s16_team_protocols.py` | `RequestStore` | request handler -> `agent_loop()` | request-response + 
`request_id` |\n| `s17` | `agents/s17_autonomous_agents.py` | claim helper / identity helper | claim -> resume -> `agent_loop()` | idle check -> safe claim -> resume |\n| `s18` | `agents/s18_worktree_task_isolation.py` | manager 群 | worktree lifecycle -> `agent_loop()` | goal と execution lane の分離 |\n| `s19` | `agents/s19_mcp_plugin.py` | capability 周辺 class | route / normalize -> `agent_loop()` | external capability が同じ control plane に戻ること |\n\n## 最良の「文書 + コード」学習ループ\n\n各章で次を繰り返します。\n\n1. 章本文を読む\n2. bridge doc を読む\n3. 対応する `agents/sXX_*.py` を開く\n4. 状態 -> tools -> turn driver -> CLI 入口 の順で読む\n5. demo を 1 回動かす\n6. 最小版を自分で書き直す\n\n## 一言で言うと\n\n**コード読解順も教学順に従うべきです。まず境界、その次に状態、最後に主ループをどう進めるかを見ます。**\n"
},
{
"version": "s01",
+ "slug": "s01-the-agent-loop",
"locale": "ja",
"title": "s01: The Agent Loop",
- "content": "# s01: The Agent Loop\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- 1つのツール + 1つのループ = エージェント。\n\n## 問題\n\n言語モデルはコードについて推論できるが、現実世界に触れられない。ファイルを読めず、テストを実行できず、エラーを確認できない。ループがなければ、ツール呼び出しのたびにユーザーが手動で結果をコピーペーストする必要がある。つまりユーザー自身がループになる。\n\n## 解決策\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\n1つの終了条件がフロー全体を制御する。モデルがツール呼び出しを止めるまでループが回り続ける。\n\n## 仕組み\n\n1. ユーザーのプロンプトが最初のメッセージになる。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. メッセージとツール定義をLLMに送信する。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. アシスタントのレスポンスを追加し、`stop_reason`を確認する。ツールが呼ばれなければ終了。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. 各ツール呼び出しを実行し、結果を収集してuserメッセージとして追加。ステップ2に戻る。\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n1つの関数にまとめると:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\nこれでエージェント全体が30行未満に収まる。本コースの残りはすべてこのループの上に積み重なる -- ループ自体は変わらない。\n\n## 変更点\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n"
+ "kind": "chapter",
+ "filename": "s01-the-agent-loop.md",
+ "content": "# s01: The Agent Loop\n\n`s00 > [ s01 ] > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *loop がなければ agent は生まれません。* \n> この章では、最小だけれど正しい loop を先に作り、そのあとで「なぜ後ろの章で control plane が必要になるのか」を理解できる土台を作ります。\n\n## この章が解く問題\n\n言語 model 自体は「次にどんな文字列を出すか」を予測する存在です。\n\nそれだけでは自分で次のことはできません。\n\n- file を開く\n- command を実行する\n- error を観察する\n- その観察結果を次の判断へつなぐ\n\nもし system 側に次の流れを繰り返す code がなければ、\n\n```text\nmodel に聞く\n ->\ntool を使いたいと言う\n ->\n本当に実行する\n ->\n結果を model へ戻す\n ->\n次の一手を考えさせる\n```\n\nmodel は「会話できる program」に留まり、「仕事を進める agent」にはなりません。\n\nだからこの章の目標は 1 つです。\n\n**model と tool を閉ループに接続し、仕事を継続的に前へ進める最小 agent を作ること**\n\nです。\n\n## 先に言葉をそろえる\n\n### loop とは何か\n\nここでの `loop` は「無意味な無限ループ」ではありません。\n\n意味は、\n\n> 仕事がまだ終わっていない限り、同じ処理手順を繰り返す主循環\n\nです。\n\n### turn とは何か\n\n`turn` は 1 ラウンドです。\n\n最小版では 1 turn にだいたい次が入ります。\n\n1. 現在の messages を model に送る\n2. model response を受け取る\n3. tool_use があれば tool を実行する\n4. tool_result を messages に戻す\n\nそのあとで次の turn へ進むか、終了するかが決まります。\n\n### tool_result とは何か\n\n`tool_result` は terminal 上の一時ログではありません。\n\n正しくは、\n\n> model が次の turn で読めるよう、message history へ書き戻される結果 block\n\nです。\n\n### state とは何か\n\n`state` は、その loop が前へ進むために持ち続ける情報です。\n\nこの章の最小 state は次です。\n\n- `messages`\n- `turn_count`\n- 次 turn に続く理由\n\n## 最小心智モデル\n\nまず agent 全体を次の回路として見てください。\n\n```text\nuser message\n |\n v\nLLM\n |\n +-- 普通の返答 ----------> 終了\n |\n +-- tool_use ----------> tool 実行\n |\n v\n tool_result\n |\n v\n messages へ write-back\n |\n v\n 次の turn\n```\n\nこの図の中で一番重要なのは `while True` という文法ではありません。\n\n最も重要なのは次の 1 文です。\n\n**tool の結果は message history に戻され、次の推論入力になる**\n\nここが欠けると、model は現実の観察を踏まえて次の一手を考えられません。\n\n## この章の核になるデータ構造\n\n### 1. Message\n\n最小教材版では、message はまず次の形で十分です。\n\n```python\n{\"role\": \"user\", \"content\": \"...\"}\n{\"role\": \"assistant\", \"content\": [...]}\n```\n\nここで忘れてはいけないのは、\n\n**message history は UI 表示用の chat transcript ではなく、次 turn の作業 context**\n\nだということです。\n\n### 2. Tool Result Block\n\ntool 実行後は、その出力を対応する block として messages へ戻します。\n\n```python\n{\n \"type\": \"tool_result\",\n \"tool_use_id\": \"...\",\n \"content\": \"...\",\n}\n```\n\n`tool_use_id` は単純に、\n\n> どの tool 呼び出しに対応する結果か\n\nを model に示すための ID です。\n\n### 3. 
LoopState\n\nこの章では散らばった local variable だけで済ませるより、\n\n> loop が持つ state を 1 か所へ寄せて見る\n\n癖を作る方が後で効きます。\n\n最小形は次で十分です。\n\n```python\nstate = {\n \"messages\": [...],\n \"turn_count\": 1,\n \"transition_reason\": None,\n}\n```\n\nここでの `transition_reason` はまず、\n\n> なぜこの turn のあとにさらに続くのか\n\nを示す field とだけ理解してください。\n\nこの章の最小版では、理由は 1 種類でも十分です。\n\n```python\n\"tool_result\"\n```\n\nつまり、\n\n> tool を実行したので、その結果を踏まえてもう一度 model を呼ぶ\n\nという continuation です。\n\n## 最小実装を段階で追う\n\n### 第 1 段階: 初期 message を作る\n\nまず user request を history に入れます。\n\n```python\nmessages = [{\"role\": \"user\", \"content\": query}]\n```\n\n### 第 2 段階: model を呼ぶ\n\nmessages、system prompt、tools をまとめて model に送ります。\n\n```python\nresponse = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n)\n```\n\n### 第 3 段階: assistant response 自体も history へ戻す\n\n```python\nmessages.append({\n \"role\": \"assistant\",\n \"content\": response.content,\n})\n```\n\nここは初心者がとても落としやすい点です。\n\n「最終答えだけ取れればいい」と思って assistant response を保存しないと、次 turn の context が切れます。\n\n### 第 4 段階: tool_use があれば実行する\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nこの段階で初めて、model の意図が real execution へ落ちます。\n\n### 第 5 段階: tool_result を user-side message として write-back する\n\n```python\nmessages.append({\n \"role\": \"user\",\n \"content\": results,\n})\n```\n\nこれで次 turn の model は、\n\n- さっき自分が何を要求したか\n- その結果が何だったか\n\nを両方読めます。\n\n### 全体を 1 つの loop にまとめる\n\n```python\ndef agent_loop(state):\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=state[\"messages\"],\n tools=TOOLS,\n max_tokens=8000,\n )\n\n state[\"messages\"].append({\n \"role\": \"assistant\",\n \"content\": response.content,\n })\n\n if response.stop_reason != \"tool_use\":\n state[\"transition_reason\"] = None\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_tool(block)\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n\n state[\"messages\"].append({\n \"role\": \"user\",\n \"content\": results,\n })\n state[\"turn_count\"] += 1\n state[\"transition_reason\"] = \"tool_result\"\n```\n\nこれがこの course 全体の核です。\n\n後ろの章で何が増えても、\n\n**model を呼び、tool を実行し、result を戻して、必要なら続く**\n\nという骨格自体は残ります。\n\n## この章でわざと単純化していること\n\nこの章では最初から複雑な control plane を教えません。\n\nまだ出していないもの:\n\n- permission gate\n- hook\n- memory\n- prompt assembly pipeline\n- recovery branch\n- compact 後の continuation\n\nなぜなら初学者が最初に理解すべきなのは、\n\n**agent の最小閉ループ**\n\nだからです。\n\nもし最初から複数の continuation reason や recovery branch を混ぜると、\n読者は「loop そのもの」が見えなくなります。\n\n## 高完成度 system ではどう広がるか\n\n教材版は最も重要な骨格だけを教えます。\n\n高完成度 system では、その同じ loop の外側に次の層が足されます。\n\n| 観点 | この章の最小版 | 高完成度 system |\n|---|---|---|\n| loop 形状 | 単純な `while True` | event-driven / streaming continuation |\n| 継続理由 | `tool_result` が中心 | retry、compact resume、recovery など複数 |\n| tool execution | response 全体を見てから実行 | 並列実行や先行起動を含む runtime |\n| state | `messages` 中心 | turn、budget、transition、recovery を explicit に持つ |\n| error handling | ほぼなし | truncation、transport error、retry branch |\n| observability | 最小 | progress event、structured logs、UI stream |\n\nここで覚えるべき本質は細かな branch 名ではありません。\n\n本質は次の 1 文です。\n\n**agent は最後まで「結果を model に戻し続ける loop」であり、周囲に state 管理と continuation の理由が増えていく**\n\nということです。\n\n## 
この章を読み終えたら何が言えるべきか\n\n1. model だけでは agent にならず、tool result を戻す loop が必要\n2. assistant response 自体も history に残さないと次 turn が切れる\n3. tool_result は terminal log ではなく、次 turn の input block である\n\n## 一文で覚える\n\n**agent loop とは、model の要求を現実の観察へ変え、その観察をまた model に返し続ける主循環です。**\n"
},
{
"version": "s02",
+ "slug": "s02-tool-use",
"locale": "ja",
"title": "s02: Tool Use",
- "content": "# s02: Tool Use\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"ツールを足すなら、ハンドラーを1つ足すだけ\"* -- ループは変わらない。新ツールは dispatch map に登録するだけ。\n\n## 問題\n\n`bash`だけでは、エージェントは何でもシェル経由で行う。`cat`は予測不能に切り詰め、`sed`は特殊文字で壊れ、すべてのbash呼び出しが制約のないセキュリティ面になる。`read_file`や`write_file`のような専用ツールなら、ツールレベルでパスのサンドボックス化を強制できる。\n\n重要な点: ツールを追加してもループの変更は不要。\n\n## 解決策\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## 仕組み\n\n1. 各ツールにハンドラ関数を定義する。パスのサンドボックス化でワークスペース外への脱出を防ぐ。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. ディスパッチマップがツール名とハンドラを結びつける。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. ループ内で名前によりハンドラをルックアップする。ループ本体はs01から不変。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nツール追加 = ハンドラ追加 + スキーマ追加。ループは決して変わらない。\n\n## s01からの変更点\n\n| Component | Before (s01) | After (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\n| Path safety | None | `safe_path()` sandbox |\n| Agent loop | Unchanged | Unchanged |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n"
+ "kind": "chapter",
+ "filename": "s02-tool-use.md",
+ "content": "# s02: Tool Use\n\n`s01 > [ s02 ] > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *\"ツールを足すなら、ハンドラーを1つ足すだけ\"* -- ループは変わらない。新ツールは dispatch map に登録するだけ。\n>\n> **Harness 層**: ツール分配 -- モデルが届く範囲を広げる。\n\n## 問題\n\n`bash`だけでは、エージェントは何でもシェル経由で行う。`cat`は予測不能に切り詰め、`sed`は特殊文字で壊れ、すべてのbash呼び出しが制約のないセキュリティ面になる。`read_file`や`write_file`のような専用ツールなら、ツールレベルでパスのサンドボックス化を強制できる。\n\n重要な点: ツールを追加してもループの変更は不要。\n\n## 解決策\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## 仕組み\n\n1. 各ツールにハンドラ関数を定義する。パスのサンドボックス化でワークスペース外への脱出を防ぐ。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. ディスパッチマップがツール名とハンドラを結びつける。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. ループ内で名前によりハンドラをルックアップする。ループ本体はs01から不変。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nツール追加 = ハンドラ追加 + スキーマ追加。ループは決して変わらない。\n\n## s01からの変更点\n\n| Component | Before (s01) | After (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\n| Path safety | None | `safe_path()` sandbox |\n| Agent loop | Unchanged | Unchanged |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n\n## 教学上の簡略化\n\nこの章で本当に学ぶべきなのは、細かな production 差分ではありません。\n\n学ぶべき中心は次の 4 点です。\n\n1. モデルに見せる tool schema がある\n2. 実装側には handler がある\n3. 両者は dispatch map で結ばれる\n4. 実行結果は `tool_result` として主ループへ戻る\n\nより完成度の高い system では、この周りに権限、hook、並列実行、結果永続化、外部 capability routing などが増えていきます。\n\nしかし、それらをここで全部追い始めると、初学者は\n\n- schema と handler の違い\n- dispatch map の役割\n- `tool_result` がなぜ主ループへ戻るのか\n\nという本章の主眼を見失いやすくなります。\n\nこの段階では、まず\n\n**新しい tool を足しても主ループ自体は作り替えなくてよい**\n\nという設計の強さを、自分で実装して理解できれば十分です。\n"
+ },
+ {
+ "version": null,
+ "slug": "s02a-tool-control-plane",
+ "locale": "ja",
+ "title": "s02a: Tool Control Plane",
+ "kind": "bridge",
+ "filename": "s02a-tool-control-plane.md",
+ "content": "# s02a: Tool Control Plane\n\n> これは `s02` を深く理解するための橋渡し文書です。 \n> 問いたいのは:\n>\n> **なぜ tool system は単なる `tool_name -> handler` 表では足りないのか。**\n\n## 先に結論\n\n最小 demo では dispatch map だけでも動きます。\n\nしかし高完成度の system では tool layer は次の責任をまとめて持ちます。\n\n- tool schema をモデルへ見せる\n- tool 名から実行先を解決する\n- 実行前に permission を通す\n- hook / classifier / side check を差し込む\n- 実行中 progress を扱う\n- 結果を整形して loop へ戻す\n- 実行で変わる共有 state へアクセスする\n\nつまり tool layer は:\n\n**関数表ではなく、共有 execution plane**\n\nです。\n\n## 最小の心智モデル\n\n```text\nmodel emits tool_use\n |\n v\ntool spec lookup\n |\n v\npermission / hook / validation\n |\n v\nactual execution\n |\n v\ntool result shaping\n |\n v\nwrite-back to loop\n```\n\n## `dispatch map` だけでは足りない理由\n\n単なる map だと、せいぜい:\n\n- この名前ならこの関数\n\nしか表せません。\n\nでも実システムで必要なのは:\n\n- モデルへ何を見せるか\n- 実行前に何を確認するか\n- 実行中に何を表示するか\n- 実行後にどんな result block を返すか\n- どの shared context を触れるか\n\nです。\n\n## 主要なデータ構造\n\n### `ToolSpec`\n\nモデルに見せる tool の定義です。\n\n```python\ntool = {\n \"name\": \"read_file\",\n \"description\": \"...\",\n \"input_schema\": {...},\n}\n```\n\n### `ToolDispatchMap`\n\n名前から handler を引く表です。\n\n```python\ndispatch = {\n \"read_file\": run_read,\n \"bash\": run_bash,\n}\n```\n\nこれは必要ですが、これだけでは足りません。\n\n### `ToolUseContext`\n\ntool が共有状態へ触るための文脈です。\n\nたとえば:\n\n- app state getter / setter\n- permission context\n- notifications\n- file-state cache\n- current agent identity\n\nなどが入ります。\n\n### `ToolResultEnvelope`\n\nloop へ返すときの整形済み result です。\n\n```python\n{\n \"type\": \"tool_result\",\n \"tool_use_id\": \"...\",\n \"content\": \"...\",\n}\n```\n\n高完成度版では content だけでなく:\n\n- progress\n- warnings\n- structured result\n\nなども関わります。\n\n## 実行面として見ると何が変わるか\n\n### 1. Tool は「名前」ではなく「実行契約」になる\n\n1つの tool には:\n\n- 入力 schema\n- 実行権限\n- 実行時 context\n- 出力の形\n\nがひとまとまりで存在します。\n\n### 2. Permission と Hook の差が見えやすくなる\n\n- permission: 実行してよいか\n- hook: 実行の周辺で何を足すか\n\n### 3. Native / Task / Agent / MCP を同じ平面で見やすくなる\n\n参照実装でも重要なのは:\n\n**能力の出どころが違っても、loop から見れば 1 つの tool execution plane に入る**\n\nという点です。\n\n## 初学者がやりがちな誤り\n\n### 1. tool spec と handler を混同する\n\n- spec はモデル向け説明\n- handler は実行コード\n\n### 2. permission を handler の中へ埋め込む\n\nこれをやると gate が共有層にならず、system が読みにくくなります。\n\n### 3. result shaping を軽く見る\n\ntool 実行結果は「文字列が返ればよい」ではありません。\n\nloop が読み戻しやすい形に整える必要があります。\n\n### 4. 実行状態を `messages[]` だけで持とうとする\n\ntool 実行は app state や runtime state を触ることがあります。\n\n## 一文で覚える\n\n**tool system が本物らしくなるのは、名前から関数を呼べた瞬間ではなく、schema・gate・context・result を含む共有 execution plane として見えた瞬間です。**\n"
+ },
+ {
+ "version": null,
+ "slug": "s02b-tool-execution-runtime",
+ "locale": "ja",
+ "title": "s02b: Tool Execution Runtime",
+ "kind": "bridge",
+ "filename": "s02b-tool-execution-runtime.md",
+ "content": "# s02b: Tool Execution Runtime\n\n> この bridge doc は tool の登録方法ではなく、次の問いを扱います。\n>\n> **model が複数の tool call を出したとき、何を基準に並列化し、進捗を出し、結果順を安定させ、context をマージするのか。**\n\n## なぜこの資料が必要か\n\n`s02` では正しく次を教えています。\n\n- tool schema\n- dispatch map\n- `tool_result` の main loop への回流\n\n出発点としては十分です。\n\nただしシステムが大きくなると、本当に難しくなるのはもっと深い層です。\n\n- どの tool は並列実行できるか\n- どの tool は直列でなければならないか\n- 遅い tool は途中 progress を出すべきか\n- 並列結果を完了順で返すのか、元の順序で返すのか\n- tool 実行が共有 context を変更するのか\n- 並列変更をどう安全にマージするのか\n\nこれらはもはや「登録」の話ではありません。\n\nそれは:\n\n**tool execution runtime**\n\nの話です。\n\n## まず用語\n\n### tool execution runtime とは\n\nここでの runtime は言語 runtime の意味ではありません。\n\nここでは:\n\n> tool call が実際に動き始めた後、システムがそれらをどう調度し、追跡し、回写するか\n\nという実行規則のことです。\n\n### concurrency safe とは\n\nconcurrency safe とは:\n\n> 同種の仕事と同時に走っても共有 state を壊しにくい\n\nという意味です。\n\nよくある read-only tool は安全なことが多いです。\n\n- `read_file`\n- いくつかの search tool\n- 読み取り専用の MCP tool\n\n一方で write 系は安全でないことが多いです。\n\n- `write_file`\n- `edit_file`\n- 共有 app state を変える tool\n\n### progress message とは\n\nprogress message とは:\n\n> tool はまだ終わっていないが、「今何をしているか」を先に上流へ見せる更新\n\nのことです。\n\n### context modifier とは\n\nある tool は text result だけでなく共有 runtime context も変更します。\n\n例えば:\n\n- notification queue を更新する\n- 実行中 tool の状態を更新する\n- app state を変更する\n\nこの共有 state 変更を context modifier と考えられます。\n\n## 最小の心智モデル\n\ntool 実行を次のように平坦化しないでください。\n\n```text\ntool_use -> handler -> result\n```\n\nより実像に近い理解は次です。\n\n```text\ntool_use blocks\n ->\nconcurrency safety で partition\n ->\n並列 lane か直列 lane を選ぶ\n ->\n必要なら progress を吐く\n ->\n安定順で結果を回写する\n ->\nqueued context modifiers をマージする\n```\n\nここで大事なのは二つです。\n\n- 並列化は「全部まとめて走らせる」ではない\n- 共有 context は完了順で勝手に書き換えない\n\n## 主要 record\n\n### 1. `ToolExecutionBatch`\n\n教材版なら次の程度の batch 概念で十分です。\n\n```python\nbatch = {\n \"is_concurrency_safe\": True,\n \"blocks\": [tool_use_1, tool_use_2, tool_use_3],\n}\n```\n\n意味は単純です。\n\n- tool を常に 1 個ずつ扱うわけではない\n- runtime はまず execution batch に分ける\n\n### 2. `TrackedTool`\n\n完成度を上げたいなら各 tool を明示的に追跡します。\n\n```python\ntracked_tool = {\n \"id\": \"toolu_01\",\n \"name\": \"read_file\",\n \"status\": \"queued\", # queued / executing / completed / yielded\n \"is_concurrency_safe\": True,\n \"pending_progress\": [],\n \"results\": [],\n \"context_modifiers\": [],\n}\n```\n\nこれにより runtime は次に答えられます。\n\n- 何が待機中か\n- 何が実行中か\n- 何が完了したか\n- 何がすでに progress を出したか\n\n### 3. `MessageUpdate`\n\ntool 実行は最終結果 1 個だけを返すとは限りません。\n\n最小理解は次で十分です。\n\n```python\nupdate = {\n \"message\": maybe_message,\n \"new_context\": current_context,\n}\n```\n\n高完成度 runtime では、更新は通常二つに分かれます。\n\n- すぐ上流へ見せる message update\n- 後で merge すべき内部 context update\n\n### 4. 
queued context modifiers\n\nこれは見落とされやすいですが、とても重要です。\n\n並列 batch で安全なのは:\n\n> 先に終わった tool がその順で共有 context を先に変える\n\nことではありません。\n\nより安全なのは:\n\n> context modifier を一旦 queue し、最後に元の tool 順序で merge する\n\nことです。\n\n```python\nqueued_context_modifiers = {\n \"toolu_01\": [modify_ctx_a],\n \"toolu_02\": [modify_ctx_b],\n}\n```\n\n## 最小実装の進め方\n\n### Step 1: concurrency safety を判定する\n\n```python\ndef is_concurrency_safe(tool_name: str, tool_input: dict) -> bool:\n return tool_name in {\"read_file\", \"search_files\"}\n```\n\n### Step 2: 実行前に partition する\n\n```python\nbatches = partition_tool_calls(tool_uses)\n\nfor batch in batches:\n if batch[\"is_concurrency_safe\"]:\n run_concurrently(batch[\"blocks\"])\n else:\n run_serially(batch[\"blocks\"])\n```\n\n### Step 3: 並列 lane では progress を先に出せるようにする\n\n```python\nfor update in run_concurrently(...):\n if update.get(\"message\"):\n yield update[\"message\"]\n```\n\n### Step 4: context merge は安定順で行う\n\n```python\nqueued_modifiers = {}\n\nfor update in concurrent_updates:\n if update.get(\"context_modifier\"):\n # 初出の tool_id は空 list を用意してから追加する\n queued_modifiers.setdefault(update[\"tool_id\"], []).append(update[\"context_modifier\"])\n\nfor tool in original_batch_order:\n for modifier in queued_modifiers.get(tool[\"id\"], []):\n context = modifier(context)\n```\n\nここは教材 repo でも簡略化しすぎず、しかし主線を崩さずに教えられる重要点です。\n\n## 開発者が持つべき図\n\n```text\ntool_use blocks\n |\n v\npartition by concurrency safety\n |\n +-- safe batch ----------> concurrent execution\n | |\n | +-- progress updates\n | +-- final results\n | +-- queued context modifiers\n |\n +-- exclusive batch -----> serial execution\n |\n +-- direct result\n +-- direct context update\n```\n\n## なぜ後半では dispatch map より重要になるのか\n\n小さい demo では:\n\n```python\nhandlers[tool_name](tool_input)\n```\n\nで十分です。\n\nしかし高完成度 agent で本当に難しいのは、正しい handler を呼ぶことそのものではありません。\n\n難しいのは:\n\n- 複数 tool を安全に調度する\n- progress を見えるようにする\n- 結果順を安定させる\n- 共有 context を非決定的にしない\n\nだからこそ tool execution runtime は独立した bridge doc として教える価値があります。\n"
},
{
"version": "s03",
+ "slug": "s03-todo-write",
"locale": "ja",
"title": "s03: TodoWrite",
- "content": "# s03: TodoWrite\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"計画のないエージェントは行き当たりばったり\"* -- まずステップを書き出し、それから実行。\n\n## 問題\n\nマルチステップのタスクで、モデルは途中で迷子になる。作業を繰り返したり、ステップを飛ばしたり、脱線したりする。長い会話になるほど悪化する -- ツール結果がコンテキストを埋めるにつれ、システムプロンプトの影響力が薄れる。10ステップのリファクタリングでステップ1-3を完了した後、残りを忘れて即興を始めてしまう。\n\n## 解決策\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 仕組み\n\n1. TodoManagerはアイテムのリストをステータス付きで保持する。`in_progress`にできるのは同時に1つだけ。\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. `todo`ツールは他のツールと同様にディスパッチマップに追加される。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. nagリマインダーが、モデルが3ラウンド以上`todo`を呼ばなかった場合にナッジを注入する。\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos. \",\n })\n```\n\n「一度にin_progressは1つだけ」の制約が逐次的な集中を強制し、nagリマインダーが説明責任を生む。\n\n## s02からの変更点\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|----------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses |\n| Nag injection | None | `` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n"
+ "kind": "chapter",
+ "filename": "s03-todo-write.md",
+ "content": "# s03: TodoWrite\n\n`s00 > s01 > s02 > [ s03 ] > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *planning は model の代わりに考えるためのものではありません。いま何をやっているかを、外から見える state にするためのものです。*\n\n## この章が解く問題\n\n`s02` まで来ると agent はすでに、\n\n- file を読む\n- file を書く\n- command を実行する\n\nことができます。\n\nするとすぐに別の問題が出ます。\n\n- multi-step task で一歩前の確認を忘れる\n- もう終えた確認をまた繰り返す\n- 最初は計画しても、数 turn 後には即興に戻る\n\nこれは model が「考えられない」からではありません。\n\n問題は、\n\n**現在の plan を explicit に置いておく stable state がないこと**\n\nです。\n\nこの章で足すのはより強い tool ではなく、\n\n**今の session で何をどの順で進めているかを外部状態として見えるようにする仕組み**\n\nです。\n\n## 先に言葉をそろえる\n\n### session 内 planning とは何か\n\nここで扱う planning は long-term project management ではありません。\n\n意味は、\n\n> 今回の user request を終えるために、直近の数手を外へ書き出し、途中で更新し続けること\n\nです。\n\n### todo とは何か\n\n`todo` は特定 product の固有名詞として覚える必要はありません。\n\nこの章では単に、\n\n> model が current plan を更新するための入口\n\nとして使います。\n\n### active step とは何か\n\n`active step` は、\n\n> いま本当に進めている 1 手\n\nです。\n\n教材版では `in_progress` で表します。\n\nここで狙っているのは形式美ではなく、\n\n**同時にあれもこれも進めて plan をぼかさないこと**\n\nです。\n\n### reminder とは何か\n\nreminder は model の代わりに plan を作るものではありません。\n\n意味は、\n\n> 数 turn 連続で plan 更新を忘れたときに、軽く plan へ意識を戻すナッジ\n\nです。\n\n## 最初に強調したい境界\n\nこの章は task system ではありません。\n\n`s03` で扱うのは、\n\n- session 内の軽量な current plan\n- 進行中の focus を保つための外部状態\n- turn ごとに書き換わりうる planning panel\n\nです。\n\nここでまだ扱わないもの:\n\n- durable task board\n- dependency graph\n- multi-agent 共有 task graph\n- background runtime task manager\n\nそれらは `s12-s14` であらためて教えます。\n\nこの境界を守らないと、初心者はすぐに次を混同します。\n\n- 今この session で次にやる一手\n- system 全体に長く残る work goal\n\n## 最小心智モデル\n\nこの章を最も簡単に捉えるなら、plan はこういう panel です。\n\n```text\nuser が大きな仕事を頼む\n |\n v\nmodel が今の plan を書き出す\n |\n v\nplan state\n - [ ] まだ着手していない\n - [>] いま進めている\n - [x] 完了した\n |\n v\n1 手進むたびに更新する\n```\n\nつまり流れはこうです。\n\n1. まず current work を数手に割る\n2. 1 つを `in_progress` にする\n3. 終わったら `completed` にする\n4. 次の 1 つを `in_progress` にする\n5. しばらく更新がなければ reminder する\n\nこの 5 手が見えていれば、この章の幹はつかめています。\n\n## この章の核になるデータ構造\n\n### 1. PlanItem\n\n最小の item は次のように考えられます。\n\n```python\n{\n \"content\": \"Read the failing test\",\n \"status\": \"pending\" | \"in_progress\" | \"completed\",\n \"activeForm\": \"Reading the failing test\",\n}\n```\n\n意味は単純です。\n\n- `content`: 何をするか\n- `status`: いまどの段階か\n- `activeForm`: 実行中に自然文でどう見せるか\n\n教材コードによっては `id` や `text` を使っていても本質は同じです。\n\n### 2. PlanningState\n\nitem だけでは足りません。\n\nplan 全体には最低限、次の running state も要ります。\n\n```python\n{\n \"items\": [...],\n \"rounds_since_update\": 0,\n}\n```\n\n`rounds_since_update` の意味は、\n\n> 何 turn 連続で plan が更新されていないか\n\nです。\n\nこの値があるから reminder を出せます。\n\n### 3. 
状態制約\n\n教材版では次の制約を置くのが有効です。\n\n```text\n同時に in_progress は最大 1 つ\n```\n\nこれは宇宙の真理ではありません。 \nでも初学者にとっては非常に良い制約です。\n\n理由は単純で、\n\n**current focus を system 側から明示できる**\n\nからです。\n\n## 最小実装を段階で追う\n\n### 第 1 段階: plan manager を用意する\n\n```python\nclass TodoManager:\n def __init__(self):\n self.items = []\n```\n\n最初はこれで十分です。\n\nここで導入したいのは UI ではなく、\n\n> plan を model の頭の中ではなく harness 側の state として持つ\n\nという発想です。\n\n### 第 2 段階: plan 全体を更新できるようにする\n\n教材版では item をちまちま差分更新するより、\n\n**現在の plan を丸ごと更新する**\n\n方が理解しやすいです。\n\n```python\ndef update(self, items: list) -> str:\n validated = []\n in_progress_count = 0\n\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n\n validated.append({\n \"content\": item[\"content\"],\n \"status\": status,\n \"activeForm\": item.get(\"activeForm\", \"\"),\n })\n\n if in_progress_count > 1:\n raise ValueError(\"Only one item can be in_progress\")\n\n self.items = validated\n return self.render()\n```\n\nここでやっていることは 2 つです。\n\n- current plan を受け取る\n- 状態制約をチェックする\n\n### 第 3 段階: render して可読にする\n\n```python\ndef render(self) -> str:\n lines = []\n for item in self.items:\n marker = {\n \"pending\": \"[ ]\",\n \"in_progress\": \"[>]\",\n \"completed\": \"[x]\",\n }[item[\"status\"]]\n lines.append(f\"{marker} {item['content']}\")\n return \"\\n\".join(lines)\n```\n\nrender の価値は見た目だけではありません。\n\nplan が text として安定して見えることで、\n\n- user が current progress を理解しやすい\n- model も自分が何をどこまで進めたか確認しやすい\n\n状態になります。\n\n### 第 4 段階: `todo` を 1 つの tool として loop へ接ぐ\n\n```python\nTOOL_HANDLERS = {\n \"read_file\": run_read,\n \"write_file\": run_write,\n \"edit_file\": run_edit,\n \"bash\": run_bash,\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\nここで重要なのは、plan 更新を特別扱いの hidden logic にせず、\n\n**tool call として explicit に loop へ入れる**\n\nことです。\n\n### 第 5 段階: 数 turn 更新がなければ reminder を挿入する\n\n```python\nif rounds_since_update >= 3:\n results.insert(0, {\n \"type\": \"text\",\n \"text\": \"Refresh your plan before continuing. \",\n })\n```\n\nこの reminder の意味は「system が代わりに plan を立てる」ではありません。\n\n正しくは、\n\n> plan state がしばらく stale なので、model に current plan を更新させる\n\nです。\n\n## main loop に何が増えるのか\n\nこの章以後、main loop は `messages` だけを持つわけではなくなります。\n\n持つ state が少なくとも 2 本になります。\n\n```text\nmessages\n -> model が読む会話と観察の history\n\nplanning state\n -> 今回の session で current work をどう進めるか\n```\n\nこれがこの章の本当の upgrade です。\n\nagent はもはや単に chat history を伸ばしているだけではなく、\n\n**「いま何をしているか」を外から見える panel として維持する**\n\nようになります。\n\n## なぜここで task graph まで教えないのか\n\n初心者は planning の話が出るとすぐ、\n\n> だったら durable task board も同時に作った方がよいのでは\n\nと考えがちです。\n\nでも教学順序としては早すぎます。\n\n理由は、ここで理解してほしいのが\n\n**session 内の軽い plan と、長く残る durable work graph は別物**\n\nという境界だからです。\n\n`s03` は current focus の外部化です。 \n`s12` 以降は durable task system です。\n\n順番を守ると、後で混ざりにくくなります。\n\n## 初学者が混ぜやすいポイント\n\n### 1. plan を model の頭の中だけに置く\n\nこれでは multi-step work がすぐ漂います。\n\n### 2. `in_progress` を複数許してしまう\n\ncurrent focus がぼやけ、plan が checklist ではなく wish list になります。\n\n### 3. plan を一度書いたら更新しない\n\nそれでは plan は living state ではなく dead note です。\n\n### 4. reminder を system の強制 planning と誤解する\n\nreminder は軽いナッジであって、plan の中身を system が代行するものではありません。\n\n### 5. session plan と durable task graph を同一視する\n\nこの章で扱うのは current request を進めるための軽量 state です。\n\n## この章を読み終えたら何が言えるべきか\n\n1. planning は model の代わりに考えることではなく、current progress を外部 state にすること\n2. session plan は durable task system とは別層であること\n3. `in_progress` を 1 つに絞ると初心者の心智が安定すること\n\n## 一文で覚える\n\n**TodoWrite とは、「次に何をするか」を model の頭の中ではなく、system が見える外部 state に書き出すことです。**\n"
},
{
"version": "s04",
+ "slug": "s04-subagent",
"locale": "ja",
"title": "s04: Subagents",
- "content": "# s04: Subagents\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"大きなタスクを分割し、各サブタスクにクリーンなコンテキストを\"* -- サブエージェントは独立した messages[] を使い、メイン会話を汚さない。\n\n## 問題\n\nエージェントが作業するにつれ、messages配列は膨張し続ける。すべてのファイル読み取り、すべてのbash出力がコンテキストに永久に残る。「このプロジェクトはどのテストフレームワークを使っているか」という質問は5つのファイルを読む必要があるかもしれないが、親に必要なのは「pytest」という答えだけだ。\n\n## 解決策\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## 仕組み\n\n1. 親に`task`ツールを追加する。子は`task`を除くすべての基本ツールを取得する(再帰的な生成は不可)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. サブエージェントは`messages=[]`で開始し、自身のループを実行する。最終テキストだけが親に返る。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n子のメッセージ履歴全体(30回以上のツール呼び出し)は破棄される。親は1段落の要約を通常の`tool_result`として受け取る。\n\n## s03からの変更点\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n"
+ "kind": "chapter",
+ "filename": "s04-subagent.md",
+ "content": "# s04: Subagents\n\n`s00 > s01 > s02 > s03 > [ s04 ] > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *大きな仕事を全部 1 つの context に詰め込む必要はありません。* \n> subagent の価値は「model を 1 個増やすこと」ではなく、「clean な別 context を 1 つ持てること」にあります。\n\n## この章が解く問題\n\nagent がいろいろな調査や実装を進めると、親の `messages` はどんどん長くなります。\n\nたとえば user の質問が単に\n\n> 「この project は何の test framework を使っているの?」\n\nだけでも、親 agent は答えるために、\n\n- `pyproject.toml` を読む\n- `requirements.txt` を読む\n- `pytest` を検索する\n- 実際に test command を走らせる\n\nかもしれません。\n\nでも本当に親に必要な最終答えは、\n\n> 「主に `pytest` を使っています」\n\nの一文だけかもしれません。\n\nもしこの途中作業を全部親 context に積み続けると、あとで別の質問に答えるときに、\n\n- さっきの局所調査の noise\n- 大量の file read\n- 一時的な bash 出力\n\nが main context を汚染します。\n\nsubagent が解くのはこの問題です。\n\n**局所 task を別 context に閉じ込め、親には必要な summary だけを持ち帰る**\n\nのがこの章の主線です。\n\n## 先に言葉をそろえる\n\n### 親 agent とは何か\n\nいま user と直接やり取りし、main `messages` を持っている actor が親 agent です。\n\n### 子 agent とは何か\n\n親が一時的に派生させ、特定の subtask だけを処理させる actor が子 agent、つまり subagent です。\n\n### context isolation とは何か\n\nこれは単に、\n\n- 親は親の `messages`\n- 子は子の `messages`\n\nを持ち、\n\n> 子の途中経過が自動で親 history に混ざらないこと\n\nを指します。\n\n## 最小心智モデル\n\nこの章は次の図でほぼ言い切れます。\n\n```text\nParent agent\n |\n | 1. 局所 task を外へ出すと決める\n v\nSubagent\n |\n | 2. 自分の context で file read / search / tool execution\n v\nSummary\n |\n | 3. 必要な結果だけを親へ返す\n v\nParent agent continues\n```\n\nここで一番大事なのは次の 1 文です。\n\n**subagent の価値は別 model instance ではなく、別 state boundary にある**\n\nということです。\n\n## 最小実装を段階で追う\n\n### 第 1 段階: 親に `task` tool を持たせる\n\n親 agent は model が明示的に言える入口を持つ必要があります。\n\n> この局所仕事は clean context に外注したい\n\nその最小 schema は非常に簡単で構いません。\n\n```python\n{\n \"name\": \"task\",\n \"description\": \"Run a subtask in a clean context and return a summary.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"prompt\": {\"type\": \"string\"}\n },\n \"required\": [\"prompt\"]\n }\n}\n```\n\n### 第 2 段階: subagent は自分専用の `messages` で始める\n\nsubagent の本体はここです。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n ...\n```\n\n親の `messages` をそのまま共有しないことが、最小の isolation です。\n\n### 第 3 段階: 子に渡す tool は絞る\n\nsubagent は親と完全に同じ tool set を持つ必要はありません。\n\nむしろ最初は絞った方がよいです。\n\nたとえば、\n\n- `read_file`\n- 検索系 tool\n- read-only 寄りの `bash`\n\nだけを持たせ、\n\n- さらに `task` 自体は子に渡さない\n\nようにすれば、無限再帰を避けやすくなります。\n\n### 第 4 段階: 子は最後に summary だけ返す\n\n一番大事なのはここです。\n\nsubagent は内部 history を親に全部戻しません。\n\n戻すのは必要な summary だけです。\n\n```python\nreturn {\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": summary_text,\n}\n```\n\nこれにより親 context は、\n\n- 必要な答え\n- もしくは短い結論\n\nだけを保持し、局所ノイズから守られます。\n\n## この章の核になるデータ構造\n\nこの章で 1 つだけ覚えるなら、次の骨格です。\n\n```python\nclass SubagentContext:\n messages: list\n tools: list\n handlers: dict\n max_turns: int\n```\n\n意味は次の通りです。\n\n- `messages`: 子自身の context\n- `tools`: 子が使える道具\n- `handlers`: その tool が実際にどの code を呼ぶか\n- `max_turns`: 子が無限に走り続けないための上限\n\nつまり subagent は「関数呼び出し」ではなく、\n\n**自分の state と tool boundary を持つ小さな agent**\n\nです。\n\n## なぜ本当に useful なのか\n\n### 1. 親 context を軽く保てる\n\n局所 task の途中経過が main conversation に積み上がりません。\n\n### 2. subtask の prompt を鋭くできる\n\n子に渡す prompt は次のように非常に集中できます。\n\n- 「この directory の test framework を 1 文で答えて」\n- 「この file の bug を探して原因だけ返して」\n- 「3 file を読んで module 関係を summary して」\n\n### 3. 
後の multi-agent chapter の準備になる\n\nsubagent は long-lived teammate より前に学ぶべき最小の delegation model です。\n\nまず「1 回限りの clean delegation」を理解してから、\n\n- persistent teammate\n- structured protocol\n- autonomous claim\n\nへ進むと心智がずっと滑らかになります。\n\n## 0-to-1 の実装順序\n\n### Version 1: blank-context subagent\n\n最初はこれで十分です。\n\n- `task` tool\n- `run_subagent(prompt)`\n- 子専用 `messages`\n- 最後に summary を返す\n\n### Version 2: tool set を制限する\n\n親より小さく安全な tool set を渡します。\n\n### Version 3: safety bound を足す\n\n最低限、\n\n- 最大 turn 数\n- tool failure 時の終了条件\n\nは入れてください。\n\n### Version 4: fork を検討する\n\nこの順番を守ることが大事です。\n\n最初から fork を入れる必要はありません。\n\n## fork とは何か、なぜ「次の段階」なのか\n\n最小 subagent は blank context から始めます。\n\nでも subtask によっては、親が直前まで話していた内容を知らないと困ることがあります。\n\nたとえば、\n\n> 「さっき決めた方針に沿って、この module へ test を追加して」\n\nのような場面です。\n\nそのとき使うのが `fork` です。\n\n```python\nsub_messages = list(parent_messages)\nsub_messages.append({\"role\": \"user\", \"content\": prompt})\n```\n\nfork の本質は、\n\n**空白から始めるのではなく、親の既存 context を引き継いで子を始めること**\n\nです。\n\nただし teaching order としては、blank-context subagent を理解してからの方が安全です。\n\n先に fork を入れると、初心者は\n\n- 何が isolation で\n- 何が inherited context なのか\n\nを混ぜやすくなります。\n\n## 初学者が混ぜやすいポイント\n\n### 1. subagent を「並列アピール機能」だと思う\n\nsubagent の第一目的は concurrency 自慢ではなく、context hygiene です。\n\n### 2. 子の history を全部親へ戻してしまう\n\nそれでは isolation の価値がほとんど消えます。\n\n### 3. 最初から役割を増やしすぎる\n\nexplorer、reviewer、planner、tester などを一気に作る前に、\n\n**clean context の一回限り worker**\n\nを正しく作る方が先です。\n\n### 4. 子に `task` を持たせて無限に spawn させる\n\n境界がないと recursion で system が荒れます。\n\n### 5. `max_turns` のような safety bound を持たない\n\n局所 task だからこそ、終わらない子を放置しない設計が必要です。\n\n## この章を読み終えたら何が言えるべきか\n\n1. subagent の価値は clean context を作ることにある\n2. 子は親と別の `messages` を持つべきである\n3. 親へ戻すのは内部 history 全量ではなく summary でよい\n\n## 一文で覚える\n\n**Subagent とは、局所 task を clean context へ切り出し、親には必要な結論だけを持ち帰るための最小 delegation mechanism です。**\n"
},
{
"version": "s05",
+ "slug": "s05-skill-loading",
"locale": "ja",
"title": "s05: Skills",
- "content": "# s05: Skills\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"必要な知識を、必要な時に読み込む\"* -- system prompt ではなく tool_result で注入。\n\n## 問題\n\nエージェントにドメイン固有のワークフローを遵守させたい: gitの規約、テストパターン、コードレビューチェックリスト。すべてをシステムプロンプトに入れると、使われないスキルにトークンを浪費する。10スキル x 2000トークン = 20,000トークン、ほとんどが任意のタスクに無関係だ。\n\n## 解決策\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\n第1層: スキル*名*をシステムプロンプトに(低コスト)。第2層: スキル*本体*をtool_resultに(オンデマンド)。\n\n## 仕組み\n\n1. 各スキルは `SKILL.md` ファイルを含むディレクトリとして配置される。\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. SkillLoaderが `SKILL.md` を再帰的に探索し、ディレクトリ名をスキル識別子として使用する。\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n \"\n```\n\n3. 第1層はシステムプロンプトに配置。第2層は通常のツールハンドラ。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\nモデルはどのスキルが存在するかを知り(低コスト)、関連する時にだけ読み込む(高コスト)。\n\n## s04からの変更点\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | skills/\\*/SKILL.md files |\n| Injection | None | Two-layer (system + result)|\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n"
+ "kind": "chapter",
+ "filename": "s05-skill-loading.md",
+ "content": "# s05: Skills\n\n`s01 > s02 > s03 > s04 > [ s05 ] > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *\"必要な知識を、必要な時に読み込む\"* -- system prompt ではなく tool_result で注入。\n>\n> **Harness 層**: オンデマンド知識 -- モデルが求めた時だけ渡すドメイン専門性。\n\n## 問題\n\nエージェントにドメイン固有のワークフローを遵守させたい: gitの規約、テストパターン、コードレビューチェックリスト。すべてをシステムプロンプトに入れると、使われないスキルにトークンを浪費する。10スキル x 2000トークン = 20,000トークン、ほとんどが任意のタスクに無関係だ。\n\n## 解決策\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\n第1層: スキル*名*をシステムプロンプトに(低コスト)。第2層: スキル*本体*をtool_resultに(オンデマンド)。\n\n## 仕組み\n\n1. 各スキルは `SKILL.md` ファイルを含むディレクトリとして配置される。\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. SkillLoaderが `SKILL.md` を再帰的に探索し、ディレクトリ名をスキル識別子として使用する。\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n \"\n```\n\n3. 第1層はシステムプロンプトに配置。第2層は通常のツールハンドラ。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\nモデルはどのスキルが存在するかを知り(低コスト)、関連する時にだけ読み込む(高コスト)。\n\n## s04からの変更点\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | skills/\\*/SKILL.md files |\n| Injection | None | Two-layer (system + result)|\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. 
`Build an MCP server using the mcp-builder skill`\n\n## 高完成度システムではどう広がるか\n\nこの章の核心は 2 層モデルです。 \nまず軽い一覧で「何があるか」を知らせ、必要になったときだけ本文を深く読み込む。これはそのまま有効です。\n\nより完成度の高いシステムでは、その周りに次のような広がりが出ます。\n\n| 観点 | 教材版 | 高完成度システム |\n|------|--------|------------------|\n| 発見レイヤー | プロンプト内に名前一覧 | 予算付きの専用インベントリやリマインダ面 |\n| 読み込み | `load_skill` が本文を返す | 同じ文脈へ注入、別ワーカーで実行、補助コンテキストとして添付など |\n| ソース | `skills/` ディレクトリのみ | user、project、bundled、plugin、外部ソースなど |\n| 適用範囲 | 常に見える | タスク種別、触ったファイル、明示指示に応じて有効化 |\n| 引数 | なし | スキルへパラメータやテンプレート値を渡せる |\n| ライフサイクル | 一度読むだけ | compact や再開後に復元されることがある |\n| ガードレール | なし | スキルごとの許可範囲や行動制約を持てる |\n\n教材としては、2 層モデルだけで十分です。 \nここで学ぶべき本質は:\n\n**専門知識は最初から全部抱え込まず、必要な時だけ深く読み込む** \nという設計です。\n"
},
{
"version": "s06",
+ "slug": "s06-context-compact",
"locale": "ja",
"title": "s06: Context Compact",
- "content": "# s06: Context Compact\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"コンテキストはいつか溢れる、空ける手段が要る\"* -- 3層圧縮で無限セッションを実現。\n\n## 問題\n\nコンテキストウィンドウは有限だ。1000行のファイルに対する`read_file`1回で約4000トークンを消費する。30ファイルを読み20回のbashコマンドを実行すると、100,000トークン超。圧縮なしでは、エージェントは大規模コードベースで作業できない。\n\n## 解決策\n\n積極性を段階的に上げる3層構成:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 仕組み\n\n1. **第1層 -- micro_compact**: 各LLM呼び出しの前に、古いツール結果をプレースホルダーに置換する。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. **第2層 -- auto_compact**: トークンが閾値を超えたら、完全なトランスクリプトをディスクに保存し、LLMに要約を依頼する。\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **第3層 -- manual compact**: `compact`ツールが同じ要約処理をオンデマンドでトリガーする。\n\n4. ループが3層すべてを統合する:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\nトランスクリプトがディスク上に完全な履歴を保持する。何も真に失われず、アクティブなコンテキストの外に移動されるだけ。\n\n## s05からの変更点\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Transcripts | None | Saved to .transcripts/ |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n1. `Read every Python file in the agents/ directory one by one` (micro-compactが古い結果を置換するのを観察する)\n2. `Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n"
+ "kind": "chapter",
+ "filename": "s06-context-compact.md",
+ "content": "# s06: Context Compact\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *\"コンテキストはいつか溢れる、空ける手段が要る\"* -- 4レバー圧縮で無限セッションを実現。\n\n## 問題\n\nコンテキストウィンドウは有限だ。1000行のファイルに対する`read_file`1回で約4000トークンを消費する。30ファイルを読み20回のbashコマンドを実行すると、100,000トークン超。圧縮なしでは、エージェントは大規模コードベースで作業できない。\n\n## 解決策\n\nツール出力時から手動トリガーまで、4つの圧縮レバー:\n\n```\nEvery tool call:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Lever 0: persisted-output] (at tool execution time)\n Large outputs (>50KB, bash >30KB) are written to disk\n and replaced with a preview marker.\n |\n v\n[Lever 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n (preserves read_file results as reference material)\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Lever 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Lever 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 仕組み\n\n0. **レバー 0 -- persisted-output**: ツール出力がサイズ閾値を超えた場合、ディスクに書き込みプレビューマーカーに置換する。巨大な出力がコンテキストウィンドウに入るのを防ぐ。\n\n```python\nPERSIST_OUTPUT_TRIGGER_CHARS_DEFAULT = 50000\nPERSIST_OUTPUT_TRIGGER_CHARS_BASH = 30000 # bashはより低い閾値を使用\n\ndef maybe_persist_output(tool_use_id, output, trigger_chars=None):\n if len(output) <= trigger:\n return output\n stored_path = _persist_tool_result(tool_use_id, output)\n return _build_persisted_marker(stored_path, output)\n # Returns: \n # Output too large (48.8KB). Full output saved to: .task_outputs/tool-results/abc123.txt\n # Preview (first 2.0KB):\n # ... first 2000 chars ...\n # \n```\n\nモデルは後から`read_file`で保存パスにアクセスし、完全な内容を取得できる。\n\n1. **レバー 1 -- micro_compact**: 各LLM呼び出しの前に、古いツール結果をプレースホルダーに置換する。`read_file`の結果は参照資料として保持する。\n\n```python\nPRESERVE_RESULT_TOOLS = {\"read_file\"}\n\ndef micro_compact(messages: list) -> list:\n tool_results = [...] # collect all tool_result entries\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for part in tool_results[:-KEEP_RECENT]:\n if tool_name in PRESERVE_RESULT_TOOLS:\n continue # keep reference material\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. **レバー 2 -- auto_compact**: トークンが閾値を超えたら、完全なトランスクリプトをディスクに保存し、LLMに要約を依頼する。\n\n```python\ndef auto_compact(messages: list) -> list:\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n ]\n```\n\n3. **レバー 3 -- manual compact**: `compact`ツールが同じ要約処理をオンデマンドでトリガーする。\n\n4. ループが4つのレバーすべてを統合する:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Lever 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Lever 2\n response = client.messages.create(...)\n # ... tool execution with persisted-output ... 
# Lever 0\n if manual_compact:\n messages[:] = auto_compact(messages) # Lever 3\n```\n\nトランスクリプトがディスク上に完全な履歴を保持する。大きな出力は`.task_outputs/tool-results/`に保存される。何も真に失われず、アクティブなコンテキストの外に移動されるだけ。\n\n## s05からの変更点\n\n| Component | Before (s05) | After (s06) |\n|-------------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Four-lever compression |\n| Persisted-output | None | Large outputs -> disk + preview |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Transcripts | None | Saved to .transcripts/ |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n1. `Read every Python file in the agents/ directory one by one` (micro-compactが古い結果を置換するのを観察する)\n2. `Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n\n## 高完成度システムではどう広がるか\n\n教材版は compact を理解しやすくするために、仕組みを大きく 4 本に絞っています。 \nより完成度の高いシステムでは、その周りに追加の段階が増えます。\n\n| レイヤー | 教材版 | 高完成度システム |\n|---------|--------|------------------|\n| 大きな出力 | 大きすぎる結果をディスクへ逃がす | 複数ツールの合計量も見ながら、文脈に入る前に予算調整する |\n| 軽い整理 | 単純な micro-compact | フル要約の前に複数の軽量整理パスを入れる |\n| フル compact | 閾値を超えたら要約 | 事前 compact、回復用 compact、エラー後 compact など役割分担が増える |\n| 回復 | 要約 1 本に置き換える | compact 後に最近のファイル、計画、スキル、非同期状態などを戻す |\n| 起動条件 | 自動または手動ツール | ユーザー操作、内部閾値、回復処理など複数の入口 |\n\nここで覚えるべき核心は変わりません。\n\n**compact は「履歴を捨てること」ではなく、「細部をアクティブ文脈の外へ移し、連続性を保つこと」** \nです。\n"
},
{
"version": "s07",
+ "slug": "s07-permission-system",
"locale": "ja",
- "title": "s07: Task System",
- "content": "# s07: Task System\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"大きな目標を小タスクに分解し、順序付けし、ディスクに記録する\"* -- ファイルベースのタスクグラフ、マルチエージェント協調の基盤。\n\n## 問題\n\ns03のTodoManagerはメモリ上のフラットなチェックリストに過ぎない: 順序なし、依存関係なし、ステータスは完了か未完了のみ。実際の目標には構造がある -- タスクBはタスクAに依存し、タスクCとDは並行実行でき、タスクEはCとDの両方を待つ。\n\n明示的な関係がなければ、エージェントは何が実行可能で、何がブロックされ、何が同時に走れるかを判断できない。しかもリストはメモリ上にしかないため、コンテキスト圧縮(s06)で消える。\n\n## 解決策\n\nフラットなチェックリストをディスクに永続化する**タスクグラフ**に昇格させる。各タスクは1つのJSONファイルで、ステータス・前方依存(`blockedBy`)・後方依存(`blocks`)を持つ。タスクグラフは常に3つの問いに答える:\n\n- **何が実行可能か?** -- `pending`ステータスで`blockedBy`が空のタスク。\n- **何がブロックされているか?** -- 未完了の依存を待つタスク。\n- **何が完了したか?** -- `completed`のタスク。完了時に後続タスクを自動的にアンブロックする。\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\nタスクグラフ (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\n順序: task 1 は 2 と 3 より先に完了する必要がある\n並行: task 2 と 3 は同時に実行できる\n依存: task 4 は 2 と 3 の両方を待つ\nステータス: pending -> in_progress -> completed\n```\n\nこのタスクグラフは s07 以降の全メカニズムの協調バックボーンとなる: バックグラウンド実行(s08)、マルチエージェントチーム(s09+)、worktree分離(s12)はすべてこの同じ構造を読み書きする。\n\n## 仕組み\n\n1. **TaskManager**: タスクごとに1つのJSONファイル、依存グラフ付きCRUD。\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. **依存解除**: タスク完了時に、他タスクの`blockedBy`リストから完了IDを除去し、後続タスクをアンブロックする。\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **ステータス遷移 + 依存配線**: `update`がステータス変更と依存エッジを担う。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. 4つのタスクツールをディスパッチマップに追加する。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\ns07以降、タスクグラフがマルチステップ作業のデフォルト。s03のTodoは軽量な単一セッション用チェックリストとして残る。\n\n## s06からの変更点\n\n| コンポーネント | Before (s06) | After (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| 計画モデル | フラットチェックリスト (メモリ) | 依存関係付きタスクグラフ (ディスク) |\n| 関係 | なし | `blockedBy` + `blocks` エッジ |\n| ステータス追跡 | 完了か未完了 | `pending` -> `in_progress` -> `completed` |\n| 永続性 | 圧縮で消失 | 圧縮・再起動後も存続 |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. 
`List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n"
+ "title": "s07: Permission System",
+ "kind": "chapter",
+ "filename": "s07-permission-system.md",
+ "content": "# s07: Permission System\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > [ s07 ] > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *model は「こうしたい」と提案できます。けれど本当に実行する前には、必ず安全 gate を通さなければなりません。*\n\n## この章の核心目標\n\n`s06` まで来ると agent はすでに、\n\n- file を読む\n- file を書く\n- command を実行する\n- plan を持つ\n- context を compact する\n\nことができます。\n\n能力が増えるほど、当然危険も増えます。\n\n- 間違った file を書き換える\n- 危険な shell command を実行する\n- user がまだ許可していない操作に踏み込む\n\nだからここから先は、\n\n**「model の意図」がそのまま「実行」へ落ちる**\n\n構造をやめなければなりません。\n\nこの章で入れるのは、\n\n**tool request を実行前に判定する permission pipeline**\n\nです。\n\n## 併読すると楽になる資料\n\n- model の提案と system の実実行が混ざるなら [`s00a-query-control-plane.md`](./s00a-query-control-plane.md)\n- なぜ tool request を直接 handler に落としてはいけないか不安なら [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md)\n- `PermissionRule`、`PermissionDecision`、`tool_result` が混ざるなら [`data-structures.md`](./data-structures.md)\n\n## 先に言葉をそろえる\n\n### permission system とは何か\n\npermission system は真偽値 1 個ではありません。\n\nむしろ次の 3 問に順番に答える pipeline です。\n\n1. これは即拒否すべきか\n2. 自動で許可してよいか\n3. 残りは user に確認すべきか\n\n### permission mode とは何か\n\nmode は、その session 全体の安全姿勢です。\n\nたとえば、\n\n- 慎重に進める\n- 読み取りだけ許す\n- 安全そうなものは自動通過させる\n\nといった大きな方針です。\n\n### rule とは何か\n\nrule は、\n\n> ある tool request に当たったらどう振る舞うか\n\nを表す小さな条項です。\n\n最小形なら次のような record で表せます。\n\n```python\n{\n \"tool\": \"bash\",\n \"content\": \"sudo *\",\n \"behavior\": \"deny\",\n}\n```\n\n意味は、\n\n- `bash` に対して\n- command 内容が `sudo *` に当たれば\n- 拒否する\n\nです。\n\n## 最小 permission system の形\n\n0 から手で作るなら、最小で正しい pipeline は 4 段で十分です。\n\n```text\ntool_call\n |\n v\n1. deny rules\n -> 危険なら即拒否\n |\n v\n2. mode check\n -> 現在 mode に照らして判定\n |\n v\n3. allow rules\n -> 安全で明確なら自動許可\n |\n v\n4. ask user\n -> 残りは確認に回す\n```\n\nこの 4 段で teaching repo の主線としては十分に強いです。\n\n## なぜ順番がこの形なのか\n\n### 1. deny を先に見る理由\n\nある種の request は mode に関係なく危険です。\n\nたとえば、\n\n- 明白に危険な shell command\n- workspace の外へ逃げる path\n\nなどです。\n\nこうしたものは「いま auto mode だから」などの理由で通すべきではありません。\n\n### 2. mode を次に見る理由\n\nmode はその session の大きな姿勢だからです。\n\nたとえば `plan` mode なら、\n\n> まだ review / analysis 段階なので write 系をまとめて抑える\n\nという全体方針を早い段で効かせたいわけです。\n\n### 3. allow を後に見る理由\n\ndeny と mode を抜けたあとで、\n\n> これは何度も出てくる安全な操作だから自動で通してよい\n\nというものを allow します。\n\nたとえば、\n\n- `read_file`\n- code search\n- `git status`\n\nなどです。\n\n### 4. ask を最後に置く理由\n\n前段で明確に決められなかった灰色領域だけを user に回すためです。\n\nこれで、\n\n- 危険なものは system が先に止める\n- 明らかに安全なものは system が先に通す\n- 本当に曖昧なものだけ user が判断する\n\nという自然な構図になります。\n\n## 最初に実装すると良い 3 つの mode\n\n最初から mode を増やしすぎる必要はありません。\n\nまずは次の 3 つで十分です。\n\n| mode | 意味 | 向いている場面 |\n|---|---|---|\n| `default` | rule に当たらないものは user に確認 | 普通の対話 |\n| `plan` | write を止め、read 中心で進める | planning / review / analysis |\n| `auto` | 明らかに安全な read は自動許可 | 高速探索 |\n\nこの 3 つだけでも、\n\n- 慎重さ\n- 計画モード\n- 流暢さ\n\nのバランスを十分教えられます。\n\n## この章の核になるデータ構造\n\n### 1. PermissionRule\n\n```python\nPermissionRule = {\n \"tool\": str,\n \"behavior\": \"allow\" | \"deny\" | \"ask\",\n \"path\": str | None,\n \"content\": str | None,\n}\n```\n\n必ずしも最初から `path` と `content` の両方を使う必要はありません。\n\nでも少なくとも rule は次を表現できる必要があります。\n\n- どの tool に対する rule か\n- 当たったらどう振る舞うか\n\n### 2. Permission Mode\n\n```python\nmode = \"default\" | \"plan\" | \"auto\"\n```\n\nこれは個々の rule ではなく session 全体の posture です。\n\n### 3. 
PermissionDecision\n\n```python\n{\n \"behavior\": \"allow\" | \"deny\" | \"ask\",\n \"reason\": \"why this decision was made\",\n}\n```\n\nここで `reason` を持つのが大切です。\n\nなぜなら permission system は「通した / 止めた」だけではなく、\n\n**なぜそうなったかを説明できるべき**\n\nだからです。\n\n## 最小実装を段階で追う\n\n### 第 1 段階: 判定関数を書く\n\n```python\ndef check_permission(tool_name: str, tool_input: dict) -> dict:\n # 1. deny rules\n for rule in deny_rules:\n if matches(rule, tool_name, tool_input):\n return {\"behavior\": \"deny\", \"reason\": \"matched deny rule\"}\n\n # 2. mode check\n if mode == \"plan\" and tool_name in WRITE_TOOLS:\n return {\"behavior\": \"deny\", \"reason\": \"plan mode blocks writes\"}\n if mode == \"auto\" and tool_name in READ_ONLY_TOOLS:\n return {\"behavior\": \"allow\", \"reason\": \"auto mode allows reads\"}\n\n # 3. allow rules\n for rule in allow_rules:\n if matches(rule, tool_name, tool_input):\n return {\"behavior\": \"allow\", \"reason\": \"matched allow rule\"}\n\n # 4. fallback\n return {\"behavior\": \"ask\", \"reason\": \"needs confirmation\"}\n```\n\n重要なのは code の華やかさではなく、\n\n**先に分類し、その後で分岐する**\n\nという構造です。\n\n### 第 2 段階: tool 実行直前に接ぐ\n\npermission は tool request が来たあと、handler を呼ぶ前に入ります。\n\n```python\ndecision = perms.check(tool_name, tool_input)\n\nif decision[\"behavior\"] == \"deny\":\n return f\"Permission denied: {decision['reason']}\"\n\nif decision[\"behavior\"] == \"ask\":\n ok = ask_user(...)\n if not ok:\n return \"Permission denied by user\"\n\nreturn handler(**tool_input)\n```\n\nこれで初めて、\n\n**tool request と real execution の間に control gate**\n\nが立ちます。\n\n## `bash` を特別に気にする理由\n\nすべての tool の中で `bash` は特別に危険です。\n\nなぜなら、\n\n- `read_file` は読むだけ\n- `write_file` は書くだけ\n- でも `bash` は理論上ほとんど何でもできる\n\nからです。\n\nしたがって `bash` をただの文字列入力として見るのは危険です。\n\n成熟した system では、`bash` を小さな executable language として扱います。\n\n教材版でも最低限、次のような危険要素は先に弾く方がよいです。\n\n- `sudo`\n- `rm -rf`\n- 危険な redirection\n- suspicious command substitution\n- 明白な shell metacharacter chaining\n\n核心は 1 文です。\n\n**bash は普通の text ではなく、可実行 action の記述**\n\nです。\n\n## 初学者が混ぜやすいポイント\n\n### 1. permission を yes/no の 2 値で考える\n\n実際には `deny / allow / ask` の 3 分岐以上が必要です。\n\n### 2. mode を rule の代わりにしようとする\n\nmode は全体 posture、rule は個別条項です。役割が違います。\n\n### 3. `bash` を普通の string と同じ感覚で通す\n\nexecution power が桁違いです。\n\n### 4. deny / allow より先に user へ全部投げる\n\nそれでは system 側の safety design を学べません。\n\n### 5. decision に reason を残さない\n\nあとで「なぜ止まったか」が説明できなくなります。\n\n## 拒否トラッキングの意味\n\n教材コードでは、連続拒否を数える簡単な circuit breaker を持たせるのも有効です。\n\nなぜなら agent が同じ危険 request を何度も繰り返すとき、\n\n- mode が合っていない\n- plan を作り直すべき\n- 別 route を選ぶべき\n\nという合図になるからです。\n\nこれは高度な observability ではなく、\n\n**permission failure も agent の progress 状態の一部である**\n\nと教えるための最小観測です。\n\n## この章を読み終えたら何が言えるべきか\n\n1. model の意図は handler へ直結させず、permission pipeline を通すべき\n2. `default / plan / auto` の 3 mode だけでも十分に teaching mainline が作れる\n3. `bash` は普通の text 入力ではなく、高い実行力を持つ tool なので特別に警戒すべき\n\n## 一文で覚える\n\n**Permission System とは、model の意図をそのまま実行に落とさず、deny / mode / allow / ask の pipeline で安全に変換する層です。**\n"
},
{
"version": "s08",
+ "slug": "s08-hook-system",
"locale": "ja",
- "title": "s08: Background Tasks",
- "content": "# s08: Background Tasks\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"遅い操作はバックグラウンドへ、エージェントは次を考え続ける\"* -- デーモンスレッドがコマンド実行、完了後に通知を注入。\n\n## 問題\n\n一部のコマンドは数分かかる: `npm install`、`pytest`、`docker build`。ブロッキングループでは、モデルはサブプロセスの完了を待って座っている。ユーザーが「依存関係をインストールして、その間にconfigファイルを作って」と言っても、エージェントは並列ではなく逐次的に処理する。\n\n## 解決策\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## 仕組み\n\n1. BackgroundManagerがスレッドセーフな通知キューでタスクを追跡する。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()`がデーモンスレッドを開始し、即座にリターンする。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. サブプロセス完了時に、結果を通知キューへ。\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. エージェントループが各LLM呼び出しの前に通知をドレインする。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\" \"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\nループはシングルスレッドのまま。サブプロセスI/Oだけが並列化される。\n\n## s07からの変更点\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n"
+ "title": "s08: Hook System",
+ "kind": "chapter",
+ "filename": "s08-hook-system.md",
+ "content": "# s08: Hook System\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > [ s08 ] > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *ループそのものを書き換えなくても、ライフサイクルの周囲に拡張点を置ける。*\n\n## この章が解決する問題\n\n`s07` までで、agent はかなり実用的になりました。\n\nしかし実際には、ループの外側で足したい振る舞いが増えていきます。\n\n- 監査ログ\n- 実行追跡\n- 通知\n- 追加の安全チェック\n- 実行前後の補助メッセージ\n\nこうした周辺機能を毎回メインループに直接書き込むと、すぐに主線が読みにくくなります。\n\nそこで必要なのが Hook です。\n\n## 主線とどう併読するか\n\n- Hook を「主ループの中へ if/else を足すこと」だと思い始めたら、まず [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md) に戻ります。\n- 主ループ、tool handler、hook の副作用が同じ層に見えてきたら、[`entity-map.md`](./entity-map.md) で「主状態を進めるもの」と「横から観測するもの」を分けます。\n- この先で prompt、recovery、teams まで読むつもりなら、[`s00e-reference-module-map.md`](./s00e-reference-module-map.md) を近くに置いておくと、「control plane + sidecar 拡張」が何度も出てきても崩れにくくなります。\n\n## Hook を最も簡単に言うと\n\nHook は:\n\n**主ループの決まった節目で、追加動作を差し込む拡張点**\n\nです。\n\nここで大切なのは、Hook が主ループの代わりになるわけではないことです。 \n主ループは引き続き:\n\n- モデル呼び出し\n- ツール実行\n- 結果の追記\n\nを担当します。\n\n## 最小の心智モデル\n\n```text\ntool_call from model\n |\n v\n[PreToolUse hooks]\n |\n v\n[execute tool]\n |\n v\n[PostToolUse hooks]\n |\n v\nappend result and continue\n```\n\nこの形なら、ループの主線を壊さずに拡張できます。\n\n## まず教えるべき 3 つのイベント\n\n| イベント | いつ発火するか | 主な用途 |\n|---|---|---|\n| `SessionStart` | セッション開始時 | 初期通知、ウォームアップ |\n| `PreToolUse` | ツール実行前 | 監査、ブロック、補助判断 |\n| `PostToolUse` | ツール実行後 | 結果記録、通知、追跡 |\n\nこれだけで教学版としては十分です。\n\n## 重要な境界\n\n### Hook は主状態遷移を置き換えない\n\nHook がやるのは「観察して補助すること」です。\n\nメッセージ履歴、停止条件、ツール呼び出しの主責任は、あくまでメインループに残します。\n\n### Hook には整ったイベント情報を渡す\n\n理想的には、各 Hook は同じ形の情報を受け取ります。\n\nたとえば:\n\n- `event`\n- `tool_name`\n- `tool_input`\n- `tool_output`\n- `error`\n\nこの形が揃っていると、Hook を増やしても心智が崩れません。\n\n## 最小実装\n\n### 1. 設定を読む\n\n```python\nhooks = {\n \"PreToolUse\": [...],\n \"PostToolUse\": [...],\n \"SessionStart\": [...],\n}\n```\n\n### 2. 実行関数を作る\n\n```python\ndef run_hooks(event_name: str, ctx: dict):\n for hook in hooks.get(event_name, []):\n run_one_hook(hook, ctx)\n```\n\n### 3. ループに接続する\n\n```python\nrun_hooks(\"PreToolUse\", ctx)\noutput = handler(**tool_input)\nrun_hooks(\"PostToolUse\", ctx)\n```\n\n## 初学者が混乱しやすい点\n\n### 1. Hook を第二の主ループのように考える\n\nそうすると制御が分裂して、一気に分かりにくくなります。\n\n### 2. Hook ごとに別のデータ形を渡す\n\n新しい Hook を足すたびに、読む側の心智コストが増えてしまいます。\n\n### 3. 何でも Hook に入れようとする\n\nHook は便利ですが、メインの状態遷移まで押し込む場所ではありません。\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s08_hook_system.py\n```\n\n見るポイント:\n\n1. どのイベントで Hook が走るか\n2. Hook が主ループを壊さずに追加動作だけを行っているか\n3. イベント情報の形が揃っているか\n"
},
{
"version": "s09",
+ "slug": "s09-memory-system",
"locale": "ja",
- "title": "s09: Agent Teams",
- "content": "# s09: Agent Teams\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"一人で終わらないなら、チームメイトに任せる\"* -- 永続チームメイト + 非同期メールボックス。\n\n## 問題\n\nサブエージェント(s04)は使い捨てだ: 生成し、作業し、要約を返し、消滅する。アイデンティティもなく、呼び出し間の記憶もない。バックグラウンドタスク(s08)はシェルコマンドを実行するが、LLM誘導の意思決定はできない。\n\n本物のチームワークには: (1)単一プロンプトを超えて存続する永続エージェント、(2)アイデンティティとライフサイクル管理、(3)エージェント間の通信チャネルが必要だ。\n\n## 解決策\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## 仕組み\n\n1. TeammateManagerがconfig.jsonでチーム名簿を管理する。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()`がチームメイトを作成し、そのエージェントループをスレッドで開始する。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. MessageBus: 追記専用のJSONLインボックス。`send()`がJSON行を追記し、`read_inbox()`がすべて読み取ってドレインする。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 各チームメイトは各LLM呼び出しの前にインボックスを確認し、受信メッセージをコンテキストに注入する。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox} \"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## s08からの変更点\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | message + broadcast |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. 
`Check the lead inbox for any messages`\n4. `/team`と入力してステータス付きのチーム名簿を確認する\n5. `/inbox`と入力してリーダーのインボックスを手動確認する\n"
+ "title": "s09: Memory System",
+ "kind": "chapter",
+ "filename": "s09-memory-system.md",
+ "content": "# s09: Memory System\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > [ s09 ] > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *memory は会話の全部を保存する場所ではない。次のセッションでも残すべき事実だけを小さく持つ場所である。*\n\n## この章が解決する問題\n\nmemory がなければ、新しいセッションは毎回ゼロから始まります。\n\nその結果、agent は何度も同じことを忘れます。\n\n- ユーザーの好み\n- すでに何度も訂正された注意点\n- コードだけでは分かりにくいプロジェクト事情\n- 外部参照の場所\n\nそこで必要になるのが memory です。\n\n## 最初に立てるべき境界\n\nこの章で最も大事なのは:\n\n**何でも memory に入れない**\n\nことです。\n\nmemory に入れるべきなのは:\n\n- セッションをまたいでも価値がある\n- 現在のリポジトリを読み直すだけでは分かりにくい\n\nこうした情報だけです。\n\n## 主線とどう併読するか\n\n- memory を「長い context の置き場」だと思ってしまうなら、[`s06-context-compact.md`](./s06-context-compact.md) に戻って compact と durable memory を分けます。\n- `messages[]`、summary block、memory store が頭の中で混ざってきたら、[`data-structures.md`](./data-structures.md) を見ながら読みます。\n- このあと `s10` へ進むなら、[`s10a-message-prompt-pipeline.md`](./s10a-message-prompt-pipeline.md) を横に置くと、memory が次の入力へどう戻るかをつかみやすくなります。\n\n## 初学者向けの 4 分類\n\n### 1. `user`\n\n安定したユーザーの好み。\n\n例:\n\n- `pnpm` を好む\n- 回答は短めがよい\n\n### 2. `feedback`\n\nユーザーが明示的に直した点。\n\n例:\n\n- 生成ファイルは勝手に触らない\n- テストの更新前に確認する\n\n### 3. `project`\n\nコードを見ただけでは分かりにくい持続的事情。\n\n### 4. `reference`\n\n外部資料や外部ボードへの参照先。\n\n## 入れてはいけないもの\n\n| 入れないもの | 理由 |\n|---|---|\n| ディレクトリ構造 | コードを読めば分かる |\n| 関数名やシグネチャ | ソースが真実だから |\n| 現在タスクの進捗 | task / plan の責務 |\n| 一時的なブランチ名 | すぐ古くなる |\n| 秘密情報 | 危険 |\n\n## 最小の心智モデル\n\n```text\nconversation\n |\n | 長期的に残すべき事実が出る\n v\nsave_memory\n |\n v\n.memory/\n ├── MEMORY.md\n ├── prefer_pnpm.md\n └── ask_before_codegen.md\n |\n v\n次回セッション開始時に再読込\n```\n\n## 重要なデータ構造\n\n### 1. 1 メモリ = 1 ファイル\n\n```md\n---\nname: prefer_pnpm\ndescription: User prefers pnpm over npm\ntype: user\n---\nThe user explicitly prefers pnpm for package management commands.\n```\n\n### 2. 小さな索引\n\n```md\n# Memory Index\n\n- prefer_pnpm [user]\n- ask_before_codegen [feedback]\n```\n\n索引は内容そのものではなく、「何があるか」を素早く知るための地図です。\n\n## 最小実装\n\n```python\nMEMORY_TYPES = (\"user\", \"feedback\", \"project\", \"reference\")\n```\n\n```python\ndef save_memory(name, description, mem_type, content):\n path = memory_dir / f\"{slugify(name)}.md\"\n path.write_text(render_frontmatter(name, description, mem_type) + content)\n rebuild_index()\n```\n\n次に、セッション開始時に読み込みます。\n\n```python\nmemories = memory_store.load_all()\n```\n\nそして `s10` で prompt 組み立てに入れます。\n\n## 近い概念との違い\n\n### memory\n\n次回以降も役立つ事実。\n\n### task\n\nいま何を完了したいか。\n\n### plan\n\nこのターンでどう進めるか。\n\n### `CLAUDE.md`\n\nより安定した指示文書や standing rules。\n\n## 初学者がよくやる間違い\n\n### 1. コードを読めば分かることまで保存する\n\nそれは memory ではなく、重複です。\n\n### 2. 現在の作業状況を memory に入れる\n\nそれは task / plan の責務です。\n\n### 3. memory を絶対真実のように扱う\n\nmemory は古くなり得ます。\n\n安全な原則は:\n\n**memory は方向を与え、現在観測は真実を与える。**\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s09_memory_system.py\n```\n"
},
{
"version": "s10",
+ "slug": "s10-system-prompt",
+ "locale": "ja",
+ "title": "s10: System Prompt",
+ "kind": "chapter",
+ "filename": "s10-system-prompt.md",
+ "content": "# s10: System Prompt\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > [ s10 ] > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *system prompt は巨大な固定文字列ではなく、複数ソースから組み立てるパイプラインである。*\n\n## なぜこの章が必要か\n\n最初は 1 本の system prompt 文字列でも動きます。\n\nしかし機能が増えると、入力の材料が増えます。\n\n- 安定した役割説明\n- ツール一覧\n- skills\n- memory\n- `CLAUDE.md`\n- 現在ディレクトリや日時のような動的状態\n\nこうなると、1 本の固定文字列では心智が崩れます。\n\n## 主線とどう併読するか\n\n- prompt をまだ「大きな謎の文字列」として見てしまうなら、[`s00a-query-control-plane.md`](./s00a-query-control-plane.md) に戻って、モデル入力がどの control 層を通るかを見直します。\n- どの順で何を組み立てるかを安定させたいなら、[`s10a-message-prompt-pipeline.md`](./s10a-message-prompt-pipeline.md) をこの章の橋渡し資料として併読します。\n- system rules、tool docs、memory、runtime state が 1 つの入力塊に見えてきたら、[`data-structures.md`](./data-structures.md) で入力片の出所を分け直します。\n\n## 最小の心智モデル\n\n```text\n1. core identity\n2. tools\n3. skills\n4. memory\n5. CLAUDE.md chain\n6. dynamic runtime context\n```\n\n最後に順に連結します。\n\n```text\ncore\n+ tools\n+ skills\n+ memory\n+ claude_md\n+ dynamic_context\n= final model input\n```\n\n## 最も重要な境界\n\n分けるべきなのは:\n\n- 安定したルール\n- 毎ターン変わる補足情報\n\n安定したもの:\n\n- 役割\n- 安全ルール\n- ツール契約\n- 長期指示\n\n動的なもの:\n\n- 現在日時\n- cwd\n- 現在モード\n- このターンだけの注意\n\n## 最小 builder\n\n```python\nclass SystemPromptBuilder:\n def build(self) -> str:\n parts = []\n parts.append(self._build_core())\n parts.append(self._build_tools())\n parts.append(self._build_skills())\n parts.append(self._build_memory())\n parts.append(self._build_claude_md())\n parts.append(self._build_dynamic())\n return \"\\n\\n\".join(p for p in parts if p)\n```\n\nここで重要なのは、各メソッドが 1 つの責務だけを持つことです。\n\n## 1 本の大文字列より良い理由\n\n### 1. どこから来た情報か分かる\n\n### 2. 部分ごとにテストしやすい\n\n### 3. 安定部分と動的部分を分けて育てられる\n\n## `system prompt` と `system reminder`\n\nより分かりやすい考え方は:\n\n- `system prompt`: 安定した土台\n- `system reminder`: このターンだけの追加注意\n\nこうすると、長期ルールと一時的ノイズが混ざりにくくなります。\n\n## `CLAUDE.md` が独立した段なのはなぜか\n\n`CLAUDE.md` は memory でも skill でもありません。\n\nより安定した指示文書の層です。\n\n教学版では、次のように積み上げると理解しやすいです。\n\n1. ユーザー級\n2. プロジェクト根\n3. サブディレクトリ級\n\n重要なのは:\n\n**指示源は上書き一発ではなく、層として積める**\n\nということです。\n\n## memory とこの章の関係\n\nmemory は保存するだけでは意味がありません。\n\nモデル入力に再び入って初めて、agent の行動に効いてきます。\n\nだから:\n\n- `s09` で記憶する\n- `s10` で入力に組み込む\n\nという流れになります。\n\n## 初学者が混乱しやすい点\n\n### 1. system prompt を固定文字列だと思う\n\n### 2. 毎回変わる情報も全部同じ塊に入れる\n\n### 3. skills、memory、`CLAUDE.md` を同じものとして扱う\n\n似て見えても責務は違います。\n\n- `skills`: 任意の能力パッケージ\n- `memory`: セッションをまたぐ事実\n- `CLAUDE.md`: 立ち続ける指示文書\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s10_system_prompt.py\n```\n"
+ },
+ {
+ "version": null,
+ "slug": "s10a-message-prompt-pipeline",
"locale": "ja",
- "title": "s10: Team Protocols",
- "content": "# s10: Team Protocols\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"チームメイト間には統一の通信ルールが必要\"* -- 1つの request-response パターンが全交渉を駆動。\n\n## 問題\n\ns09ではチームメイトが作業し通信するが、構造化された協調がない:\n\n**シャットダウン**: スレッドを強制終了するとファイルが中途半端に書かれ、config.jsonが不正な状態になる。ハンドシェイクが必要 -- リーダーが要求し、チームメイトが承認(完了して退出)か拒否(作業継続)する。\n\n**プラン承認**: リーダーが「認証モジュールをリファクタリングして」と言うと、チームメイトは即座に開始する。リスクの高い変更では、実行前にリーダーが計画をレビューすべきだ。\n\n両方とも同じ構造: 一方がユニークIDを持つリクエストを送り、他方がそのIDで応答する。\n\n## 解決策\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## 仕組み\n\n1. リーダーがrequest_idを生成し、インボックス経由でシャットダウンを開始する。\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. チームメイトがリクエストを受信し、承認または拒否で応答する。\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. プラン承認も同一パターン。チームメイトがプランを提出(request_idを生成)、リーダーがレビュー(同じrequest_idを参照)。\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\n1つのFSM、2つの応用。同じ`pending -> approved | rejected`状態機械が、あらゆるリクエスト-レスポンスプロトコルに適用できる。\n\n## s09からの変更点\n\n| Component | Before (s09) | After (s10) |\n|----------------|------------------|------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan)|\n| Shutdown | Natural exit only| Request-response handshake |\n| Plan gating | None | Submit/review with approval |\n| Correlation | None | request_id per request |\n| FSM | None | pending -> approved/rejected |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. `/team`と入力してステータスを監視する\n"
+ "title": "s10a: Message / Prompt 組み立てパイプライン",
+ "kind": "bridge",
+ "filename": "s10a-message-prompt-pipeline.md",
+ "content": "# s10a: Message / Prompt 組み立てパイプライン\n\n> これは `s10` を補う橋渡し文書です。 \n> ここでの問いは:\n>\n> **モデルが実際に見る入力は、system prompt 1 本だけなのか。**\n\n## 結論\n\n違います。\n\n高完成度の system では、モデル入力は複数 source の合成物です。\n\nたとえば:\n\n- stable system prompt blocks\n- normalized messages\n- memory section\n- dynamic reminders\n- tool instructions\n\nつまり system prompt は大事ですが、**入力全体の一部**です。\n\n## 最小の心智モデル\n\n```text\nstable rules\n +\ntool surface\n +\nmemory / CLAUDE.md / skills\n +\nnormalized messages\n +\ndynamic reminders\n =\nfinal model input\n```\n\n## 主要な構造\n\n### `PromptParts`\n\n入力 source を組み立て前に分けて持つ構造です。\n\n```python\nparts = {\n \"core\": \"...\",\n \"tools\": \"...\",\n \"memory\": \"...\",\n \"skills\": \"...\",\n \"dynamic\": \"...\",\n}\n```\n\n### `SystemPromptBlock`\n\n1 本の巨大文字列ではなく、section 単位で扱うための単位です。\n\n```python\nblock = {\n \"text\": \"...\",\n \"cache_scope\": None,\n}\n```\n\n### `NormalizedMessage`\n\nAPI に渡す前に整えられた messages です。\n\n```python\n{\n \"role\": \"user\",\n \"content\": [\n {\"type\": \"text\", \"text\": \"...\"}\n ],\n}\n```\n\n## なぜ分ける必要があるか\n\n### 1. 何が stable で何が dynamic かを分けるため\n\n- system rules は比較的 stable\n- current messages は dynamic\n- reminders はより短命\n\n### 2. どの source が何を足しているか追えるようにするため\n\nsource を混ぜて 1 本にすると:\n\n- memory がどこから来たか\n- skill がいつ入ったか\n- reminder がなぜ入ったか\n\nが見えにくくなります。\n\n### 3. compact / recovery / retry の説明がしやすくなるため\n\n入力 source が分かれていると:\n\n- 何を再利用するか\n- 何を要約するか\n- 何を次ターンで作り直すか\n\nが明確になります。\n\n## 初学者が混ぜやすい境界\n\n### `Message` と `PromptBlock`\n\n- `Message`: 会話履歴\n- `PromptBlock`: system 側の説明断片\n\n### `Memory` と `Prompt`\n\n- memory は内容 source\n- prompt pipeline は source を組む仕組み\n\n### `Tool instructions` と `Messages`\n\n- tool instructions は model が使える surface の説明\n- messages は今まで起きた対話 / 結果\n\n## 一文で覚える\n\n**system prompt は入力の全部ではなく、複数 source を束ねた pipeline の 1 つの section です。**\n"
},
{
"version": "s11",
+ "slug": "s11-error-recovery",
"locale": "ja",
- "title": "s11: Autonomous Agents",
- "content": "# s11: Autonomous Agents\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"チームメイトが自らボードを見て、仕事を取る\"* -- リーダーが逐一割り振る必要はない。\n\n## 問題\n\ns09-s10では、チームメイトは明示的に指示された時のみ作業する。リーダーは各チームメイトを特定のプロンプトでspawnしなければならない。タスクボードに未割り当てのタスクが10個あっても、リーダーが手動で各タスクを割り当てる。これはスケールしない。\n\n真の自律性とは、チームメイトが自分で作業を見つけること: タスクボードをスキャンし、未確保のタスクを確保し、作業し、完了したら次を探す。\n\nもう1つの問題: コンテキスト圧縮(s06)後にエージェントが自分の正体を忘れる可能性がある。アイデンティティ再注入がこれを解決する。\n\n## 解決策\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## 仕組み\n\n1. チームメイトのループはWORKとIDLEの2フェーズ。LLMがツール呼び出しを止めた時(または`idle`ツールを呼んだ時)、IDLEフェーズに入る。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. IDLEフェーズがインボックスとタスクボードをポーリングする。\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox} \"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']} \"})\n return True\n return False # timeout -> shutdown\n```\n\n3. タスクボードスキャン: pendingかつ未割り当てかつブロックされていないタスクを探す。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. アイデンティティ再注入: コンテキストが短すぎる(圧縮が起きた)場合にアイデンティティブロックを挿入する。\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work. \"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## s10からの変更点\n\n| Component | Before (s10) | After (s11) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. 
`Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. `/tasks`と入力してオーナー付きのタスクボードを確認する\n5. `/team`と入力して誰が作業中でアイドルかを監視する\n"
+ "title": "s11: Error Recovery",
+ "kind": "chapter",
+ "filename": "s11-error-recovery.md",
+ "content": "# s11: Error Recovery\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > [ s11 ] > s12 > s13 > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *error は例外イベントではなく、main loop が最初から用意しておくべき通常分岐です。*\n\n## この章が解く問題\n\n`s10` まで来ると agent はもう demo ではありません。\n\nすでに system には、\n\n- main loop\n- tool use\n- planning\n- compaction\n- permission\n- hook\n- memory\n- prompt assembly\n\nがあります。\n\nこうなると failure も自然に増えます。\n\n- model output が途中で切れる\n- context が大きすぎて request が入らない\n- API timeout や rate limit で一時的に失敗する\n\nもし recovery がなければ、main loop は最初の失敗で止まります。\n\nそして初心者はよく、\n\n> agent が不安定なのは model が弱いからだ\n\nと誤解します。\n\nしかし実際には多くの failure は、\n\n**task そのものが失敗したのではなく、この turn の続け方を変える必要があるだけ**\n\nです。\n\nこの章の目標は 1 つです。\n\n**「error が出たら停止」から、「error の種類を見て recovery path を選ぶ」へ進むこと**\n\nです。\n\n## 併読すると楽になる資料\n\n- 今の query がなぜまだ続いているのか見失ったら [`s00c-query-transition-model.md`](./s00c-query-transition-model.md)\n- compact と recovery が同じ mechanism に見えたら [`s06-context-compact.md`](./s06-context-compact.md)\n- このあと `s12` へ進む前に、recovery state と durable task state を混ぜたくなったら [`data-structures.md`](./data-structures.md)\n\n## 先に言葉をそろえる\n\n### recovery とは何か\n\nrecovery は「error をなかったことにする」ことではありません。\n\n意味は次です。\n\n- これは一時的 failure かを判定する\n- 一時的なら有限回の補救動作を試す\n- だめなら明示的に fail として返す\n\n### retry budget とは何か\n\nretry budget は、\n\n> 最大で何回までこの recovery path を試すか\n\nです。\n\n例:\n\n- continuation は最大 3 回\n- transport retry は最大 3 回\n\nこれがないと loop が無限に回る危険があります。\n\n### state machine とは何か\n\nこの章での state machine は難しい theory ではありません。\n\n単に、\n\n> normal execution と各 recovery branch を、明確な状態遷移として見ること\n\nです。\n\nこの章から query の進行は次のように見えるようになります。\n\n- normal\n- continue after truncation\n- compact then retry\n- backoff then retry\n- final fail\n\n## 最小心智モデル\n\n最初は 3 種類の failure だけ区別できれば十分です。\n\n```text\n1. output truncated\n model はまだ言い終わっていないが token が尽きた\n\n2. context too large\n request 全体が model window に入らない\n\n3. transient transport failure\n timeout / rate limit / temporary connection issue\n```\n\nそれぞれに対応する recovery path はこうです。\n\n```text\nLLM call\n |\n +-- stop_reason == \"max_tokens\"\n | -> continuation message を入れる\n | -> retry\n |\n +-- prompt too long\n | -> compact する\n | -> retry\n |\n +-- timeout / rate limit / connection error\n -> 少し待つ\n -> retry\n```\n\nこれが最小ですが、十分に正しい recovery model です。\n\n## この章の核になるデータ構造\n\n### 1. Recovery State\n\n```python\nrecovery_state = {\n \"continuation_attempts\": 0,\n \"compact_attempts\": 0,\n \"transport_attempts\": 0,\n}\n```\n\n役割は 2 つあります。\n\n- 各 recovery path ごとの retry 回数を分けて数える\n- 無限 recovery を防ぐ\n\n### 2. Recovery Decision\n\n```python\n{\n \"kind\": \"continue\" | \"compact\" | \"backoff\" | \"fail\",\n \"reason\": \"why this branch was chosen\",\n}\n```\n\nここで大事なのは、\n\n**error の見た目と、次に選ぶ動作を分ける**\n\nことです。\n\nこの分離があると loop が読みやすくなります。\n\n### 3. Continuation Message\n\n```python\nCONTINUE_MESSAGE = (\n \"Output limit hit. Continue directly from where you stopped. 
\"\n \"Do not restart or repeat.\"\n)\n```\n\nこの message は地味ですが非常に重要です。\n\nなぜなら model は「続けて」とだけ言うと、\n\n- 最初から言い直す\n- もう一度要約し直す\n- 直前の内容を繰り返す\n\nことがあるからです。\n\n## 最小実装を段階で追う\n\n### 第 1 段階: recovery chooser を作る\n\n```python\ndef choose_recovery(stop_reason: str | None, error_text: str | None) -> dict:\n if stop_reason == \"max_tokens\":\n return {\"kind\": \"continue\", \"reason\": \"output truncated\"}\n\n if error_text and \"prompt\" in error_text and \"long\" in error_text:\n return {\"kind\": \"compact\", \"reason\": \"context too large\"}\n\n if error_text and any(word in error_text for word in [\n \"timeout\", \"rate\", \"unavailable\", \"connection\"\n ]):\n return {\"kind\": \"backoff\", \"reason\": \"transient transport failure\"}\n\n return {\"kind\": \"fail\", \"reason\": \"unknown or non-recoverable error\"}\n```\n\nこの関数がやっている本質は、\n\n**まず分類し、そのあと branch を返す**\n\nという 1 点です。\n\n### 第 2 段階: main loop に差し込む\n\n```python\nwhile True:\n try:\n response = client.messages.create(...)\n decision = choose_recovery(response.stop_reason, None)\n except Exception as e:\n response = None\n decision = choose_recovery(None, str(e).lower())\n\n if decision[\"kind\"] == \"continue\":\n messages.append({\"role\": \"user\", \"content\": CONTINUE_MESSAGE})\n continue\n\n if decision[\"kind\"] == \"compact\":\n messages = auto_compact(messages)\n continue\n\n if decision[\"kind\"] == \"backoff\":\n time.sleep(backoff_delay(...))\n continue\n\n if decision[\"kind\"] == \"fail\":\n break\n\n # normal tool handling\n```\n\nここで一番大事なのは、\n\n- catch したら即 stop\n\nではなく、\n\n- 何の失敗かを見る\n- どの recovery path を試すか決める\n\nという構造です。\n\n## 3 つの主 recovery path が埋めている穴\n\n### 1. continuation\n\nこれは「model が言い終わる前に output budget が切れた」問題を埋めます。\n\n本質は、\n\n> task が失敗したのではなく、1 turn の出力空間が足りなかった\n\nということです。\n\n最小形はこうです。\n\n```python\nif response.stop_reason == \"max_tokens\":\n if state[\"continuation_attempts\"] >= 3:\n return \"Error: output recovery exhausted\"\n state[\"continuation_attempts\"] += 1\n messages.append({\"role\": \"user\", \"content\": CONTINUE_MESSAGE})\n continue\n```\n\n### 2. compact\n\nこれは「task が無理」ではなく、\n\n> active context が大きすぎて request が入らない\n\nときに使います。\n\nここで大事なのは、compact を delete と考えないことです。\n\ncompact は、\n\n**過去を、そのままの原文ではなく、まだ続行可能な summary へ変換する**\n\n操作です。\n\n最小例:\n\n```python\ndef auto_compact(messages: list) -> list:\n summary = summarize_messages(messages)\n return [{\n \"role\": \"user\",\n \"content\": \"This session was compacted. Continue from this summary:\\n\" + summary,\n }]\n```\n\n最低限 summary に残したいのは次です。\n\n- 今の task は何か\n- 何をすでに終えたか\n- 重要 decision は何か\n- 次に何をするつもりか\n\n### 3. 
backoff\n\nこれは timeout、rate limit、temporary connection issue のような\n\n**時間を置けば通るかもしれない failure**\n\nに対して使います。\n\n考え方は単純です。\n\n```python\nif decision[\"kind\"] == \"backoff\":\n if state[\"transport_attempts\"] >= 3:\n break\n state[\"transport_attempts\"] += 1\n time.sleep(backoff_delay(state[\"transport_attempts\"]))\n continue\n```\n\nここで大切なのは「retry すること」よりも、\n\n**retry にも budget があり、同じ速度で無限に叩かないこと**\n\nです。\n\n## compact と recovery を混ぜない\n\nこれは初学者が特に混ぜやすい点です。\n\n- `s06` の compact は context hygiene のために行うことがある\n- `s11` の compact recovery は request failure から戻るために行う\n\n同じ compact という操作でも、\n\n**目的が違います。**\n\n目的が違えば、それを呼ぶ branch も別に見るべきです。\n\n## recovery は query の continuation 理由でもある\n\n`s11` の重要な学びは、error handling を `except` の奥へ隠さないことです。\n\nむしろ次を explicit に持つ方が良いです。\n\n- なぜまだ続いているのか\n- 何回その branch を試したのか\n- 次にどの branch を試すのか\n\nすると recovery は hidden plumbing ではなく、\n\n**query transition を説明する状態**\n\nになります。\n\n## 初学者が混ぜやすいポイント\n\n### 1. すべての failure に同じ retry をかける\n\ntruncation と transport error は同じ問題ではありません。\n\n### 2. retry budget を持たない\n\n無限 loop の原因になります。\n\n### 3. compact と recovery を 1 つの話にしてしまう\n\ncontext hygiene と failure recovery は目的が違います。\n\n### 4. continuation message を曖昧にする\n\n「続けて」だけでは model が restart / repeat しやすいです。\n\n### 5. なぜ続行しているのかを state に残さない\n\ndebug も teaching も急に難しくなります。\n\n## この章を読み終えたら何が言えるべきか\n\n1. 多くの error は task failure ではなく、「この turn の続け方を変えるべき」信号である\n2. recovery は `continue / compact / backoff / fail` の branch として考えられる\n3. recovery path ごとに budget を持たないと loop が壊れやすい\n\n## 一文で覚える\n\n**Error Recovery とは、failure を見た瞬間に止まるのではなく、failure の種類に応じて continuation path を選び直す control layer です。**\n"
},
{
"version": "s12",
+ "slug": "s12-task-system",
+ "locale": "ja",
+ "title": "s12: Task System",
+ "kind": "chapter",
+ "filename": "s12-task-system.md",
+ "content": "# s12: Task System\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"大きな目標を小タスクに分解し、順序付けし、ディスクに記録する\"* -- ファイルベースのタスクグラフ、マルチエージェント協調の基盤。\n>\n> **Harness 層**: 永続タスク -- どの会話よりも長く生きる目標。\n\n## 問題\n\ns03のTodoManagerはメモリ上のフラットなチェックリストに過ぎない: 順序なし、依存関係なし、ステータスは完了か未完了のみ。実際の目標には構造がある -- タスクBはタスクAに依存し、タスクCとDは並行実行でき、タスクEはCとDの両方を待つ。\n\n明示的な関係がなければ、エージェントは何が実行可能で、何がブロックされ、何が同時に走れるかを判断できない。しかもリストはメモリ上にしかないため、コンテキスト圧縮(s06)で消える。\n\n## 主線とどう併読するか\n\n- `s03` からそのまま来たなら、[`data-structures.md`](./data-structures.md) へ戻って `TodoItem` / `PlanState` と `TaskRecord` を分けます。\n- object 境界が混ざり始めたら、[`entity-map.md`](./entity-map.md) で message、task、runtime task、teammate を分離してから戻ります。\n- 次に `s13` を読むなら、[`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) を横に置いて、durable task と runtime task を同じ言葉で潰さないようにします。\n\n## 解決策\n\nフラットなチェックリストをディスクに永続化する**タスクグラフ**に昇格させる。各タスクは1つのJSONファイルで、ステータス・前方依存(`blockedBy`)を持つ。タスクグラフは常に3つの問いに答える:\n\n- **何が実行可能か?** -- `pending`ステータスで`blockedBy`が空のタスク。\n- **何がブロックされているか?** -- 未完了の依存を待つタスク。\n- **何が完了したか?** -- `completed`のタスク。完了時に後続タスクを自動的にアンブロックする。\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\nタスクグラフ (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\n順序: task 1 は 2 と 3 より先に完了する必要がある\n並行: task 2 と 3 は同時に実行できる\n依存: task 4 は 2 と 3 の両方を待つ\nステータス: pending -> in_progress -> completed\n```\n\nこのタスクグラフは後続の runtime / platform 章の協調バックボーンになる: バックグラウンド実行(`s13`)、マルチエージェントチーム(`s15+`)、worktree 分離(`s18`)はすべてこの durable な構造の恩恵を受ける。\n\n## 仕組み\n\n1. **TaskManager**: タスクごとに1つのJSONファイル、依存グラフ付きCRUD。\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. **依存解除**: タスク完了時に、他タスクの`blockedBy`リストから完了IDを除去し、後続タスクをアンブロックする。\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **ステータス遷移 + 依存配線**: `update`がステータス変更と依存エッジを担う。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, remove_blocked_by=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocked_by:\n task[\"blockedBy\"] = list(set(task[\"blockedBy\"] + add_blocked_by))\n if remove_blocked_by:\n task[\"blockedBy\"] = [x for x in task[\"blockedBy\"] if x not in remove_blocked_by]\n self._save(task)\n```\n\n4. 
4つのタスクツールをディスパッチマップに追加する。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\n`s12` 以降、タスクグラフが durable なマルチステップ作業のデフォルトになる。`s03` の Todo は軽量な単一セッション用チェックリストとして残る。\n\n## s06からの変更点\n\n| コンポーネント | Before (s06) | After (s12) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| 計画モデル | フラットチェックリスト (メモリ) | 依存関係付きタスクグラフ (ディスク) |\n| 関係 | なし | `blockedBy` エッジ |\n| ステータス追跡 | 完了か未完了 | `pending` -> `in_progress` -> `completed` |\n| 永続性 | 圧縮で消失 | 圧縮・再起動後も存続 |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s12_task_system.py\n```\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n\n## 教学上の境界\n\nこのリポジトリで本当に重要なのは、完全な製品向け保存層の再現ではありません。\n\n重要なのは:\n\n- durable なタスク記録\n- 明示的な依存エッジ\n- 分かりやすい状態遷移\n- 後続章が再利用できる構造\n\nこの 4 点を自分で実装できれば、タスクシステムの核心はつかめています。\n"
+ },
+ {
+ "version": "s13",
+ "slug": "s13-background-tasks",
+ "locale": "ja",
+ "title": "s13: バックグラウンドタスク",
+ "kind": "chapter",
+ "filename": "s13-background-tasks.md",
+ "content": "# s13: バックグラウンドタスク\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > [ s13 ] > s14 > s15 > s16 > s17 > s18 > s19`\n\n> *遅い command は横で待たせればよく、main loop まで一緒に止まる必要はありません。*\n\n## この章が解く問題\n\n前の章までの tool call は、基本的に次の形でした。\n\n```text\nmodel が tool を要求する\n ->\nすぐ実行する\n ->\nすぐ結果を返す\n```\n\n短い command ならこれで問題ありません。\n\nでも次のような処理はすぐに詰まります。\n\n- `npm install`\n- `pytest`\n- `docker build`\n- 重い code generation\n- 長時間の lint / typecheck\n\nもし main loop がその完了を同期的に待ち続けると、2 つの問題が起きます。\n\n- model は待ち時間のあいだ次の判断へ進めない\n- user は別の軽い作業を進めたいのに、agent 全体が足止めされる\n\nこの章で入れるのは、\n\n**遅い実行を background へ逃がし、main loop は次の仕事へ進めるようにすること**\n\nです。\n\n## 併読すると楽になる資料\n\n- `task goal` と `live execution slot` がまだ混ざるなら [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)\n- `RuntimeTaskRecord` と task board の境界を見直したいなら [`data-structures.md`](./data-structures.md)\n- background execution が「別の main loop」に見えてきたら [`s02b-tool-execution-runtime.md`](./s02b-tool-execution-runtime.md)\n\n## 先に言葉をそろえる\n\n### foreground とは何か\n\nここで言う foreground は、\n\n> この turn の中で今すぐ結果が必要なので、main loop がその場で待つ実行\n\nです。\n\n### background とは何か\n\nbackground は謎の裏世界ではありません。\n\n意味は単純で、\n\n> command を別の execution line に任せ、main loop は先に別のことを進める\n\nことです。\n\n### 通知キューとは何か\n\nbackground task が終わっても、その完全な出力をいきなり model へ丸ごと押し込む必要はありません。\n\nいったん queue に要約通知として積み、\n\n> 次の model call の直前にまとめて main loop へ戻す\n\nのが分かりやすい設計です。\n\n## 最小心智モデル\n\nこの章で最も大切な 1 文は次です。\n\n**並行になるのは実行と待機であって、main loop 自体が増えるわけではありません。**\n\n図にするとこうです。\n\n```text\nMain loop\n |\n +-- background_run(\"pytest\")\n | -> すぐ task_id を返す\n |\n +-- そのまま別の仕事を続ける\n |\n +-- 次の model call の前\n -> drain_notifications()\n -> 結果要約を messages へ注入\n\nBackground lane\n |\n +-- 実際に subprocess を実行\n +-- 終了後に result preview を queue へ積む\n```\n\nこの図を保ったまま理解すれば、後でもっと複雑な runtime へ進んでも心智が崩れにくくなります。\n\n## この章の核になるデータ構造\n\n### 1. RuntimeTaskRecord\n\nこの章で扱う background task は durable task board の task とは別物です。\n\n教材コードでは、background 実行はおおむね次の record を持ちます。\n\n```python\ntask = {\n \"id\": \"a1b2c3d4\",\n \"command\": \"pytest\",\n \"status\": \"running\",\n \"started_at\": 1710000000.0,\n \"finished_at\": None,\n \"result_preview\": \"\",\n \"output_file\": \".runtime-tasks/a1b2c3d4.log\",\n}\n```\n\n各 field の意味は次の通りです。\n\n- `id`: runtime slot の識別子\n- `command`: 今走っている command\n- `status`: `running` / `completed` / `timeout` / `error`\n- `started_at`: いつ始まったか\n- `finished_at`: いつ終わったか\n- `result_preview`: model に戻す短い要約\n- `output_file`: 完全出力の保存先\n\n教材版ではこれを disk 上にも分けて残します。\n\n```text\n.runtime-tasks/\n a1b2c3d4.json\n a1b2c3d4.log\n```\n\nこれで読者は、\n\n- `json` は状態 record\n- `log` は完全出力\n- model へ戻すのはまず preview\n\nという 3 層を自然に見分けられます。\n\n### 2. 
Notification\n\nbackground result はまず notification queue に入ります。\n\n```python\nnotification = {\n \"task_id\": \"a1b2c3d4\",\n \"status\": \"completed\",\n \"command\": \"pytest\",\n \"preview\": \"42 tests passed\",\n \"output_file\": \".runtime-tasks/a1b2c3d4.log\",\n}\n```\n\nnotification の役割は 1 つだけです。\n\n> main loop に「結果が戻ってきた」と知らせること\n\nここに完全出力の全量を埋め込む必要はありません。\n\n## 最小実装を段階で追う\n\n### 第 1 段階: background manager を持つ\n\n最低限必要なのは次の 2 つの状態です。\n\n- `tasks`: いま存在する runtime task\n- `_notification_queue`: main loop にまだ回収されていない結果\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\nここで lock を置いているのは、background thread と main loop が同じ queue / dict を触るからです。\n\n### 第 2 段階: `run()` はすぐ返す\n\nbackground 化の一番大きな変化はここです。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\n \"id\": task_id,\n \"status\": \"running\",\n \"command\": command,\n \"started_at\": time.time(),\n }\n\n thread = threading.Thread(\n target=self._execute,\n args=(task_id, command),\n daemon=True,\n )\n thread.start()\n return task_id\n```\n\n重要なのは thread 自体より、\n\n**main loop が結果ではなく `task_id` を受け取り、先に進める**\n\nことです。\n\n### 第 3 段階: subprocess が終わったら notification を積む\n\n```python\ndef _execute(self, task_id: str, command: str):\n try:\n result = subprocess.run(..., timeout=300)\n status = \"completed\"\n preview = (result.stdout + result.stderr)[:500]\n except subprocess.TimeoutExpired:\n status = \"timeout\"\n preview = \"command timed out\"\n\n with self._lock:\n self.tasks[task_id][\"status\"] = status\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"preview\": preview,\n })\n```\n\nここでの設計意図ははっきりしています。\n\n- execution lane は command を実際に走らせる\n- notification queue は main loop へ戻すための要約を持つ\n\n役割を分けることで、result transport が見やすくなります。\n\n### 第 4 段階: 次の model call 前に queue を drain する\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifications = BG.drain_notifications()\n if notifications:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['preview']}\" for n in notifications\n )\n messages.append({\n \"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n \",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted background results.\",\n })\n```\n\nこの構造が大切です。\n\n結果は「いつでも割り込んで model へ押し込まれる」のではなく、\n\n**次の model call の入口でまとめて注入される**\n\nからです。\n\n### 第 5 段階: preview と full output を分ける\n\n教材コードでは `result_preview` と `output_file` を分けています。\n\nこれは初心者にも非常に大事な設計です。\n\nなぜなら background result にはしばしば次の問題があるからです。\n\n- 出力が長い\n- model に全量を見せる必要がない\n- user だけ詳細 log を見れば十分なことが多い\n\nそこでまず model には短い preview を返し、必要なら後で `read_file` 等で full log を読む形にします。\n\n### 第 6 段階: stalled task も見られるようにする\n\n教材コードは `STALL_THRESHOLD_S` を持ち、長く走りすぎている task を拾えます。\n\n```python\ndef detect_stalled(self) -> list[str]:\n now = time.time()\n stalled = []\n for task_id, info in self.tasks.items():\n if info[\"status\"] != \"running\":\n continue\n elapsed = now - info.get(\"started_at\", now)\n if elapsed > STALL_THRESHOLD_S:\n stalled.append(task_id)\n return stalled\n```\n\nここで学ぶべき本質は sophisticated monitoring ではありません。\n\n**background 化したら「開始したまま返ってこないもの」を見張る観点が必要になる**\n\nということです。\n\n## これは task board の task とは違う\n\nここは混ざりやすいので強調します。\n\n`s12` の `task` は durable goal node です。\n\n一方この章の background task は、\n\n> いま実行中の live runtime slot\n\nです。\n\n同じ `task` という言葉を使っても指している層が違います。\n\nだから分からなくなったら、本文だけを往復せずに次へ戻るべきです。\n\n- 
[`entity-map.md`](./entity-map.md)\n- [`data-structures.md`](./data-structures.md)\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)\n\n## 前の章とどうつながるか\n\nこの章は `s12` の durable task graph を否定する章ではありません。\n\nむしろ、\n\n- `s12` が「何の仕事が存在するか」を管理し\n- `s13` が「いまどの command が走っているか」を管理する\n\nという役割分担を教える章です。\n\n後の `s14`、`s17`、`s18` へ行く前に、\n\n**goal と runtime slot を分けて見る癖**\n\nをここで作っておくことが重要です。\n\n## 初学者が混ぜやすいポイント\n\n### 1. background execution を「もう 1 本の main loop」と考える\n\n実際に増えているのは subprocess waiting lane であって、main conversational loop ではありません。\n\n### 2. result を queue ではなく即座に messages へ乱暴に書き込む\n\nこれでは model input の入口が分散し、system の流れが追いにくくなります。\n\n### 3. full output と preview を分けない\n\n長い log で context がすぐあふれます。\n\n### 4. runtime task と durable task を同一視する\n\n「いま走っている command」と「長く残る work goal」は別物です。\n\n### 5. queue 操作に lock を使わない\n\nbackground thread と main loop の競合で状態が壊れやすくなります。\n\n### 6. timeout / error を `completed` と同じように扱う\n\n戻すべき情報は同じではありません。終了理由は explicit に残すべきです。\n\n## 教学上の境界\n\nこの章でまず理解すべき中心は、製品用の完全な async runtime ではありません。\n\n中心は次の 3 行です。\n\n- 遅い仕事を foreground から切り離す\n- 結果は notification として main loop に戻す\n- runtime slot は durable task board とは別層で管理する\n\nここが腹落ちしてから、\n\n- より複雑な scheduler\n- 複数種類の background lane\n- 分散 worker\n\nへ進めば十分です。\n"
+ },
+ {
+ "version": null,
+ "slug": "s13a-runtime-task-model",
+ "locale": "ja",
+ "title": "s13a: Runtime Task Model",
+ "kind": "bridge",
+ "filename": "s13a-runtime-task-model.md",
+ "content": "# s13a: Runtime Task Model\n\n> この bridge doc はすぐに混ざる次の点をほどくためのものです。\n>\n> **work graph 上の task と、いま実行中の task は同じものではありません。**\n\n## 主線とどう併読するか\n\n次の順で読むのが最も分かりやすいです。\n\n- まず [`s12-task-system.md`](./s12-task-system.md) を読み、durable な work graph を固める\n- 次に [`s13-background-tasks.md`](./s13-background-tasks.md) を読み、background execution を見る\n- 用語が混ざり始めたら [`glossary.md`](./glossary.md) を見直す\n- field を正確に合わせたいなら [`data-structures.md`](./data-structures.md) と [`entity-map.md`](./entity-map.md) を見直す\n\n## なぜこの橋渡しが必要か\n\n主線自体は正しいです。\n\n- `s12` は task system\n- `s13` は background tasks\n\nただし bridge layer を一枚挟まないと、読者は二種類の「task」をすぐに同じ箱へ入れてしまいます。\n\n例えば:\n\n- 「auth module を実装する」という work-graph task\n- 「pytest を走らせる」という background execution\n- 「alice がコード修正をしている」という teammate execution\n\nどれも日常語では task と呼べますが、同じ層にはありません。\n\n## 二つの全く違う task\n\n### 1. work-graph task\n\nこれは `s12` の durable node です。\n\n答えるものは:\n\n- 何をやるか\n- どの仕事がどの仕事に依存するか\n- 誰が owner か\n- 進捗はどうか\n\nつまり:\n\n> 目標として管理される durable work unit\n\nです。\n\n### 2. runtime task\n\nこちらが答えるものは:\n\n- 今どの execution unit が生きているか\n- それが何の type か\n- running / completed / failed / killed のどれか\n- 出力がどこにあるか\n\nつまり:\n\n> runtime の中で生きている execution slot\n\nです。\n\n## 最小の心智モデル\n\nまず二つの表として分けて考えてください。\n\n```text\nwork-graph task\n - durable\n - goal / dependency oriented\n - 寿命が長い\n\nruntime task\n - execution oriented\n - output / status oriented\n - 寿命が短い\n```\n\n両者の関係は「どちらか一方」ではありません。\n\n```text\n1 つの work-graph task\n から\n1 個以上の runtime task が派生しうる\n```\n\n例えば:\n\n```text\nwork-graph task:\n \"Implement auth module\"\n\nruntime tasks:\n 1. background で test を走らせる\n 2. coder teammate を起動する\n 3. 外部 service を monitor する\n```\n\n## なぜこの区別が重要か\n\nこの境界が崩れると、後続章がすぐに絡み始めます。\n\n- `s13` の background execution が `s12` の task board と混ざる\n- `s15-s17` の teammate work がどこにぶら下がるか不明になる\n- `s18` の worktree が何に紐づくのか曖昧になる\n\n最短の正しい要約はこれです。\n\n**work-graph task は目標を管理し、runtime task は実行を管理する**\n\n## 主要 record\n\n### 1. `WorkGraphTaskRecord`\n\nこれは `s12` の durable task です。\n\n```python\ntask = {\n \"id\": 12,\n \"subject\": \"Implement auth module\",\n \"status\": \"in_progress\",\n \"blockedBy\": [],\n \"blocks\": [13],\n \"owner\": \"alice\",\n \"worktree\": \"auth-refactor\",\n}\n```\n\n### 2. `RuntimeTaskState`\n\n教材版の最小形は次の程度で十分です。\n\n```python\nruntime_task = {\n \"id\": \"b8k2m1qz\",\n \"type\": \"local_bash\",\n \"status\": \"running\",\n \"description\": \"Run pytest\",\n \"start_time\": 1710000000.0,\n \"end_time\": None,\n \"output_file\": \".task_outputs/b8k2m1qz.txt\",\n \"notified\": False,\n}\n```\n\n重要 field は:\n\n- `type`: どの execution unit か\n- `status`: active か terminal か\n- `output_file`: 結果がどこにあるか\n- `notified`: 結果を system がもう表に出したか\n\n### 3. 
`RuntimeTaskType`\n\n教材 repo ですべての type を即実装する必要はありません。\n\nただし runtime task は単なる shell 1 種ではなく、型族だと読者に見せるべきです。\n\n最小表は:\n\n```text\nlocal_bash\nlocal_agent\nremote_agent\nin_process_teammate\nmonitor\nworkflow\n```\n\n## 最小実装の進め方\n\n### Step 1: `s12` の task board はそのまま保つ\n\nここへ runtime state を混ぜないでください。\n\n### Step 2: 別の runtime task manager を足す\n\n```python\nclass RuntimeTaskManager:\n def __init__(self):\n self.tasks = {}\n```\n\n### Step 3: background work 開始時に runtime task を作る\n\n```python\ndef spawn_bash_task(command: str):\n task_id = new_runtime_id()\n runtime_tasks[task_id] = {\n \"id\": task_id,\n \"type\": \"local_bash\",\n \"status\": \"running\",\n \"description\": command,\n }\n```\n\n### Step 4: 必要なら work graph へ結び戻す\n\n```python\nruntime_tasks[task_id][\"work_graph_task_id\"] = 12\n```\n\n初日から必須ではありませんが、teams や worktrees へ進むほど重要になります。\n\n## 開発者が持つべき図\n\n```text\nWork Graph\n task #12: Implement auth module\n |\n +-- runtime task A: local_bash (pytest)\n +-- runtime task B: local_agent (coder worker)\n +-- runtime task C: monitor (watch service status)\n\nRuntime Task Layer\n A/B/C each have:\n - own runtime ID\n - own status\n - own output\n - own lifecycle\n```\n\n## 後続章とのつながり\n\nこの層が明確になると、後続章がかなり読みやすくなります。\n\n- `s13` の background command は runtime task\n- `s15-s17` の teammate も runtime task の一種として見られる\n- `s18` の worktree は主に durable work に紐づくが runtime execution にも影響する\n- `s19` の monitor や async external work も runtime layer に落ちうる\n\n「裏で生きていて仕事を進めているもの」を見たら、まず二つ問います。\n\n- これは work graph 上の durable goal か\n- それとも runtime 上の live execution slot か\n\n## 初学者がやりがちな間違い\n\n### 1. background shell の state を task board に直接入れる\n\ndurable task state と runtime execution state が混ざります。\n\n### 2. 1 つの work-graph task は 1 つの runtime task しか持てないと思う\n\n現実の system では、1 つの goal から複数 execution unit が派生することは普通です。\n\n### 3. 両層で同じ status 語彙を使い回す\n\n例えば:\n\n- durable tasks: `pending / in_progress / completed`\n- runtime tasks: `running / completed / failed / killed`\n\n可能な限り分けた方が安全です。\n\n### 4. `output_file` や `notified` のような runtime 専用 field を軽視する\n\ndurable task board はそこまで気にしませんが、runtime layer は強く依存します。\n"
+ },
+ {
+ "version": "s14",
+ "slug": "s14-cron-scheduler",
+ "locale": "ja",
+ "title": "s14: Cron Scheduler",
+ "kind": "chapter",
+ "filename": "s14-cron-scheduler.md",
+ "content": "# s14: Cron Scheduler\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > [ s14 ] > s15 > s16 > s17 > s18 > s19`\n\n> *バックグラウンドタスクが「遅い仕事をどう続けるか」を扱うなら、スケジューラは「未来のいつ仕事を始めるか」を扱う。*\n\n## この章が解決する問題\n\n`s13` で、遅い処理をバックグラウンドへ逃がせるようになりました。\n\nでもそれは「今すぐ始める仕事」です。\n\n現実には:\n\n- 毎晩実行したい\n- 毎週決まった時刻にレポートを作りたい\n- 30 分後に再確認したい\n\nといった未来トリガーが必要になります。\n\nこの章の核心は:\n\n**未来の意図を今記録して、時刻が来たら新しい仕事として戻す**\n\nことです。\n\n## 教学上の境界\n\nこの章の中心は cron 構文の暗記ではありません。\n\n本当に理解すべきなのは:\n\n**schedule record が通知になり、通知が主ループへ戻る流れ**\n\nです。\n\n## 主線とどう併読するか\n\n- `schedule`、`task`、`runtime task` がまだ同じ object に見えるなら、[`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) に戻ります。\n- 1 つの trigger が最終的にどう主線へ戻るかを見たいなら、[`s00b-one-request-lifecycle.md`](./s00b-one-request-lifecycle.md) と一緒に読みます。\n- 未来トリガーが別の実行系に見えてきたら、[`data-structures.md`](./data-structures.md) で schedule record と runtime record を分け直します。\n\n## 最小の心智モデル\n\n```text\n1. schedule records\n2. time checker\n3. notification queue\n```\n\n流れ:\n\n```text\nschedule_create(...)\n ->\n記録を保存\n ->\ntime checker が定期的に一致判定\n ->\n一致したら scheduled notification を積む\n ->\n主ループがそれを新しい仕事として受け取る\n```\n\n重要なのは:\n\n**scheduler 自体は第二の agent ではない**\n\nということです。\n\n## 重要なデータ構造\n\n### 1. schedule record\n\n```python\nschedule = {\n \"id\": \"job_001\",\n \"cron\": \"0 9 * * 1\",\n \"prompt\": \"Run the weekly status report.\",\n \"recurring\": True,\n \"durable\": True,\n \"created_at\": 1710000000.0,\n \"last_fired_at\": None,\n}\n```\n\n### 2. scheduled notification\n\n```python\n{\n \"type\": \"scheduled_prompt\",\n \"schedule_id\": \"job_001\",\n \"prompt\": \"Run the weekly status report.\",\n}\n```\n\n### 3. check interval\n\n教学版なら分単位で十分です。\n\n## 最小実装\n\n```python\ndef create(self, cron_expr: str, prompt: str, recurring: bool = True):\n job = {\n \"id\": new_id(),\n \"cron\": cron_expr,\n \"prompt\": prompt,\n \"recurring\": recurring,\n \"created_at\": time.time(),\n \"last_fired_at\": None,\n }\n self.jobs.append(job)\n return job\n```\n\n```python\ndef check_loop(self):\n while True:\n now = datetime.now()\n self.check_jobs(now)\n time.sleep(60)\n```\n\n```python\ndef check_jobs(self, now):\n for job in self.jobs:\n if cron_matches(job[\"cron\"], now):\n self.queue.put({\n \"type\": \"scheduled_prompt\",\n \"schedule_id\": job[\"id\"],\n \"prompt\": job[\"prompt\"],\n })\n job[\"last_fired_at\"] = now.timestamp()\n```\n\n最後に主ループへ戻します。\n\n```python\nnotifications = scheduler.drain()\nfor item in notifications:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"[scheduled:{item['schedule_id']}] {item['prompt']}\",\n })\n```\n\n## なぜ `s13` の後なのか\n\nこの 2 章は近い問いを扱います。\n\n| 仕組み | 中心の問い |\n|---|---|\n| background tasks | 遅い仕事を止めずにどう続けるか |\n| scheduling | 未来の仕事をいつ始めるか |\n\nこの順序の方が、初学者には自然です。\n\n## 初学者がやりがちな間違い\n\n### 1. cron 構文だけに意識を取られる\n\n### 2. `last_fired_at` を持たない\n\n### 3. スケジュールをメモリにしか置かない\n\n### 4. 未来トリガーの仕事を裏で黙って全部実行する\n\nより分かりやすい主線は:\n\n- trigger\n- notify\n- main loop が処理を決める\n\nです。\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s14_cron_scheduler.py\n```\n"
+ },
+ {
+ "version": "s15",
+ "slug": "s15-agent-teams",
+ "locale": "ja",
+ "title": "s15: Agent Teams",
+ "kind": "chapter",
+ "filename": "s15-agent-teams.md",
+ "content": "# s15: Agent Teams\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > [ s15 ] > s16 > s17 > s18 > s19`\n\n> *subagent は一回きりの委譲に向く。team system が解くのは、「誰かが長く online で残り、繰り返し仕事を受け取り、互いに協調できる」状態です。*\n\n## この章が本当に解きたい問題\n\n`s04` の subagent は、main agent が作業を小さく切り出すのに十分役立ちます。\n\nただし subagent には明確な境界があります。\n\n```text\n生成される\n ->\n少し作業する\n ->\n要約を返す\n ->\n消える\n```\n\nこれは一回きりの調査や短い委譲にはとても向いています。 \nしかし、次のような system を作りたいときには足りません。\n\n- テスト担当の agent を長く待機させる\n- リファクタ担当とテスト担当を並行して持ち続ける\n- ある teammate が後のターンでも同じ責任を持ち続ける\n- lead が後で同じ teammate へ再び仕事を振る\n\nつまり今不足しているのは「model call を 1 回増やすこと」ではありません。\n\n不足しているのは:\n\n**名前・役割・inbox・状態を持った、長期的に存在する実行者の集まり**\n\nです。\n\n## 併読のすすめ\n\n- teammate と `s04` の subagent をまだ同じものに見てしまうなら、[`entity-map.md`](./entity-map.md) に戻ります。\n- `s16-s18` まで続けて読むなら、[`team-task-lane-model.md`](./team-task-lane-model.md) を手元に置き、teammate、protocol request、task、runtime slot、worktree lane を混ぜないようにします。\n- 長く生きる teammate と background 実行の runtime slot が混ざり始めたら、[`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md) で goal / execution の境界を先に固めます。\n\n## まず用語をはっきり分ける\n\n### teammate とは何か\n\nここでの `teammate` は:\n\n> 名前、役割、inbox、lifecycle を持ち、複数ターンにまたがって system 内へ残る agent\n\nのことです。\n\n重要なのは「賢い helper」ではなく、**持続する actor** だという点です。\n\n### roster とは何か\n\n`roster` は team member の名簿です。\n\n少なくとも次を答えられる必要があります。\n\n- 今 team に誰がいるか\n- その人の role は何か\n- その人は idle か、working か、shutdown 済みか\n\n### mailbox とは何か\n\n`mailbox` は各 teammate が持つ受信箱です。\n\n他の member はそこへ message を送ります。 \n受信側は、自分の次の work loop に入る前に mailbox を drain します。\n\nこの設計の利点は、協調が次のように見えることです。\n\n- 誰が誰に送ったか\n- どの member がまだ未読か\n- どの message が actor 間通信なのか\n\n## 最小心智モデル\n\nこの章をいちばん壊れにくく理解する方法は、各 teammate を次のように見ることです。\n\n> 自分の `messages`、自分の mailbox、自分の agent loop を持った長期 actor\n\n```text\nlead\n |\n +-- spawn alice (tester)\n +-- spawn bob (refactorer)\n |\n +-- send message -> alice inbox\n +-- send message -> bob inbox\n\nalice\n |\n +-- 自分の messages\n +-- 自分の inbox\n +-- 自分の agent loop\n\nbob\n |\n +-- 自分の messages\n +-- 自分の inbox\n +-- 自分の agent loop\n```\n\nこの章の一番大事な対比は次です。\n\n- subagent: 一回きりの探索 helper\n- teammate: 長く存在し続ける協調 member\n\n## それまでの章にどう接続するか\n\n`s15` は単に「人数を増やす章」ではありません。 \n`s12-s14` でできた task / runtime / schedule の上に、**長く残る実行者層**を足す章です。\n\n接続の主線は次です。\n\n```text\nlead が「長く担当させたい仕事」を見つける\n ->\nteammate を spawn する\n ->\nteam roster に登録する\n ->\nmailbox に仕事の手がかりや依頼を送る\n ->\nteammate が自分の inbox を drain する\n ->\n自分の agent loop と tools を回す\n ->\n結果を message / task update として返す\n```\n\nここで見失ってはいけない境界は 4 つです。\n\n1. `s12-s14` が作ったのは work layer であり、ここでは actor layer を足している\n2. `s15` の default はまだ lead 主導である\n3. structured protocol は次章 `s16`\n4. 
autonomous claim は `s17`\n\nつまりこの章は、team system の中でもまだ:\n\n- 名付ける\n- 残す\n- 送る\n- 受け取る\n\nという基礎層を作っている段階です。\n\n## 主要データ構造\n\n### `TeamMember`\n\n```python\nmember = {\n \"name\": \"alice\",\n \"role\": \"tester\",\n \"status\": \"working\",\n}\n```\n\n教学版では、まずこの 3 つが揃っていれば十分です。\n\n- `name`: 誰か\n- `role`: 何を主に担当するか\n- `status`: 今どういう状態か\n\n最初から大量の field を足す必要はありません。 \nこの章で大事なのは「長く存在する actor が立ち上がること」です。\n\n### `TeamConfig`\n\n```python\nconfig = {\n \"team_name\": \"default\",\n \"members\": [member1, member2],\n}\n```\n\n通常は次のような場所に置きます。\n\n```text\n.team/config.json\n```\n\nこの record があると system は再起動後も、\n\n- 以前誰がいたか\n- 誰がどの role を持っていたか\n\nを失わずに済みます。\n\n### `MessageEnvelope`\n\n```python\nmessage = {\n \"type\": \"message\",\n \"from\": \"lead\",\n \"to\": \"alice\",\n \"content\": \"Please review auth module.\",\n \"timestamp\": 1710000000.0,\n}\n```\n\n`envelope` は「本文だけでなくメタ情報も含めて包んだ 1 件の message record」です。\n\nこれを使う理由:\n\n- sender が分かる\n- receiver が分かる\n- message type を分けられる\n- mailbox を durable channel として扱える\n\n## 最小実装の進め方\n\n### Step 1: まず roster を持つ\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.team_dir = team_dir\n self.config_path = team_dir / \"config.json\"\n self.config = self._load_config()\n```\n\nこの章の起点は roster です。 \nroster がないまま team を語ると、結局「今この場で数回呼び出した model たち」にしか見えません。\n\n### Step 2: teammate を spawn する\n\n```python\ndef spawn(self, name: str, role: str, prompt: str):\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n thread.start()\n```\n\nここで大切なのは thread という実装選択そのものではありません。 \n大切なのは次のことです。\n\n**一度 spawn された teammate は、一回限りの tool call ではなく、継続する lifecycle を持つ**\n\n### Step 3: 各 teammate に mailbox を持たせる\n\n教学版で一番分かりやすいのは JSONL inbox です。\n\n```text\n.team/inbox/alice.jsonl\n.team/inbox/bob.jsonl\n```\n\n送信側:\n\n```python\ndef send(self, sender: str, to: str, content: str):\n with open(f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps({\n \"type\": \"message\",\n \"from\": sender,\n \"to\": to,\n \"content\": content,\n \"timestamp\": time.time(),\n }) + \"\\n\")\n```\n\n受信側:\n\n1. すべて読む\n2. JSON として parse する\n3. 
読み終わったら inbox を drain する\n\nここで教えたいのは storage trick ではありません。\n\n教えたいのは:\n\n**協調は shared `messages[]` ではなく、mailbox boundary を通して起こる**\n\nという構造です。\n\n### Step 4: teammate は毎ラウンド mailbox を先に確認する\n\n```python\ndef teammate_loop(name: str, role: str, prompt: str):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n\n while True:\n inbox = bus.read_inbox(name)\n for item in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(item)})\n\n response = client.messages.create(...)\n ...\n```\n\nこの step をあいまいにすると、読者はすぐこう誤解します。\n\n- 新しい仕事を与えるたびに teammate を再生成するのか\n- 元の context はどこに残るのか\n\n正しくは:\n\n- teammate は残る\n- messages も残る\n- 新しい仕事は inbox 経由で入る\n- 次ラウンドに入る前に mailbox を見る\n\nです。\n\n## Teammate / Subagent / Runtime Slot をどう分けるか\n\nこの段階で最も混ざりやすいのはこの 3 つです。 \n次の表をそのまま覚えて構いません。\n\n| 仕組み | 何に近いか | lifecycle | 核心境界 |\n|---|---|---|---|\n| subagent | 一回きりの外部委託 helper | 作って、少し働いて、終わる | 小さな探索文脈の隔離 |\n| runtime slot | 実行中の background slot | その実行が終われば消える | 長い execution を追跡する |\n| teammate | 長期に残る team member | idle と working を行き来する | 名前、role、mailbox、独立 loop |\n\n口語的に言い換えると:\n\n- subagent: 「ちょっと調べて戻ってきて」\n- runtime slot: 「これは裏で走らせて、あとで知らせて」\n- teammate: 「あなたは今後しばらくテスト担当ね」\n\n## ここで教えるべき境界\n\nこの章でまず固めるべきは 3 つだけです。\n\n- roster\n- mailbox\n- 独立 loop\n\nこれだけで「長く残る teammate」という実体は十分立ち上がります。\n\nただし、まだここでは教え過ぎない方がよいものがあります。\n\n### 1. protocol request layer\n\nつまり:\n\n- どの message が普通の会話か\n- どの message が `request_id` を持つ構造化 request か\n\nこれは `s16` の範囲です。\n\n### 2. autonomous claim layer\n\nつまり:\n\n- teammate が自分で仕事を探すか\n- どの policy で self-claim するか\n- resume は何を根拠に行うか\n\nこれは `s17` の範囲です。\n\n`s15` の default はあくまで:\n\n- lead が作る\n- lead が送る\n- teammate が受ける\n\nです。\n\n## 初学者が特によくやる間違い\n\n### 1. teammate を「名前付き subagent」にする\n\n名前が付いていても、実装が\n\n```text\nspawn -> work -> summary -> destroy\n```\n\nなら本質的にはまだ subagent です。\n\n### 2. team 全員で 1 本の `messages` を共有する\n\nこれは一見簡単ですが、文脈汚染がすぐ起きます。\n\n各 teammate は少なくとも:\n\n- 自分の messages\n- 自分の inbox\n- 自分の status\n\nを持つべきです。\n\n### 3. roster を durable にしない\n\nsystem を止めた瞬間に「team に誰がいたか」を完全に失うなら、長期 actor layer としてはかなり弱いです。\n\n### 4. mailbox なしで shared variable だけで会話させる\n\n実装は短くできますが、teammate 間協調の境界が見えなくなります。 \n教学 repo では durable mailbox を置いた方が、読者の心智がずっと安定します。\n\n## 学び終わったら言えるべきこと\n\n少なくとも次の 4 つを自分の言葉で説明できれば、この章の主線は掴めています。\n\n1. teammate の本質は「多 model」ではなく「長期に残る actor identity」である\n2. team system の最小構成は「roster + mailbox + 独立 loop」である\n3. subagent と teammate の違いは lifecycle の長さにある\n4. teammate と runtime slot の違いは、「actor identity」か「live execution」かにある\n\n## 次章で何を足すか\n\nこの章が解いているのは:\n\n> team member が長く存在し、互いに message を送り合えるようにすること\n\n次章 `s16` が解くのは:\n\n> message が単なる自由文ではなく、追跡・承認・拒否・期限切れを持つ protocol object になるとき、どう設計するか\n\nつまり `s15` が「team の存在」を作り、`s16` が「team の構造化協調」を作ります。\n"
+ },
+ {
+ "version": "s16",
+ "slug": "s16-team-protocols",
+ "locale": "ja",
+ "title": "s16: Team Protocols",
+ "kind": "chapter",
+ "filename": "s16-team-protocols.md",
+ "content": "# s16: Team Protocols\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > [ s16 ] > s17 > s18 > s19`\n\n> *mailbox があるだけでは「話せる team」に過ぎません。protocol が入って初めて、「規則に従って協調できる team」になります。*\n\n## この章が解く問題\n\n`s15` までで teammate 同士は message を送り合えます。\n\nしかし自由文だけに頼ると、すぐに 2 つの問題が出ます。\n\n- 明確な承認 / 拒否が必要な場面で、曖昧な返事しか残らない\n- request が複数同時に走ると、どの返答がどの件に対応するのか分からなくなる\n\n特に分かりやすいのは次の 2 場面です。\n\n1. graceful shutdown を依頼したい\n2. 高リスク plan を実行前に approval したい\n\n一見別の話に見えても、骨格は同じです。\n\n```text\nrequester が request を送る\n ->\nreceiver が明確に response する\n ->\n両者が同じ request_id で対応関係を追える\n```\n\nこの章で追加するのは message の量ではなく、\n\n**追跡可能な request-response protocol**\n\nです。\n\n## 併読すると楽になる資料\n\n- 普通の message と protocol request が混ざったら [`glossary.md`](./glossary.md) と [`entity-map.md`](./entity-map.md)\n- `s17` や `s18` に進む前に境界を固めたいなら [`team-task-lane-model.md`](./team-task-lane-model.md)\n- request が主システムへどう戻るか見直したいなら [`s00b-one-request-lifecycle.md`](./s00b-one-request-lifecycle.md)\n\n## 先に言葉をそろえる\n\n### protocol とは何か\n\nここでの `protocol` は難しい通信理論ではありません。\n\n意味は、\n\n> message の形、処理手順、状態遷移を事前に決めた協調ルール\n\nです。\n\n### request_id とは何か\n\n`request_id` は request の一意な番号です。\n\n役割は 1 つで、\n\n> 後から届く response や status update を、元の request と正確に結びつけること\n\nです。\n\n### request-response pattern とは何か\n\nこれも難しく考える必要はありません。\n\n```text\nrequester: この操作をしたい\nreceiver: 承認する / 拒否する\n```\n\nこの往復を、自然文の雰囲気で済ませず、**構造化 record として残す**のがこの章です。\n\n## 最小心智モデル\n\n教学上は、protocol を 2 層で見ると分かりやすくなります。\n\n```text\n1. protocol envelope\n2. durable request record\n```\n\n### protocol envelope\n\nこれは inbox を流れる 1 通の構造化 message です。\n\n```python\n{\n \"type\": \"shutdown_request\",\n \"from\": \"lead\",\n \"to\": \"alice\",\n \"request_id\": \"req_001\",\n \"payload\": {},\n}\n```\n\n### durable request record\n\nこれは request の lifecycle を disk に追う record です。\n\n```python\n{\n \"request_id\": \"req_001\",\n \"kind\": \"shutdown\",\n \"from\": \"lead\",\n \"to\": \"alice\",\n \"status\": \"pending\",\n}\n```\n\nこの 2 層がそろうと system は、\n\n- いま何を送ったのか\n- その request は今どの状態か\n\nを両方説明できるようになります。\n\n## この章の核になるデータ構造\n\n### 1. ProtocolEnvelope\n\nprotocol message は普通の message より多くのメタデータを持ちます。\n\n```python\nmessage = {\n \"type\": \"shutdown_request\",\n \"from\": \"lead\",\n \"to\": \"alice\",\n \"request_id\": \"req_001\",\n \"payload\": {},\n \"timestamp\": 1710000000.0,\n}\n```\n\n特に重要なのは次の 3 つです。\n\n- `type`: これは何の protocol message か\n- `request_id`: どの request thread に属するか\n- `payload`: 本文以外の構造化内容\n\n### 2. RequestRecord\n\nrequest record は `.team/requests/` に durable に保存されます。\n\n```python\nrequest = {\n \"request_id\": \"req_001\",\n \"kind\": \"shutdown\",\n \"from\": \"lead\",\n \"to\": \"alice\",\n \"status\": \"pending\",\n \"created_at\": 1710000000.0,\n \"updated_at\": 1710000000.0,\n}\n```\n\nこの record があることで、system は message を送ったあとでも request の状態を追い続けられます。\n\n教材コードでは実際に次のような path を使います。\n\n```text\n.team/requests/\n req_001.json\n req_002.json\n```\n\nこれにより、\n\n- request の状態を再読込できる\n- protocol の途中経過をあとから確認できる\n- main loop が先へ進んでも request thread が消えない\n\nという利点が生まれます。\n\n### 3. 
状態機械\n\nこの章の state machine は難しくありません。\n\n```text\npending -> approved\npending -> rejected\npending -> expired\n```\n\nここで大事なのは theory ではなく、\n\n**承認系の協調には「いまどの状態か」を explicit に持つ必要がある**\n\nということです。\n\n## 最小実装を段階で追う\n\n### 第 1 段階: team mailbox の上に protocol line を通す\n\nこの章の本質は新しい message type を 2 個足すことではありません。\n\n本質は、\n\n```text\nrequester が protocol action を開始する\n ->\nrequest record を保存する\n ->\nprotocol envelope を inbox に送る\n ->\nreceiver が request_id 付きで response する\n ->\nrecord の status を更新する\n```\n\nという一本の durable flow を通すことです。\n\n### 第 2 段階: shutdown protocol を作る\n\ngraceful shutdown は「thread を即 kill する」ことではありません。\n\n正しい流れは次です。\n\n1. shutdown request を作る\n2. teammate が approve / reject を返す\n3. approve なら後始末して終了する\n\nrequest 側の最小形はこうです。\n\n```python\ndef request_shutdown(target: str):\n request_id = new_id()\n REQUEST_STORE.create({\n \"request_id\": request_id,\n \"kind\": \"shutdown\",\n \"from\": \"lead\",\n \"to\": target,\n \"status\": \"pending\",\n })\n BUS.send(\n \"lead\",\n target,\n \"Please shut down gracefully.\",\n \"shutdown_request\",\n {\"request_id\": request_id},\n )\n```\n\nresponse 側は request_id を使って同じ record を更新します。\n\n```python\ndef handle_shutdown_response(request_id: str, approve: bool):\n record = REQUEST_STORE.update(\n request_id,\n status=\"approved\" if approve else \"rejected\",\n )\n```\n\n### 第 3 段階: plan approval も同じ骨格で扱う\n\n高リスクな変更を teammate が即時実行してしまうと危険なことがあります。\n\nそこで plan approval protocol を入れます。\n\n```python\ndef submit_plan(name: str, plan_text: str):\n request_id = new_id()\n REQUEST_STORE.create({\n \"request_id\": request_id,\n \"kind\": \"plan_approval\",\n \"from\": name,\n \"to\": \"lead\",\n \"status\": \"pending\",\n \"plan\": plan_text,\n })\n```\n\nlead はその `request_id` を見て承認または却下します。\n\n```python\ndef review_plan(request_id: str, approve: bool, feedback: str = \"\"):\n REQUEST_STORE.update(\n request_id,\n status=\"approved\" if approve else \"rejected\",\n feedback=feedback,\n )\n```\n\nここで伝えたい中心は、\n\n**shutdown と plan approval は中身は違っても、request-response correlation の骨格は同じ**\n\nという点です。\n\n## Message / Protocol / Request / Task の境界\n\nこの章で最も混ざりやすい 4 つを表で分けます。\n\n| オブジェクト | 何を答えるか | 典型 field |\n|---|---|---|\n| `MessageEnvelope` | 誰が誰に何を送ったか | `from`, `to`, `content` |\n| `ProtocolEnvelope` | それが構造化 request / response か | `type`, `request_id`, `payload` |\n| `RequestRecord` | その協調フローはいまどこまで進んだか | `kind`, `status`, `from`, `to` |\n| `TaskRecord` | 実際の work goal は何か | `subject`, `status`, `owner`, `blockedBy` |\n\nここで絶対に混ぜないでほしい点は次です。\n\n- protocol request は task そのものではない\n- request store は task board ではない\n- protocol は協調フローを追う\n- task は仕事の進行を追う\n\n## `s15` から何が増えたか\n\n`s15` の team system は「話せる team」でした。\n\n`s16` ではそこへ、\n\n- request_id\n- durable request store\n- approved / rejected の explicit status\n- protocol-specific message type\n\nが入ります。\n\nすると team は単なる chat 集合ではなく、\n\n**追跡可能な coordination system**\n\nに進みます。\n\n## 初学者が混ぜやすいポイント\n\n### 1. request を普通の text message と同じように扱う\n\nこれでは承認状態を追えません。\n\n### 2. request_id を持たせない\n\n同時に複数 request が走った瞬間に対応関係が壊れます。\n\n### 3. request の状態を memory 内 dict にしか置かない\n\nプロセスをまたいで追えず、観測性も悪くなります。\n\n### 4. approved / rejected を曖昧な文章だけで表す\n\nstate machine が読めなくなります。\n\n### 5. 
protocol と task を混同する\n\nplan approval request は「plan を実行してよいか」の協調であって、work item 本体ではありません。\n\n## 前の章とどうつながるか\n\nこの章は `s15` の mailbox-based team を次の段階へ押し上げます。\n\n- `s15`: teammate が message を送れる\n- `s16`: teammate が structured protocol で協調できる\n\nそしてこの先、\n\n- `s17`: idle teammate が自分で task を claim する\n- `s18`: task ごとに isolation lane を持つ\n\nへ進む準備になります。\n\nもしここで protocol の境界が曖昧なままだと、後の autonomy や worktree を読むときに\n\n- 誰が誰に依頼したのか\n- どの state が協調の state で、どれが work の state か\n\nがすぐ混ざります。\n\n## 教学上の境界\n\nこの章でまず教えるべきのは、製品に存在しうる全 protocol の一覧ではありません。\n\n中心は次の 3 点です。\n\n- request と response を同じ `request_id` で結び付けること\n- 承認状態を explicit state として残すこと\n- team coordination を自由文から durable workflow へ進めること\n\nここが見えていれば、後から protocol の種類が増えても骨格は崩れません。\n"
+ },
+ {
+ "version": "s17",
+ "slug": "s17-autonomous-agents",
+ "locale": "ja",
+ "title": "s17: Autonomous Agents",
+ "kind": "chapter",
+ "filename": "s17-autonomous-agents.md",
+ "content": "# s17: Autonomous Agents\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > [ s17 ] > s18 > s19`\n\n> *本当にチームらしくなる瞬間は、人数が増えたときではなく、空いている teammate が次の仕事を自分で拾えるようになったときです。*\n\n## この章が解く問題\n\n`s16` まで来ると、チームにはすでに次のものがあります。\n\n- 長く生きる teammate\n- inbox\n- protocol request / response\n- task board\n\nそれでも、まだ 1 つ大きな詰まりが残っています。\n\n**仕事の割り振りが lead に集中しすぎることです。**\n\nたとえば task board に ready な task が 10 個あっても、\n\n- Alice はこれ\n- Bob はこれ\n- Charlie はこれ\n\nと lead が 1 件ずつ指名し続けるなら、team は増えても coordination の中心は 1 人のままです。\n\nこの章で入れるのは、\n\n**空いている teammate が、自分で board を見て、取ってよい task を安全に claim する仕組み**\n\nです。\n\n## 併読すると楽になる資料\n\n- teammate / task / runtime slot の境界が怪しくなったら [`team-task-lane-model.md`](./team-task-lane-model.md)\n- `auto-claim` を読んで runtime record の置き場所が曖昧なら [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)\n- 長期 teammate と一回限りの subagent の違いが薄れたら [`entity-map.md`](./entity-map.md)\n\n## 先に言葉をそろえる\n\n### 自治とは何か\n\nここで言う `autonomous` は、\n\n> 何の制御もなく勝手に暴走すること\n\nではありません。\n\n正しくは、\n\n> 事前に与えたルールに従って、空いている teammate が次の仕事を自分で選べること\n\nです。\n\nつまり自治は自由放任ではなく、**規則付きの自律再開**です。\n\n### claim とは何か\n\n`claim` は、\n\n> まだ owner が付いていない task を「今から自分が担当する」と確定させること\n\nです。\n\n「見つける」だけでは不十分で、**owner を書き込み、他の teammate が同じ task を取らないようにする**ところまでが claim です。\n\n### idle とは何か\n\n`idle` は終了でも停止でもありません。\n\n意味は次の通りです。\n\n> 今この teammate には active work がないが、まだ system の中で生きていて、新しい input を待てる状態\n\nです。\n\n## 最小心智モデル\n\nこの章を最も簡単に捉えるなら、teammate の lifecycle を 2 フェーズで見ます。\n\n```text\nWORK\n |\n | 今の作業を終える / idle を選ぶ\n v\nIDLE\n |\n +-- inbox に新着がある -> WORK\n |\n +-- task board に claimable task がある -> claim -> WORK\n |\n +-- 一定時間なにもない -> shutdown\n```\n\nここで大事なのは、\n\n**main loop を無限に回し続けることではなく、idle 中に何を見て、どの順番で resume するか**\n\nです。\n\n## この章の核になるデータ構造\n\n### 1. Claimable Predicate\n\n最初に理解すべきなのは、\n\n> どんな task なら「この teammate が今 claim してよい」と判定できるのか\n\nです。\n\n教材コードでは、判定は単に `status == \"pending\"` では終わりません。\n\n```python\ndef is_claimable_task(task: dict, role: str | None = None) -> bool:\n return (\n task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")\n and _task_allows_role(task, role)\n )\n```\n\nこの 4 条件はそれぞれ別の意味を持ちます。\n\n- `status == \"pending\"`: まだ開始していない\n- `not owner`: まだ誰も担当していない\n- `not blockedBy`: 前提 task が残っていない\n- `_task_allows_role(...)`: この teammate の role が claim policy に合っている\n\n最後の条件が特に重要です。\n\ntask は今の教材コードでは次のような role 制約を持てます。\n\n- `claim_role`\n- `required_role`\n\nたとえば、\n\n```python\n{\n \"id\": 7,\n \"subject\": \"Implement login page\",\n \"status\": \"pending\",\n \"owner\": \"\",\n \"blockedBy\": [],\n \"claim_role\": \"frontend\",\n}\n```\n\nなら、空いている teammate 全員が取れるわけではありません。\n\n**frontend role の teammate だけが claim 候補になります。**\n\n### 2. Claim 後の TaskRecord\n\nclaim が成功すると、task record は少なくとも次のように更新されます。\n\n```python\n{\n \"id\": 7,\n \"owner\": \"alice\",\n \"status\": \"in_progress\",\n \"claimed_at\": 1710000000.0,\n \"claim_source\": \"auto\",\n}\n```\n\nこの中で初心者が見落としやすいのは `claimed_at` と `claim_source` です。\n\n- `claimed_at`: いつ取られたか\n- `claim_source`: 手動か自動か\n\nこれがあることで system は、\n\n- 今だれが担当しているか\n- その担当は lead の指名か\n- それとも idle scan による auto-claim か\n\nをあとから説明できます。\n\n### 3. 
Claim Event Log\n\ntask file の更新だけでは、今の最終状態しか見えません。\n\nそこでこの章では claim 操作を別の append-only log にも書きます。\n\n```text\n.tasks/claim_events.jsonl\n```\n\n中身のイメージはこうです。\n\n```python\n{\n \"event\": \"task.claimed\",\n \"task_id\": 7,\n \"owner\": \"alice\",\n \"role\": \"frontend\",\n \"source\": \"auto\",\n \"ts\": 1710000000.0,\n}\n```\n\nこの log があると、\n\n- task がいつ取られたか\n- 誰が取ったか\n- 手動か自動か\n\nが current state とは別に追えます。\n\n### 4. Durable Request Record\n\n`s17` は autonomy を追加する章ですが、`s16` の protocol line を捨てる章ではありません。\n\nそのため shutdown や plan approval の request は引き続き disk に保存されます。\n\n```text\n.team/requests/{request_id}.json\n```\n\nこれは重要です。\n\nなぜなら autonomous teammate は、\n\n> protocol を無視して好きに動く worker\n\nではなく、\n\n> 既存の protocol system の上で、idle 時に自分で次の仕事を探せる teammate\n\nだからです。\n\n### 5. Identity Block\n\ncompact の後や idle からの復帰直後は、teammate が自分の identity を見失いやすくなります。\n\nそのため教材コードには identity block の再注入があります。\n\n```python\n{\n \"role\": \"user\",\n \"content\": \"You are 'alice', role: frontend, team: default. Continue your work. \",\n}\n```\n\nさらに短い assistant acknowledgement も添えています。\n\n```python\n{\"role\": \"assistant\", \"content\": \"I am alice. Continuing.\"}\n```\n\nこの 2 行は装飾ではありません。\n\nここで守っているのは次の 3 点です。\n\n- 私は誰か\n- どの role か\n- どの team に属しているか\n\n## 最小実装を段階で追う\n\n### 第 1 段階: WORK と IDLE を分ける\n\nまず teammate loop を 2 フェーズに分けます。\n\n```python\nwhile True:\n run_work_phase(...)\n should_resume = run_idle_phase(...)\n if not should_resume:\n break\n```\n\nこれで初めて、\n\n- いま作業中なのか\n- いま待機中なのか\n- 次に resume する理由は何か\n\nを分けて考えられます。\n\n### 第 2 段階: idle では先に inbox を見る\n\n`idle` に入ったら最初に見るべきは task board ではなく inbox です。\n\n```python\ndef idle_phase(name: str, messages: list) -> bool:\n inbox = bus.read_inbox(name)\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": json.dumps(inbox),\n })\n return True\n```\n\n理由は単純で、\n\n**明示的に自分宛てに来た仕事の方が、board 上の一般 task より優先度が高い**\n\nからです。\n\n### 第 3 段階: inbox が空なら role 付きで task board を走査する\n\n```python\nunclaimed = scan_unclaimed_tasks(role)\nif unclaimed:\n task = unclaimed[0]\n claim_result = claim_task(\n task[\"id\"],\n name,\n role=role,\n source=\"auto\",\n )\n```\n\nここでの要点は 2 つです。\n\n- `scan_unclaimed_tasks(role)` は role を無視して全件取るわけではない\n- `source=\"auto\"` を書いて claim の由来を残している\n\nつまり自治とは、\n\n> 何でも空いていれば奪うこと\n\nではなく、\n\n> role、block 状態、owner 状態を見たうえで、今この teammate に許された仕事だけを取ること\n\nです。\n\n### 第 4 段階: claim 後は identity と task hint を両方戻す\n\nclaim 成功後は、そのまま resume してはいけません。\n\n```python\nensure_identity_context(messages, name, role, team_name)\nmessages.append({\n \"role\": \"user\",\n \"content\": f\"Task #{task['id']}: {task['subject']} \",\n})\nmessages.append({\n \"role\": \"assistant\",\n \"content\": f\"{claim_result}. 
Working on it.\",\n})\nreturn True\n```\n\nこの段で context に戻しているのは 2 種類の情報です。\n\n- identity: この teammate は誰か\n- fresh work item: いま何を始めたのか\n\nこの 2 つがそろって初めて、次の WORK phase が迷わず進みます。\n\n### 第 5 段階: 長時間なにもなければ shutdown する\n\nidle teammate を永久に残す必要はありません。\n\n教材版では、\n\n> 一定時間 inbox も task board も空なら shutdown\n\nという単純な出口で十分です。\n\nここでの主眼は resource policy の最適化ではなく、\n\n**idle からの再開条件と終了条件を明示すること**\n\nです。\n\n## なぜ claim は原子的でなければならないか\n\n`atomic` という言葉は難しく見えますが、ここでは次の意味です。\n\n> claim 処理は「全部成功する」か「起きない」かのどちらかでなければならない\n\n理由は race condition です。\n\nAlice と Bob が同時に同じ task を見たら、\n\n- Alice も `owner == \"\"` を見る\n- Bob も `owner == \"\"` を見る\n- 両方が自分を owner として保存する\n\nという事故が起こりえます。\n\nそのため教材コードでも lock を使っています。\n\n```python\nwith claim_lock:\n task = load(task_id)\n if task[\"owner\"]:\n return \"already claimed\"\n task[\"owner\"] = name\n task[\"status\"] = \"in_progress\"\n save(task)\n```\n\n初心者向けに言い換えるなら、\n\n**claim は「見てから書く」までを他の teammate に割り込まれずに一気に行う**\n\n必要があります。\n\n## identity 再注入が重要な理由\n\nこれは地味ですが、自治の品質を大きく左右します。\n\ncompact の後や long-lived teammate の再開時には、context 冒頭から次の情報が薄れがちです。\n\n- 私は誰か\n- 何 role か\n- どの team か\n\nこの状態で work を再開すると、\n\n- role に合わない判断をしやすくなる\n- protocol 上の責務を忘れやすくなる\n- それまでの persona がぶれやすくなる\n\nだから教材版では、\n\n> idle から戻る前、または compact 後に identity が薄いなら再注入する\n\nという復帰ルールを置いています。\n\n## `s17` は `s16` を上書きしない\n\nここは誤解しやすいので強調します。\n\n`s17` で増えるのは autonomy ですが、だからといって `s16` の protocol layer が消えるわけではありません。\n\n両者はこういう関係です。\n\n```text\ns16:\n request_id を持つ durable protocol\n\ns17:\n idle teammate が board を見て次の仕事を探せる\n```\n\nつまり `s17` は、\n\n**protocol がある team に autonomy を足す章**\n\nであって、\n\n**自由に動く worker 群へ退化させる章**\n\nではありません。\n\n## 前の章とどうつながるか\n\nこの章は前の複数章が初めて強く結びつく場所です。\n\n- `s12`: task board を作る\n- `s15`: persistent teammate を作る\n- `s16`: request / response protocol を作る\n- `s17`: 指名がなくても次の work を自分で取れるようにする\n\nしたがって `s17` は、\n\n**受け身の team から、自分で回り始める team への橋渡し**\n\nと考えると分かりやすいです。\n\n## 自治するのは long-lived teammate であって subagent ではない\n\nここで `s04` と混ざる人が多いです。\n\nこの章の actor は one-shot subagent ではありません。\n\nこの章の teammate は次の特徴を持ちます。\n\n- 名前がある\n- role がある\n- inbox がある\n- idle state がある\n- 複数回 task を受け取れる\n\n一方、subagent は通常、\n\n- 一度 delegated work を受ける\n- 独立 context で処理する\n- summary を返して終わる\n\nという使い方です。\n\nまた、この章で claim する対象は `s12` の task であり、`s13` の runtime slot ではありません。\n\n## 初学者が混ぜやすいポイント\n\n### 1. `pending` だけ見て `blockedBy` を見ない\n\ntask が `pending` でも dependency が残っていればまだ取れません。\n\n### 2. role 条件を無視する\n\n`claim_role` や `required_role` を見ないと、間違った teammate が task を取ります。\n\n### 3. claim lock を置かない\n\n同一 task の二重 claim が起こります。\n\n### 4. idle 中に board しか見ない\n\nこれでは明示的な inbox message を取りこぼします。\n\n### 5. event log を書かない\n\n「いま誰が持っているか」は分かっても、\n\n- いつ取ったか\n- 自動か手動か\n\nが追えません。\n\n### 6. idle teammate を永遠に残す\n\n教材版では shutdown 条件を持たせた方が lifecycle を理解しやすくなります。\n\n### 7. compact 後に identity を戻さない\n\n長く動く teammate ほど、identity drift が起きやすくなります。\n\n## 教学上の境界\n\nこの章でまず掴むべき主線は 1 本です。\n\n**idle で待つ -> 安全に claim する -> identity を整えて work に戻る**\n\nここで学ぶ中心は自治の骨格であって、\n\n- 高度な scheduler 最適化\n- 分散環境での claim\n- 複雑な fairness policy\n\nではありません。\n\nその先へ進む前に、読者が自分の言葉で次の 1 文を言えることが大切です。\n\n> autonomous teammate とは、空いたときに勝手に暴走する worker ではなく、inbox と task board を規則通りに見て、取ってよい仕事だけを自分で取りにいける長期 actor である。\n"
+ },
+ {
+ "version": "s18",
+ "slug": "s18-worktree-task-isolation",
+ "locale": "ja",
+ "title": "s18: Worktree + Task Isolation",
+ "kind": "chapter",
+ "filename": "s18-worktree-task-isolation.md",
+ "content": "# s18: Worktree + Task Isolation\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > [ s18 ] > s19`\n\n> *task board が答えるのは「何をやるか」、worktree が答えるのは「どこでやるか、しかも互いに踏み荒らさずに」です。*\n\n## この章が解く問題\n\n`s17` までで system はすでに次のことができます。\n\n- task を作る\n- teammate が task を claim する\n- 複数の teammate が並行に作業する\n\nそれでも、全員が同じ working directory で作業するなら、すぐに限界が来ます。\n\n典型的な壊れ方は次の通りです。\n\n- 2 つの task が同じ file を同時に編集する\n- 片方の未完了変更がもう片方の task を汚染する\n- 「この task の変更だけ見たい」が非常に難しくなる\n\nつまり `s12-s17` までで答えられていたのは、\n\n**誰が何をやるか**\n\nまでであって、\n\n**その仕事をどの execution lane で進めるか**\n\nはまだ答えられていません。\n\nそれを担当するのが `worktree` です。\n\n## 併読すると楽になる資料\n\n- task / runtime slot / worktree lane が同じものに見えたら [`team-task-lane-model.md`](./team-task-lane-model.md)\n- task record と worktree record に何を保存すべきか確認したいなら [`data-structures.md`](./data-structures.md)\n- なぜ worktree の章が tasks / teams より後ろに来るか再確認したいなら [`s00e-reference-module-map.md`](./s00e-reference-module-map.md)\n\n## 先に言葉をそろえる\n\n### worktree とは何か\n\nGit に慣れている人なら、\n\n> 同じ repository を別ディレクトリへ独立 checkout した作業コピー\n\nと見て構いません。\n\nまだ Git の言葉に慣れていないなら、まずは次の理解で十分です。\n\n> 1 つの task に割り当てる専用の作業レーン\n\n### isolation とは何か\n\n`isolation` は、\n\n> task A は task A の directory で実行し、task B は task B の directory で実行して、未コミット変更を最初から共有しないこと\n\nです。\n\n### binding とは何か\n\n`binding` は、\n\n> task ID と worktree record を明示的に結びつけること\n\nです。\n\nこれがないと、system は「この directory が何のために存在しているのか」を説明できません。\n\n## 最小心智モデル\n\nこの章は 2 枚の表を別物として見ると一気に分かりやすくなります。\n\n```text\nTask Board\n - 何をやるか\n - 誰が持っているか\n - 今どの状態か\n\nWorktree Registry\n - どこでやるか\n - どの branch / path か\n - どの task に結び付いているか\n```\n\n両者は `task_id` でつながります。\n\n```text\n.tasks/task_12.json\n {\n \"id\": 12,\n \"subject\": \"Refactor auth flow\",\n \"status\": \"in_progress\",\n \"worktree\": \"auth-refactor\"\n }\n\n.worktrees/index.json\n {\n \"worktrees\": [\n {\n \"name\": \"auth-refactor\",\n \"path\": \".worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\"\n }\n ]\n }\n```\n\nこの 2 つを見て、\n\n- task は goal を記録する\n- worktree は execution lane を記録する\n\nと分けて理解できれば、この章の幹はつかめています。\n\n## この章の核になるデータ構造\n\n### 1. TaskRecord 側の lane 情報\n\nこの段階の教材コードでは、task 側に単に `worktree` という名前だけがあるわけではありません。\n\n```python\ntask = {\n \"id\": 12,\n \"subject\": \"Refactor auth flow\",\n \"status\": \"in_progress\",\n \"owner\": \"alice\",\n \"worktree\": \"auth-refactor\",\n \"worktree_state\": \"active\",\n \"last_worktree\": \"auth-refactor\",\n \"closeout\": None,\n}\n```\n\nそれぞれの意味は次の通りです。\n\n- `worktree`: 今この task がどの lane に結び付いているか\n- `worktree_state`: その lane が `active` / `kept` / `removed` / `unbound` のどれか\n- `last_worktree`: 直近で使っていた lane 名\n- `closeout`: 最後にどういう終わらせ方をしたか\n\nここが重要です。\n\ntask 側はもはや単に「現在の directory 名」を持っているだけではありません。\n\n**いま結び付いている lane と、最後にどう閉じたかまで記録し始めています。**\n\n### 2. WorktreeRecord\n\nworktree registry 側の record は path の写しではありません。\n\n```python\nworktree = {\n \"name\": \"auth-refactor\",\n \"path\": \".worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\",\n \"last_entered_at\": 1710000000.0,\n \"last_command_at\": 1710000012.0,\n \"last_command_preview\": \"pytest tests/auth -q\",\n \"closeout\": None,\n}\n```\n\nここで答えているのは path だけではありません。\n\n- いつ lane に入ったか\n- 最近何を実行したか\n- どんな closeout が最後に行われたか\n\nつまり worktree record は、\n\n**directory mapping ではなく、観測可能な execution lane record**\n\nです。\n\n### 3. 
CloseoutRecord\n\ncloseout は「最後に削除したかどうか」だけではありません。\n\n教材コードでは次のような record を残します。\n\n```python\ncloseout = {\n \"action\": \"keep\",\n \"reason\": \"Need follow-up review\",\n \"at\": 1710000100.0,\n}\n```\n\nこれにより system は、\n\n- keep したのか\n- remove したのか\n- なぜそうしたのか\n\nを state として残せます。\n\n初心者にとって大事なのはここです。\n\n**closeout は単なる cleanup コマンドではなく、execution lane の終わり方を明示する操作**\n\nです。\n\n### 4. Event Record\n\nworktree は lifecycle が長いので event log も必要です。\n\n```python\n{\n \"event\": \"worktree.closeout.keep\",\n \"task_id\": 12,\n \"worktree\": \"auth-refactor\",\n \"reason\": \"Need follow-up review\",\n \"ts\": 1710000100.0,\n}\n```\n\nなぜ state file だけでは足りないかというと、lane の lifecycle には複数段階があるからです。\n\n- create\n- enter\n- run\n- keep\n- remove\n- remove failed\n\nappend-only の event があれば、いまの最終状態だけでなく、\n\n**そこへ至る途中の挙動**\n\nも追えます。\n\n## 最小実装を段階で追う\n\n### 第 1 段階: 先に task を作り、そのあと lane を作る\n\n順番は非常に大切です。\n\n```python\ntask = tasks.create(\"Refactor auth flow\")\nworktrees.create(\"auth-refactor\", task_id=task[\"id\"])\n```\n\nこの順番にする理由は、\n\n**worktree は task の代替ではなく、task にぶら下がる execution lane**\n\nだからです。\n\n最初に goal があり、そのあと goal に lane を割り当てます。\n\n### 第 2 段階: worktree を作り、registry に書く\n\n```python\ndef create(self, name: str, task_id: int):\n path = self.root / \".worktrees\" / name\n branch = f\"wt/{name}\"\n\n run_git([\"worktree\", \"add\", \"-b\", branch, str(path), \"HEAD\"])\n\n record = {\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"task_id\": task_id,\n \"status\": \"active\",\n }\n self.index[\"worktrees\"].append(record)\n self._save_index()\n```\n\nここで registry は次を答えられるようになります。\n\n- lane 名\n- 実 directory\n- branch\n- 対応 task\n- active かどうか\n\n### 第 3 段階: task record 側も同時に更新する\n\nlane registry を書くだけでは不十分です。\n\n```python\ndef bind_worktree(task_id: int, name: str):\n task = tasks.load(task_id)\n task[\"worktree\"] = name\n task[\"last_worktree\"] = name\n task[\"worktree_state\"] = \"active\"\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n tasks.save(task)\n```\n\nなぜ両側へ書く必要があるか。\n\nもし registry だけ更新して task board 側を更新しなければ、\n\n- task 一覧から lane が見えない\n- closeout 時にどの task を終わらせるか分かりにくい\n- crash 後の再構成が不自然になる\n\nからです。\n\n### 第 4 段階: lane に入ることと、lane で command を実行することを分ける\n\n教材コードでは `enter` と `run` を分けています。\n\n```python\nworktree_enter(\"auth-refactor\")\nworktree_run(\"auth-refactor\", \"pytest tests/auth -q\")\n```\n\n底では本質的に次のことをしています。\n\n```python\ndef enter(self, name: str):\n self._update_entry(name, last_entered_at=time.time())\n self.events.emit(\"worktree.enter\", ...)\n\ndef run(self, name: str, command: str):\n subprocess.run(command, cwd=worktree_path, ...)\n```\n\n特に大事なのは `cwd=worktree_path` です。\n\n同じ `pytest` でも、どの `cwd` で走るかによって影響範囲が変わります。\n\n`enter` を別操作として教える理由は、読者に次の境界を見せるためです。\n\n- lane を割り当てた\n- 実際にその lane へ入った\n- その lane で command を実行した\n\nこの 3 段階が分かれているからこそ、\n\n- `last_entered_at`\n- `last_command_at`\n- `last_command_preview`\n\nのような観測項目が自然に見えてきます。\n\n### 第 5 段階: 終わるときは closeout を明示する\n\n教材上は、`keep` と `remove` をバラバラの小技として見せるより、\n\n> closeout という 1 つの判断に 2 分岐ある\n\nと見せた方が心智が安定します。\n\n```python\nworktree_closeout(\n name=\"auth-refactor\",\n action=\"keep\", # or \"remove\"\n reason=\"Need follow-up review\",\n complete_task=False,\n)\n```\n\nこれで読者は次のことを一度に理解できます。\n\n- lane の終わらせ方には選択肢がある\n- その選択には理由を持たせられる\n- closeout は task record / lane record / event log に反映される\n\nもちろん実装下層では、\n\n- `worktree_keep(name)`\n- `worktree_remove(name, reason=..., complete_task=True)`\n\nのような分離 API を持っていても構いません。\n\nただし教学の主線では、\n\n**closeout decision 
-> keep / remove**\n\nという形にまとめた方が初心者には伝わります。\n\n## なぜ `status` と `worktree_state` を分けるのか\n\nこれは非常に大事な区別です。\n\n初学者はよく、\n\n> task に `status` があるなら十分ではないか\n\nと考えます。\n\nしかし実際は答えている質問が違います。\n\n- `task.status`: その仕事が `pending` / `in_progress` / `completed` のどれか\n- `worktree_state`: その execution lane が `active` / `kept` / `removed` / `unbound` のどれか\n\nたとえば、\n\n```text\ntask は completed\nでも worktree は kept\n```\n\nという状態は自然に起こります。\n\nreview 用に directory を残しておきたいからです。\n\nしたがって、\n\n**goal state と lane state は同じ field に潰してはいけません。**\n\n## なぜ worktree は「Git の小技」で終わらないのか\n\n初見では「別 directory を増やしただけ」に見えるかもしれません。\n\nでも教学上の本質はそこではありません。\n\n本当に重要なのは、\n\n**task と execution directory の対応関係を明示 record として持つこと**\n\nです。\n\nそれがあるから system は、\n\n- どの lane がどの task に属するか\n- 完了時に何を closeout すべきか\n- crash 後に何を復元すべきか\n\nを説明できます。\n\n## 前の章とどうつながるか\n\nこの章は前段を次のように結びます。\n\n- `s12`: task ID を与える\n- `s15-s17`: teammate と claim を与える\n- `s18`: 各 task に独立 execution lane を与える\n\n流れで書くとこうです。\n\n```text\ntask を作る\n ->\nteammate が claim する\n ->\nsystem が worktree lane を割り当てる\n ->\ncommands がその lane の directory で走る\n ->\n終了時に keep / remove を選ぶ\n```\n\nここまで来ると multi-agent の並行作業が「同じ場所に集まる chaos」ではなく、\n\n**goal と lane を分けた協調システム**\n\nとして見えてきます。\n\n## worktree は task そのものではない\n\nここは何度でも繰り返す価値があります。\n\n- task は「何をやるか」\n- worktree は「どこでやるか」\n\nです。\n\n同様に、\n\n- runtime slot は「今動いている execution」\n- worktree lane は「どの directory / branch で動くか」\n\nという別軸です。\n\nもしこの辺りが混ざり始めたら、次を開いて整理し直してください。\n\n- [`team-task-lane-model.md`](./team-task-lane-model.md)\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)\n- [`entity-map.md`](./entity-map.md)\n\n## 初学者が混ぜやすいポイント\n\n### 1. registry だけあって task record に `worktree` がない\n\ntask board から lane の情報が見えなくなります。\n\n### 2. task ID はあるのに command が repo root で走っている\n\n`cwd` が切り替わっていなければ isolation は成立していません。\n\n### 3. `remove` だけを覚えて closeout の意味を教えない\n\n読者は「directory を消す小技」としか理解できなくなります。\n\n### 4. remove 前に dirty state を気にしない\n\n教材版でも最低限、\n\n**消す前に未コミット変更を確認する**\n\nという原則は持たせるべきです。\n\n### 5. `worktree_state` や `closeout` を持たない\n\nlane の終わり方が state として残らなくなります。\n\n### 6. lane を増やすだけで掃除しない\n\n長く使うと registry も directory もすぐ乱れます。\n\n### 7. event log を持たない\n\ncreate / remove failure や binding ミスの調査が極端にやりづらくなります。\n\n## 教学上の境界\n\nこの章でまず教えるべき中心は、製品レベルの Git 運用細目ではありません。\n\n中心は次の 3 行です。\n\n- task が「何をやるか」を記録する\n- worktree が「どこでやるか」を記録する\n- enter / run / closeout が execution lane の lifecycle を構成する\n\nmerge 自動化、複雑な回収 policy、cross-machine execution などは、その幹が見えてからで十分です。\n\nこの章を読み終えた読者が次の 1 文を言えれば成功です。\n\n> task system は仕事の目標を管理し、worktree system はその仕事を安全に進めるための独立レーンを管理する。\n"
+ },
+ {
+ "version": "s19",
+ "slug": "s19-mcp-plugin",
+ "locale": "ja",
+ "title": "s19: MCP & Plugin",
+ "kind": "chapter",
+ "filename": "s19-mcp-plugin.md",
+ "content": "# s19: MCP & Plugin\n\n`s00 > s01 > s02 > s03 > s04 > s05 > s06 > s07 > s08 > s09 > s10 > s11 > s12 > s13 > s14 > s15 > s16 > s17 > s18 > [ s19 ]`\n\n> *すべての能力を主プログラムへ直書きする必要はない。外部能力も同じ routing 面へ接続できる。*\n\n## この章が本当に教えるもの\n\n前の章までは、ツールの多くが自分の Python コード内にありました。\n\nこれは教学として正しい出発点です。\n\nしかしシステムが大きくなると、自然に次の要望が出ます。\n\n> \"外部プログラムの能力を、毎回主プログラムを書き換えずに使えないか?\"\n\nそれに答えるのが MCP です。\n\n## MCP を一番簡単に言うと\n\nMCP は:\n\n**agent が外部 capability server と会話するための標準的な方法**\n\nと考えれば十分です。\n\n主線は次の 4 ステップです。\n\n1. 外部 server を起動する\n2. どんなツールがあるか聞く\n3. 必要な呼び出しをその server へ転送する\n4. 結果を標準化して主ループへ戻す\n\n## なぜ最後の章なのか\n\nMCP は出発点ではありません。\n\n先に理解しておくべきものがあります。\n\n- agent loop\n- tool routing\n- permissions\n- tasks\n- worktree isolation\n\nそれらが見えてからだと、MCP は:\n\n**新しい capability source**\n\nとして自然に理解できます。\n\n## 主線とどう併読するか\n\n- MCP を「遠隔 tool」だけで理解しているなら、[`s19a-mcp-capability-layers.md`](./s19a-mcp-capability-layers.md) を読んで tools、resources、prompts、plugin discovery を 1 つの platform boundary へ戻します。\n- 外部 capability がなぜ同じ execution surface へ戻るのかを確かめたいなら、[`s02b-tool-execution-runtime.md`](./s02b-tool-execution-runtime.md) を併読します。\n- query control と外部 capability routing が頭の中で分離し始めたら、[`s00a-query-control-plane.md`](./s00a-query-control-plane.md) に戻ります。\n\n## 最小の心智モデル\n\n```text\nLLM\n |\n | tool を呼びたい\n v\nAgent tool router\n |\n +-- native tool -> local Python handler\n |\n +-- MCP tool -> external MCP server\n |\n v\n return result\n```\n\n## 重要な 3 要素\n\n### 1. `MCPClient`\n\n役割:\n\n- server へ接続\n- tool 一覧取得\n- tool 呼び出し\n\n### 2. 命名規則\n\n外部ツールとローカルツールが衝突しないように prefix を付けます。\n\n```text\nmcp__{server}__{tool}\n```\n\n例:\n\n```text\nmcp__postgres__query\nmcp__browser__open_tab\n```\n\n### 3. 1 本の unified router\n\n```python\nif tool_name.startswith(\"mcp__\"):\n return mcp_router.call(tool_name, arguments)\nelse:\n return native_handler(arguments)\n```\n\n## Plugin は何をするか\n\nMCP が:\n\n> 外部 server とどう会話するか\n\nを扱うなら、plugin は:\n\n> その server をどう発見し、どう設定するか\n\nを扱います。\n\n最小 plugin は:\n\n```text\n.claude-plugin/\n plugin.json\n```\n\nだけでも十分です。\n\n## 最小設定\n\n```json\n{\n \"name\": \"my-db-tools\",\n \"version\": \"1.0.0\",\n \"mcpServers\": {\n \"postgres\": {\n \"command\": \"npx\",\n \"args\": [\"-y\", \"@modelcontextprotocol/server-postgres\"]\n }\n }\n}\n```\n\nこれは要するに:\n\n> \"この server が必要なら、このコマンドで起動する\"\n\nと主プログラムへ教えているだけです。\n\n## システム全体へどう接続するか\n\nMCP が急に難しく見えるのは、別世界の仕組みとして見てしまうときです。 \nより安定した心智モデルは次です。\n\n```text\nstartup\n ->\nplugin loader が manifest を見つける\n ->\nserver config を取り出す\n ->\nMCP client が connect / list_tools する\n ->\nexternal tools を同じ tool pool に正規化して入れる\n\nruntime\n ->\nLLM が tool_use を出す\n ->\n共有 permission gate\n ->\nnative route または MCP route\n ->\nresult normalization\n ->\n同じ loop へ tool_result を返す\n```\n\n入口は違っても、control plane と execution plane は同じです。\n\n## 重要なデータ構造\n\n### 1. server config\n\n```python\n{\n \"command\": \"npx\",\n \"args\": [\"-y\", \"...\"],\n \"env\": {}\n}\n```\n\n### 2. 標準化された外部ツール定義\n\n```python\n{\n \"name\": \"mcp__postgres__query\",\n \"description\": \"Run a SQL query\",\n \"input_schema\": {...}\n}\n```\n\n### 3. 
client registry\n\n```python\nclients = {\n \"postgres\": mcp_client_instance\n}\n```\n\n## 絶対に崩してはいけない境界\n\nこの章で最も重要なのは:\n\n**外部ツールも同じ permission 面を通る**\n\nということです。\n\nMCP が permission を素通りしたら、外側に安全穴を開けるだけです。\n\n## Plugin / Server / Tool を同じ層にしない\n\n| 層 | 何か | 何を担当するか |\n|---|---|---|\n| plugin manifest | 設定宣言 | どの server を見つけて起動するかを教える |\n| MCP server | 外部 process / connection | 能力の集合を expose する |\n| MCP tool | server が出す 1 つの callable capability | モデルが実際に呼ぶ対象 |\n\n最短で覚えるなら:\n\n- plugin = discovery\n- server = connection\n- tool = invocation\n\n## 初学者が迷いやすい点\n\n### 1. いきなりプロトコル細部へ入る\n\n先に見るべきは capability routing です。\n\n### 2. MCP を別世界だと思う\n\n実際には、同じ routing、同じ permission、同じ result append に戻します。\n\n### 3. 正規化を省く\n\n外部ツールをローカルツールと同じ形へ揃えないと、後の心智が急に重くなります。\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s19_mcp_plugin.py\n```\n"
+ },
+ {
+ "version": null,
+ "slug": "s19a-mcp-capability-layers",
+ "locale": "ja",
+ "title": "s19a: MCP Capability Layers",
+ "kind": "bridge",
+ "filename": "s19a-mcp-capability-layers.md",
+ "content": "# s19a: MCP Capability Layers\n\n> `s19` の主線は引き続き tools-first で進めるべきです。\n> その上で、この bridge doc は次の心智を足します。\n>\n> **MCP は単なる外部 tool 接続ではなく、複数の capability layer を持つ platform です。**\n\n## 主線とどう併読するか\n\nMCP を主線から外れずに学ぶなら次の順がよいです。\n\n- まず [`s19-mcp-plugin.md`](./s19-mcp-plugin.md) を読み、tools-first の入口を固める\n- 次に [`s02a-tool-control-plane.md`](./s02a-tool-control-plane.md) を見直し、外部 capability がどう unified tool bus に戻るかを見る\n- state record が混ざり始めたら [`data-structures.md`](./data-structures.md) を見直す\n- concept boundary が混ざり始めたら [`glossary.md`](./glossary.md) と [`entity-map.md`](./entity-map.md) を見直す\n\n## なぜ別立てで必要か\n\n教材 repo として、正文を external tools から始めるのは正しいです。\n\n最も入りやすい入口は:\n\n- 外部 server に接続する\n- tool 定義を受け取る\n- tool を呼ぶ\n- 結果を agent へ戻す\n\nしかし完成度を上げようとすると、すぐ次の問いに出会います。\n\n- server は stdio / HTTP / SSE / WebSocket のどれでつながるのか\n- なぜ `connected` の server もあれば `pending` や `needs-auth` の server もあるのか\n- resources や prompts は tools とどう並ぶのか\n- elicitation はなぜ特別な対話になるのか\n- OAuth のような auth flow はどの層で理解すべきか\n\ncapability-layer map がないと、MCP は急に散らばって見えます。\n\n## まず用語\n\n### capability layer とは\n\ncapability layer は:\n\n> 大きな system の中の 1 つの責務面\n\nです。\n\nMCP のすべてを 1 つの袋に入れないための考え方です。\n\n### transport とは\n\ntransport は接続通路です。\n\n- stdio\n- HTTP\n- SSE\n- WebSocket\n\n### elicitation とは\n\nこれは見慣れない用語ですが、教材版では次の理解で十分です。\n\n> MCP server 側が追加情報を要求し、user からさらに入力を引き出す対話\n\nつまり常に:\n\n> agent calls tool -> tool returns result\n\nだけとは限らず、server 側から:\n\n> 続けるためにもっと入力が必要\n\nと言ってくる場合があります。\n\n## 最小の心智モデル\n\nMCP を 6 層で見ると整理しやすいです。\n\n```text\n1. Config Layer\n server 設定がどう表現されるか\n\n2. Transport Layer\n 何の通路で接続するか\n\n3. Connection State Layer\n connected / pending / failed / needs-auth\n\n4. Capability Layer\n tools / resources / prompts / elicitation\n\n5. Auth Layer\n 認証が必要か、認証状態は何か\n\n6. Router Integration Layer\n tool routing / permission / notifications にどう戻るか\n```\n\nここで最重要なのは:\n\n**tools は一層であって、MCP の全体ではない**\n\nという点です。\n\n## なぜ正文は tools-first のままでよいか\n\n教材として大事なポイントです。\n\nMCP に複数 layer があっても、正文主線はまず次で十分です。\n\n### Step 1: 外部 tools から入る\n\nこれは読者がすでに学んだものと最も自然につながります。\n\n- local tools\n- external tools\n- 1 本の shared router\n\n### Step 2: その上で他の layer があると知らせる\n\n例えば:\n\n- resources\n- prompts\n- elicitation\n- auth\n\n### Step 3: どこまで実装するかを決める\n\nこれが教材 repo の目的に合っています。\n\n**まず似た system を作り、その後で platform layer を厚くする**\n\n## 主要 record\n\n### 1. `ScopedMcpServerConfig`\n\n教材版でも最低限この概念は見せるべきです。\n\n```python\nconfig = {\n \"name\": \"postgres\",\n \"type\": \"stdio\",\n \"command\": \"npx\",\n \"args\": [\"-y\", \"...\"],\n \"scope\": \"project\",\n}\n```\n\n`scope` が重要なのは、server config が 1 つの場所からだけ来るとは限らないからです。\n\n### 2. MCP connection state\n\n```python\nserver_state = {\n \"name\": \"postgres\",\n \"status\": \"connected\", # pending / failed / needs-auth / disabled\n \"config\": {...},\n}\n```\n\n### 3. `MCPToolSpec`\n\n```python\ntool = {\n \"name\": \"mcp__postgres__query\",\n \"description\": \"...\",\n \"input_schema\": {...},\n}\n```\n\n### 4. 
`ElicitationRequest`\n\n```python\nrequest = {\n \"server_name\": \"some-server\",\n \"message\": \"Please provide additional input\",\n \"requested_schema\": {...},\n}\n```\n\nここでの教材上の要点は、elicitation を今すぐ全部実装することではありません。\n\n要点は:\n\n**MCP は常に一方向の tool invocation だけとは限らない**\n\nという点です。\n\n## より整理された図\n\n```text\nMCP Config\n |\n v\nTransport\n |\n v\nConnection State\n |\n +-- connected\n +-- pending\n +-- needs-auth\n +-- failed\n |\n v\nCapabilities\n +-- tools\n +-- resources\n +-- prompts\n +-- elicitation\n |\n v\nRouter / Permission / Notification Integration\n```\n\n## なぜ auth を主線の中心にしない方がよいか\n\nauth は platform 全体では本物の layer です。\n\nしかし正文が早い段階で OAuth や vendor 固有 detail へ落ちると、初学者は system shape を失います。\n\n教材としては次の順がよいです。\n\n- まず auth layer が存在すると知らせる\n- 次に `connected` と `needs-auth` が違う connection state だと教える\n- さらに進んだ platform work の段階で auth state machine を詳しく扱う\n\nこれなら正確さを保ちつつ、主線を壊しません。\n\n## `s19` と `s02a` との関係\n\n- `s19` 本文は tools-first の external capability path を教える\n- この note は broader platform map を補う\n- `s02a` は MCP capability が unified tool control plane にどう戻るかを補う\n\n三つを合わせて初めて、読者は本当の構図を持てます。\n\n**MCP は外部 capability platform であり、tools はその最初の切り口にすぎない**\n\n## 初学者がやりがちな間違い\n\n### 1. MCP を外部 tool catalog だけだと思う\n\nその理解だと resources / prompts / auth / elicitation が後で急に見えて混乱します。\n\n### 2. transport や OAuth detail に最初から沈み込む\n\nこれでは主線が壊れます。\n\n### 3. MCP tool を permission の外に置く\n\nsystem boundary に危険な横穴を開けます。\n\n### 4. server config・connection state・exposed capabilities を一つに混ぜる\n\nこの三層は概念的に分けておくべきです。\n"
+ },
+ {
+ "version": null,
+ "slug": "teaching-scope",
+ "locale": "ja",
+ "title": "教材の守備範囲",
+ "kind": "bridge",
+ "filename": "teaching-scope.md",
+ "content": "# 教材の守備範囲\n\n> この文書は、この教材が何を教え、何を意図的に主線から外すかを明示するためのものです。\n\n## この教材の目標\n\nこれは、ある実運用コードベースを逐行で注釈するためのリポジトリではありません。\n\n本当の目標は:\n\n**高完成度の coding-agent harness を 0 から自力で作れるようにすること**\n\nです。\n\nそのために守るべき条件は 3 つあります。\n\n1. 学習者が本当に自分で作り直せること\n2. 主線が side detail に埋もれないこと\n3. 実在しない mechanism を学ばせないこと\n\n## 主線章で必ず明示すべきこと\n\n各章は次をはっきりさせるべきです。\n\n- その mechanism が何の問題を解くか\n- どの module / layer に属するか\n- どんな state を持つか\n- どんな data structure を導入するか\n- loop にどうつながるか\n- runtime flow がどう変わるか\n\n## 主線を支配させない方がよいもの\n\n次の話題は存在してよいですが、初心者向け主線の中心に置くべきではありません。\n\n- packaging / build / release flow\n- cross-platform compatibility glue\n- telemetry / enterprise policy wiring\n- historical compatibility branches\n- product 固有の naming accident\n- 上流コードとの逐行一致\n\n## ここでいう高忠実度とは何か\n\n高忠実度とは、すべての周辺 detail を 1:1 で再現することではありません。\n\nここで寄せるべき対象は:\n\n- core runtime model\n- module boundaries\n- key records\n- state transitions\n- major subsystem cooperation\n\nつまり:\n\n**幹には忠実に、枝葉は教材として意識的に簡略化する**\n\nということです。\n\n## 想定読者\n\n標準的な想定読者は:\n\n- 基本的な Python は読める\n- 関数、クラス、list、dict は分かる\n- ただし agent platform は初学者でもよい\n\nしたがって文章は:\n\n- 先に概念を説明する\n- 1つの概念を1か所で完結させる\n- `what -> why -> how` の順で進める\n\nのが望ましいです。\n\n## 各章の推奨構成\n\n1. これが無いと何が困るか\n2. 先に新しい言葉を説明する\n3. 最小の心智モデルを示す\n4. 主要 record / data structure を示す\n5. 最小で正しい実装を示す\n6. loop への接続点を示す\n7. 初学者がやりがちな誤りを示す\n8. 高完成度版で後から足すものを示す\n\n## 用語の扱い\n\n次の種類の語が出るときは、名前だけ投げず意味を説明した方がよいです。\n\n- design pattern\n- data structure\n- concurrency term\n- protocol / networking term\n- 一般的ではない engineering vocabulary\n\n例:\n\n- state machine\n- scheduler\n- queue\n- worktree\n- DAG\n- protocol envelope\n\n## 最小正解版の原則\n\n現実の mechanism は複雑でも、教材は最初から全分岐を見せる必要はありません。\n\nよい順序は:\n\n1. 最小で正しい版を示す\n2. それで既に解ける core problem を示す\n3. 後で何を足すかを示す\n\n例:\n\n- permission: `deny -> mode -> allow -> ask`\n- error recovery: 主要な回復枝から始める\n- task system: records / dependencies / unlocks から始める\n- team protocol: request / response + `request_id` から始める\n\n## 逆向きソースの使い方\n\n逆向きで得たソースは:\n\n**保守者の校正材料**\n\nとして使うのが正しいです。\n\n役割は:\n\n- 主線 mechanism の説明がズレていないか確かめる\n- 重要な境界や record が抜けていないか確かめる\n- 教材実装が fiction に流れていないか確かめる\n\n読者がそれを見ないと本文を理解できない構成にしてはいけません。\n\n## 一文で覚える\n\n**よい教材は、細部をたくさん言うことより、重要な細部を完全に説明し、重要でない細部を安全に省くことによって質が決まります。**\n"
+ },
+ {
+ "version": null,
+ "slug": "team-task-lane-model",
"locale": "ja",
- "title": "s12: Worktree + Task Isolation",
- "content": "# s12: Worktree + Task Isolation\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"各自のディレクトリで作業し、互いに干渉しない\"* -- タスクは目標を管理、worktree はディレクトリを管理、IDで紐付け。\n\n## 問題\n\ns11までにエージェントはタスクを自律的に確保して完了できるようになった。しかし全タスクが1つの共有ディレクトリで走る。2つのエージェントが同時に異なるモジュールをリファクタリングすると衝突する: 片方が`config.py`を編集し、もう片方も`config.py`を編集し、未コミットの変更が混ざり合い、どちらもクリーンにロールバックできない。\n\nタスクボードは*何をやるか*を追跡するが、*どこでやるか*には関知しない。解決策: 各タスクに専用のgit worktreeディレクトリを与える。タスクが目標を管理し、worktreeが実行コンテキストを管理する。タスクIDで紐付ける。\n\n## 解決策\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## 仕組み\n\n1. **タスクを作成する。** まず目標を永続化する。\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **worktreeを作成してタスクに紐付ける。** `task_id`を渡すと、タスクが自動的に`in_progress`に遷移する。\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\n紐付けは両側に状態を書き込む:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **worktree内でコマンドを実行する。** `cwd`が分離ディレクトリを指す。\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **終了処理。** 2つの選択肢:\n - `worktree_keep(name)` -- ディレクトリを保持する。\n - `worktree_remove(name, complete_task=True)` -- ディレクトリを削除し、紐付けられたタスクを完了し、イベントを発行する。1回の呼び出しで後片付けと完了を処理する。\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. 
**イベントストリーム。** ライフサイクルの各ステップが`.worktrees/events.jsonl`に記録される:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\n発行されるイベント: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。\n\nクラッシュ後も`.tasks/` + `.worktrees/index.json`から状態を再構築できる。会話メモリは揮発性だが、ファイル状態は永続的だ。\n\n## s11からの変更点\n\n| Component | Before (s11) | After (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| Coordination | Task board (owner/status) | Task board + explicit worktree binding |\n| Execution scope | Shared directory | Task-scoped isolated directory |\n| Recoverability | Task status only | Task status + worktree index |\n| Teardown | Task completion | Task completion + explicit keep/remove |\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n"
+ "title": "Team Task Lane Model",
+ "kind": "bridge",
+ "filename": "team-task-lane-model.md",
+ "content": "# Team Task Lane Model\n\n> `s15-s18` に入ると、関数名よりも先に混ざりやすいものがあります。\n>\n> それは、\n>\n> **誰が働き、誰が調整し、何が目標を記録し、何が実行レーンを提供しているのか**\n>\n> という層の違いです。\n\n## この橋渡し資料が解決すること\n\n`s15-s18` を通して読むと、次の言葉が一つの曖昧な塊になりやすくなります。\n\n- teammate\n- protocol request\n- task\n- runtime task\n- worktree\n\n全部「仕事が進む」ことに関係していますが、同じ層ではありません。\n\nここを分けないと、後半が急に分かりにくくなります。\n\n- teammate は task と同じなのか\n- `request_id` と `task_id` は何が違うのか\n- worktree は runtime task の一種なのか\n- task が終わっているのに、なぜ worktree が kept のままなのか\n\nこの資料は、その層をきれいに分けるためのものです。\n\n## 読む順番\n\n1. [`s15-agent-teams.md`](./s15-agent-teams.md) で長寿命 teammate を確認する\n2. [`s16-team-protocols.md`](./s16-team-protocols.md) で追跡可能な request-response を確認する\n3. [`s17-autonomous-agents.md`](./s17-autonomous-agents.md) で自律 claim を確認する\n4. [`s18-worktree-task-isolation.md`](./s18-worktree-task-isolation.md) で隔離 execution lane を確認する\n\n用語が混ざってきたら、次も見直してください。\n\n- [`entity-map.md`](./entity-map.md)\n- [`data-structures.md`](./data-structures.md)\n- [`s13a-runtime-task-model.md`](./s13a-runtime-task-model.md)\n\n## まずはこの区別を固定する\n\n```text\nteammate\n = 長期に協力する主体\n\nprotocol request\n = チーム内で追跡される調整要求\n\ntask\n = 何をやるべきか\n\nruntime task / execution slot\n = 今まさに動いている実行単位\n\nworktree\n = 他の変更とぶつからずに仕事を進める実行ディレクトリ\n```\n\n特に混ざりやすいのは最後の3つです。\n\n- `task`\n- `runtime task`\n- `worktree`\n\n毎回、次の3つを別々に問い直してください。\n\n- これは目標か\n- これは実行中の単位か\n- これは隔離された実行ディレクトリか\n\n## 一番小さい図\n\n```text\nTeam Layer\n teammate: alice (frontend)\n\nProtocol Layer\n request_id=req_01\n kind=plan_approval\n status=pending\n\nWork Graph Layer\n task_id=12\n subject=\"Implement login page\"\n owner=\"alice\"\n status=\"in_progress\"\n\nRuntime Layer\n runtime_id=rt_01\n type=in_process_teammate\n status=running\n\nExecution Lane Layer\n worktree=login-page\n path=.worktrees/login-page\n status=active\n```\n\nこの中で、仕事そのものの目標を表しているのは一つだけです。\n\n> `task_id=12`\n\n他は、その目標のまわりで協調・実行・分離を支える層です。\n\n## 1. Teammate: 誰が協力しているか\n\n`s15` で導入される層です。\n\nここが答えること:\n\n- 長寿命 worker の名前\n- 役割\n- `working` / `idle` / `shutdown`\n- 独立した inbox を持つか\n\n例:\n\n```python\nmember = {\n \"name\": \"alice\",\n \"role\": \"frontend\",\n \"status\": \"idle\",\n}\n```\n\n大事なのは「agent をもう1個増やす」ことではありません。\n\n> 繰り返し仕事を受け取れる長寿命の身元\n\nこれが本質です。\n\n## 2. Protocol Request: 何を調整しているか\n\n`s16` の層です。\n\nここが答えること:\n\n- 誰が誰に依頼したか\n- どんな種類の request か\n- pending なのか、もう解決済みなのか\n\n例:\n\n```python\nrequest = {\n \"request_id\": \"a1b2c3d4\",\n \"kind\": \"plan_approval\",\n \"from\": \"alice\",\n \"to\": \"lead\",\n \"status\": \"pending\",\n}\n```\n\nこれは普通の会話ではありません。\n\n> 状態更新を続けられる調整記録\n\nです。\n\n## 3. Task: 何をやるのか\n\nこれは `s12` の durable work-graph task であり、`s17` で teammate が claim する対象です。\n\nここが答えること:\n\n- 目標は何か\n- 誰が担当しているか\n- 何にブロックされているか\n- 進捗状態はどうか\n\n例:\n\n```python\ntask = {\n \"id\": 12,\n \"subject\": \"Implement login page\",\n \"status\": \"in_progress\",\n \"owner\": \"alice\",\n \"blockedBy\": [],\n}\n```\n\nキーワードは:\n\n**目標**\n\nディレクトリでも、protocol でも、process でもありません。\n\n## 4. Runtime Task / Execution Slot: 今なにが走っているか\n\nこの層は `s13` の橋渡し資料ですでに説明されていますが、`s15-s18` ではさらに重要になります。\n\n例:\n\n- background shell が走っている\n- 長寿命 teammate が今作業している\n- monitor が外部状態を見ている\n\nこれらは、\n\n> 実行中の slot\n\nとして理解するのが一番きれいです。\n\n例:\n\n```python\nruntime = {\n \"id\": \"rt_01\",\n \"type\": \"in_process_teammate\",\n \"status\": \"running\",\n \"work_graph_task_id\": 12,\n}\n```\n\n大事な境界:\n\n- 1つの task から複数の runtime task が派生しうる\n- runtime task は durable な目標そのものではなく、実行インスタンスである\n\n## 5. 
Worktree: どこでやるのか\n\n`s18` で導入される execution lane 層です。\n\nここが答えること:\n\n- どの隔離ディレクトリを使うか\n- どの task と結び付いているか\n- その lane は `active` / `kept` / `removed` のどれか\n\n例:\n\n```python\nworktree = {\n \"name\": \"login-page\",\n \"path\": \".worktrees/login-page\",\n \"task_id\": 12,\n \"status\": \"active\",\n}\n```\n\nキーワードは:\n\n**実行境界**\n\ntask そのものではなく、その task を進めるための隔離レーンです。\n\n## 層はどうつながるか\n\n```text\nteammate\n protocol request で協調し\n task を claim し\n execution slot として走り\n worktree lane の中で作業する\n```\n\nもっと具体的に言うなら:\n\n> `alice` が `task #12` を claim し、`login-page` worktree lane の中でそれを進める\n\nこの言い方は、\n\n> \"alice is doing the login-page worktree task\"\n\nのような曖昧な言い方よりずっと正確です。\n\n後者は次の3層を一つに潰してしまいます。\n\n- teammate\n- task\n- worktree\n\n## よくある間違い\n\n### 1. teammate と task を同じものとして扱う\n\nteammate は実行者、task は目標です。\n\n### 2. `request_id` と `task_id` を同じ種類の ID だと思う\n\n片方は調整、片方は目標です。\n\n### 3. runtime slot を durable task だと思う\n\n実行は終わっても、durable task は残ることがあります。\n\n### 4. worktree を task そのものだと思う\n\nworktree は execution lane でしかありません。\n\n### 5. 「並列で動く」とだけ言って層の名前を出さない\n\n良い教材は「agent がたくさんいる」で止まりません。\n\n次のように言える必要があります。\n\n> teammate は長期協力を担い、request は調整を追跡し、task は目標を記録し、runtime slot は実行を担い、worktree は実行ディレクトリを隔離する。\n\n## 読み終えたら言えるようになってほしいこと\n\n1. `s17` の自律 claim は `s12` の work-graph task を取るのであって、`s13` の runtime slot を取るのではない。\n2. `s18` の worktree は task に execution lane を結び付けるのであって、task をディレクトリへ変えるのではない。\n"
}
]
\ No newline at end of file
diff --git a/web/src/data/generated/versions.json b/web/src/data/generated/versions.json
index 0af62b7b5..46fbee5aa 100644
--- a/web/src/data/generated/versions.json
+++ b/web/src/data/generated/versions.json
@@ -4,38 +4,64 @@
"id": "s01",
"filename": "s01_agent_loop.py",
"title": "The Agent Loop",
- "subtitle": "Bash is All You Need",
- "loc": 84,
+ "subtitle": "Minimal Closed Loop",
+ "loc": 130,
"tools": [
"bash"
],
"newTools": [
"bash"
],
- "coreAddition": "Single-tool agent loop",
- "keyInsight": "The minimal agent kernel is a while loop + one tool",
- "classes": [],
+ "coreAddition": "LoopState + tool_result feedback",
+ "keyInsight": "An agent is just a loop: send messages, execute tools, feed results back, repeat.",
+ "classes": [
+ {
+ "name": "LoopState",
+ "startLine": 61,
+ "endLine": 67
+ }
+ ],
"functions": [
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
- "startLine": 53
+ "startLine": 68
+ },
+ {
+ "name": "extract_text",
+ "signature": "def extract_text(content)",
+ "startLine": 90
+ },
+ {
+ "name": "execute_tool_calls",
+ "signature": "def execute_tool_calls(response_content)",
+ "startLine": 101
+ },
+ {
+ "name": "run_one_turn",
+ "signature": "def run_one_turn(state: LoopState)",
+ "startLine": 118
},
{
"name": "agent_loop",
- "signature": "def agent_loop(messages: list)",
- "startLine": 67
+ "signature": "def agent_loop(state: LoopState)",
+ "startLine": 143
}
],
- "layer": "tools",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns01_agent_loop.py - The Agent Loop\n\nThe entire secret of an AI coding agent in one pattern:\n\n while stop_reason == \"tool_use\":\n response = LLM(messages, tools)\n execute tools\n append results\n\n +----------+ +-------+ +---------+\n | User | ---> | LLM | ---> | Tool |\n | prompt | | | | execute |\n +----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n (loop continues)\n\nThis is the core loop: feed tool results back to the model\nuntil the model decides to stop. Production agents layer\npolicy, hooks, and lifecycle controls on top.\n\"\"\"\n\nimport os\nimport subprocess\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {os.getcwd()}. Use bash to solve tasks. Act, don't explain.\"\n\nTOOLS = [{\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n}]\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=os.getcwd(),\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\n# -- The core pattern: a while loop that calls tools until the model stops --\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n # Append assistant turn\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n # If the model didn't call a tool, we're done\n if response.stop_reason != \"tool_use\":\n return\n # Execute each tool call, collect results\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n print(f\"\\033[33m$ {block.input['command']}\\033[0m\")\n output = run_bash(block.input[\"command\"])\n print(output[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms01 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ "layer": "core",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: the loop -- keep feeding real tool results back into the model.\n\"\"\"\ns01_agent_loop.py - The Agent Loop\n\nThis file teaches the smallest useful coding-agent pattern:\n\n user message\n -> model reply\n -> if tool_use: execute tools\n -> write tool_result back to messages\n -> continue\n\nIt intentionally keeps the loop small, but still makes the loop state explicit\nso later chapters can grow from the same structure.\n\"\"\"\n\nimport os\nimport subprocess\nfrom dataclasses import dataclass\n\ntry:\n import readline\n # #143 UTF-8 backspace fix for macOS libedit\n readline.parse_and_bind('set bind-tty-special-chars off')\n readline.parse_and_bind('set input-meta on')\n readline.parse_and_bind('set output-meta on')\n readline.parse_and_bind('set convert-meta off')\n readline.parse_and_bind('set enable-meta-keybindings on')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = (\n f\"You are a coding agent at {os.getcwd()}. \"\n \"Use bash to inspect and change the workspace. Act first, then report clearly.\"\n)\n\nTOOLS = [{\n \"name\": \"bash\",\n \"description\": \"Run a shell command in the current workspace.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n}]\n\n\n@dataclass\nclass LoopState:\n # The minimal loop state: history, loop count, and why we continue.\n messages: list\n turn_count: int = 1\n transition_reason: str | None = None\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=os.getcwd(),\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n except (FileNotFoundError, OSError) as e:\n return f\"Error: {e}\"\n\n output = (result.stdout + result.stderr).strip()\n return output[:50000] if output else \"(no output)\"\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef execute_tool_calls(response_content) -> list[dict]:\n results = []\n for block in response_content:\n if block.type != \"tool_use\":\n continue\n command = block.input[\"command\"]\n print(f\"\\033[33m$ {command}\\033[0m\")\n output = run_bash(command)\n print(output[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n return results\n\n\ndef run_one_turn(state: LoopState) -> bool:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=state.messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n state.messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n state.transition_reason = None\n return False\n\n results = execute_tool_calls(response.content)\n if not results:\n state.transition_reason = None\n return False\n\n state.messages.append({\"role\": \"user\", \"content\": 
results})\n state.turn_count += 1\n state.transition_reason = \"tool_result\"\n return True\n\n\ndef agent_loop(state: LoopState) -> None:\n while run_one_turn(state):\n pass\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms01 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n state = LoopState(messages=history)\n agent_loop(state)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): loop(循环)——把真实工具结果持续回灌给模型。\n\"\"\"\ns01_agent_loop.py - Agent Loop(智能体循环)\n\n本文件讲解最小可用的编码智能体模式:\n\n user message(用户消息)\n -> model reply(模型回复)\n -> 若出现 tool_use(工具调用):执行工具\n -> 将 tool_result(工具结果)写回 messages(消息历史)\n -> 继续下一轮\n\n这里故意保持最小闭环,但依然把循环状态显式化,\n这样后续章节可以在同一结构上逐层扩展。\n\"\"\"\n\nimport os\nimport subprocess\nfrom dataclasses import dataclass\n\ntry:\n import readline\n # #143:修复 macOS libedit 对 UTF-8 退格键处理异常。\n readline.parse_and_bind('set bind-tty-special-chars off')\n readline.parse_and_bind('set input-meta on')\n readline.parse_and_bind('set output-meta on')\n readline.parse_and_bind('set convert-meta off')\n readline.parse_and_bind('set enable-meta-keybindings on')\nexcept ImportError:\n pass\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = (\n f\"你是位于 {os.getcwd()} 的 coding agent(编码智能体)。\"\n \"使用 bash 检查并修改当前工作区。先行动,再清晰汇报。\"\n)\n\nTOOLS = [{\n \"name\": \"bash\",\n \"description\": \"在当前工作区执行 shell 命令。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n}]\n\n\n@dataclass\nclass LoopState:\n # 最小循环状态:消息历史、轮次计数、以及继续循环的原因。\n messages: list\n turn_count: int = 1\n transition_reason: str | None = None\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=os.getcwd(),\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n except (FileNotFoundError, OSError) as e:\n return f\"Error: {e}\"\n\n output = (result.stdout + result.stderr).strip()\n return output[:50000] if output else \"(no output)\"\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef execute_tool_calls(response_content) -> list[dict]:\n results = []\n for block in response_content:\n if block.type != \"tool_use\":\n continue\n command = block.input[\"command\"]\n print(f\"\\033[33m$ {command}\\033[0m\")\n output = run_bash(command)\n print(output[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n return results\n\n\ndef run_one_turn(state: LoopState) -> bool:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=state.messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n state.messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n state.transition_reason = None\n return False\n\n results = execute_tool_calls(response.content)\n if not results:\n state.transition_reason = None\n return False\n\n state.messages.append({\"role\": \"user\", \"content\": results})\n state.turn_count += 1\n state.transition_reason = \"tool_result\"\n return True\n\n\ndef agent_loop(state: LoopState) -> None:\n while run_one_turn(state):\n pass\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms01 >> \\033[0m\")\n except (EOFError, 
KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n state = LoopState(messages=history)\n agent_loop(state)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: the loop -- keep feeding real tool results back into the model.\n\"\"\"\ns01_agent_loop.py - The Agent Loop\n\nThis file teaches the smallest useful coding-agent pattern:\n\n user message\n -> model reply\n -> if tool_use: execute tools\n -> write tool_result back to messages\n -> continue\n\nIt intentionally keeps the loop small, but still makes the loop state explicit\nso later chapters can grow from the same structure.\n\"\"\"\n\nimport os\nimport subprocess\nfrom dataclasses import dataclass\n\ntry:\n import readline\n # #143 UTF-8 backspace fix for macOS libedit\n readline.parse_and_bind('set bind-tty-special-chars off')\n readline.parse_and_bind('set input-meta on')\n readline.parse_and_bind('set output-meta on')\n readline.parse_and_bind('set convert-meta off')\n readline.parse_and_bind('set enable-meta-keybindings on')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = (\n f\"You are a coding agent at {os.getcwd()}. \"\n \"Use bash to inspect and change the workspace. Act first, then report clearly.\"\n)\n\nTOOLS = [{\n \"name\": \"bash\",\n \"description\": \"Run a shell command in the current workspace.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n}]\n\n\n@dataclass\nclass LoopState:\n # The minimal loop state: history, loop count, and why we continue.\n messages: list\n turn_count: int = 1\n transition_reason: str | None = None\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=os.getcwd(),\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n except (FileNotFoundError, OSError) as e:\n return f\"Error: {e}\"\n\n output = (result.stdout + result.stderr).strip()\n return output[:50000] if output else \"(no output)\"\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef execute_tool_calls(response_content) -> list[dict]:\n results = []\n for block in response_content:\n if block.type != \"tool_use\":\n continue\n command = block.input[\"command\"]\n print(f\"\\033[33m$ {command}\\033[0m\")\n output = run_bash(command)\n print(output[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n return results\n\n\ndef run_one_turn(state: LoopState) -> bool:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=state.messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n state.messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n state.transition_reason = None\n return False\n\n results = execute_tool_calls(response.content)\n if not results:\n state.transition_reason = None\n return False\n\n state.messages.append({\"role\": \"user\", \"content\": 
results})\n state.turn_count += 1\n state.transition_reason = \"tool_result\"\n return True\n\n\ndef agent_loop(state: LoopState) -> None:\n while run_one_turn(state):\n pass\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms01 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n state = LoopState(messages=history)\n agent_loop(state)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: the loop -- keep feeding real tool results back into the model.\n\"\"\"\ns01_agent_loop.py - The Agent Loop\n\nThis file teaches the smallest useful coding-agent pattern:\n\n user message\n -> model reply\n -> if tool_use: execute tools\n -> write tool_result back to messages\n -> continue\n\nIt intentionally keeps the loop small, but still makes the loop state explicit\nso later chapters can grow from the same structure.\n\"\"\"\n\nimport os\nimport subprocess\nfrom dataclasses import dataclass\n\ntry:\n import readline\n # #143 UTF-8 backspace fix for macOS libedit\n readline.parse_and_bind('set bind-tty-special-chars off')\n readline.parse_and_bind('set input-meta on')\n readline.parse_and_bind('set output-meta on')\n readline.parse_and_bind('set convert-meta off')\n readline.parse_and_bind('set enable-meta-keybindings on')\nexcept ImportError:\n pass\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = (\n f\"You are a coding agent at {os.getcwd()}. \"\n \"Use bash to inspect and change the workspace. Act first, then report clearly.\"\n)\n\nTOOLS = [{\n \"name\": \"bash\",\n \"description\": \"Run a shell command in the current workspace.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n}]\n\n\n@dataclass\nclass LoopState:\n # The minimal loop state: history, loop count, and why we continue.\n messages: list\n turn_count: int = 1\n transition_reason: str | None = None\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=os.getcwd(),\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n except (FileNotFoundError, OSError) as e:\n return f\"Error: {e}\"\n\n output = (result.stdout + result.stderr).strip()\n return output[:50000] if output else \"(no output)\"\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef execute_tool_calls(response_content) -> list[dict]:\n results = []\n for block in response_content:\n if block.type != \"tool_use\":\n continue\n command = block.input[\"command\"]\n print(f\"\\033[33m$ {command}\\033[0m\")\n output = run_bash(command)\n print(output[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n return results\n\n\ndef run_one_turn(state: LoopState) -> bool:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=state.messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n state.messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n state.transition_reason = None\n return False\n\n results = execute_tool_calls(response.content)\n if not results:\n state.transition_reason = None\n return False\n\n state.messages.append({\"role\": \"user\", \"content\": 
results})\n state.turn_count += 1\n state.transition_reason = \"tool_result\"\n return True\n\n\ndef agent_loop(state: LoopState) -> None:\n while run_one_turn(state):\n pass\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms01 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n state = LoopState(messages=history)\n agent_loop(state)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n"
},
{
"id": "s02",
"filename": "s02_tool_use.py",
- "title": "Tools",
- "subtitle": "One Handler Per Tool",
- "loc": 120,
+ "title": "Tool Use",
+ "subtitle": "Route Intent into Action",
+ "loc": 169,
"tools": [
"bash",
"read_file",
@@ -47,50 +73,60 @@
"write_file",
"edit_file"
],
- "coreAddition": "Tool dispatch map",
- "keyInsight": "The loop stays the same; new tools register into the dispatch map",
+ "coreAddition": "Tool specs + dispatch map",
+ "keyInsight": "Adding a tool means adding one handler. The loop never changes.",
"classes": [],
"functions": [
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
- "startLine": 40
+ "startLine": 32
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
- "startLine": 47
+ "startLine": 39
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int = None)",
- "startLine": 60
+ "startLine": 52
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
- "startLine": 71
+ "startLine": 63
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
- "startLine": 81
+ "startLine": 73
+ },
+ {
+ "name": "normalize_messages",
+ "signature": "def normalize_messages(messages: list)",
+ "startLine": 110
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
- "startLine": 113
+ "startLine": 172
}
],
- "layer": "tools",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns02_tool_use.py - Tools\n\nThe agent loop from s01 didn't change. We just added tools to the array\nand a dispatch map to route calls.\n\n +----------+ +-------+ +------------------+\n | User | ---> | LLM | ---> | Tool Dispatch |\n | prompt | | | | { |\n +----------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +----------+ edit: run_edit |\n tool_result| } |\n +------------------+\n\nKey insight: \"The loop didn't change at all. I just added tools.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks. Act, don't explain.\"\n\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- The dispatch map: {tool_name: handler} --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\"> {block.name}: {output[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms02 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ "layer": "core",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: tool dispatch -- expanding what the model can reach.\n\"\"\"\ns02_tool_use.py - Tool dispatch + message normalization\n\nThe agent loop from s01 didn't change. We added tools to the dispatch map,\nand a normalize_messages() function that cleans up the message list before\neach API call.\n\nKey insight: \"The loop didn't change at all. I just added tools.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks. Act, don't explain.\"\n\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Concurrency safety classification --\n# Read-only tools can safely run in parallel; mutating tools must be serialized.\nCONCURRENCY_SAFE = {\"read_file\"}\nCONCURRENCY_UNSAFE = {\"write_file\", \"edit_file\"}\n\n# -- The dispatch map: {tool_name: handler} --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": 
{\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\ndef normalize_messages(messages: list) -> list:\n \"\"\"Clean up messages before sending to the API.\n\n Three jobs:\n 1. Strip internal metadata fields the API doesn't understand\n 2. Ensure every tool_use has a matching tool_result (insert placeholder if missing)\n 3. Merge consecutive same-role messages (API requires strict alternation)\n \"\"\"\n cleaned = []\n for msg in messages:\n clean = {\"role\": msg[\"role\"]}\n if isinstance(msg.get(\"content\"), str):\n clean[\"content\"] = msg[\"content\"]\n elif isinstance(msg.get(\"content\"), list):\n clean[\"content\"] = [\n {k: v for k, v in block.items()\n if not k.startswith(\"_\")}\n for block in msg[\"content\"]\n if isinstance(block, dict)\n ]\n else:\n clean[\"content\"] = msg.get(\"content\", \"\")\n cleaned.append(clean)\n\n # Collect existing tool_result IDs\n existing_results = set()\n for msg in cleaned:\n if isinstance(msg.get(\"content\"), list):\n for block in msg[\"content\"]:\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n existing_results.add(block.get(\"tool_use_id\"))\n\n # Find orphaned tool_use blocks and insert placeholder results\n for msg in cleaned:\n if msg[\"role\"] != \"assistant\" or not isinstance(msg.get(\"content\"), list):\n continue\n for block in msg[\"content\"]:\n if not isinstance(block, dict):\n continue\n if block.get(\"type\") == \"tool_use\" and block.get(\"id\") not in existing_results:\n cleaned.append({\"role\": \"user\", \"content\": [\n {\"type\": \"tool_result\", \"tool_use_id\": block[\"id\"],\n \"content\": \"(cancelled)\"}\n ]})\n\n # Merge consecutive same-role messages\n if not cleaned:\n return cleaned\n merged = [cleaned[0]]\n for msg in cleaned[1:]:\n if msg[\"role\"] == merged[-1][\"role\"]:\n prev = merged[-1]\n prev_c = prev[\"content\"] if isinstance(prev[\"content\"], list) \\\n else [{\"type\": \"text\", \"text\": str(prev[\"content\"])}]\n curr_c = msg[\"content\"] if isinstance(msg[\"content\"], list) \\\n else [{\"type\": \"text\", \"text\": str(msg[\"content\"])}]\n prev[\"content\"] = prev_c + curr_c\n else:\n merged.append(msg)\n return merged\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM,\n messages=normalize_messages(messages),\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\"> {block.name}:\")\n print(output[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms02 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n 
break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): tool dispatch(工具分发)——扩展模型可达能力边界。\n\"\"\"\ns02_tool_use.py - 工具分发 + 消息规范化(message normalization)\n\ns01 的 agent loop(智能体循环)没有改。新增内容只有两类:\n1. 把工具接入 dispatch map(分发表);\n2. 在每次 API 调用前,用 normalize_messages() 清理消息列表。\n\n关键洞察:\n\"循环本身完全没变,我只是加了工具。\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = (\n f\"你是位于 {WORKDIR} 的 coding agent(编码智能体)。\"\n \"优先使用工具解决任务。先执行,再解释。\"\n)\n\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- 并发安全分类(concurrency safety) --\n# 只读工具可并行;有副作用的写入工具必须串行。\nCONCURRENCY_SAFE = {\"read_file\"}\nCONCURRENCY_UNSAFE = {\"write_file\", \"edit_file\"}\n\n# -- 分发表(dispatch map): {tool_name: handler} --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换一次精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": 
{\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\ndef normalize_messages(messages: list) -> list:\n \"\"\"发送 API 前清理消息列表。\n\n 主要做三件事:\n 1. 移除 API 不识别的内部元数据字段;\n 2. 确保每个 tool_use(工具调用)都有对应 tool_result(工具结果);\n 若缺失则插入占位结果;\n 3. 合并连续同角色消息(API 侧通常要求严格交替)。\n \"\"\"\n cleaned = []\n for msg in messages:\n clean = {\"role\": msg[\"role\"]}\n if isinstance(msg.get(\"content\"), str):\n clean[\"content\"] = msg[\"content\"]\n elif isinstance(msg.get(\"content\"), list):\n clean[\"content\"] = [\n {k: v for k, v in block.items()\n if not k.startswith(\"_\")}\n for block in msg[\"content\"]\n if isinstance(block, dict)\n ]\n else:\n clean[\"content\"] = msg.get(\"content\", \"\")\n cleaned.append(clean)\n\n # 收集已有 tool_result 的 ID\n existing_results = set()\n for msg in cleaned:\n if isinstance(msg.get(\"content\"), list):\n for block in msg[\"content\"]:\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n existing_results.add(block.get(\"tool_use_id\"))\n\n # 查找孤立 tool_use 并补充占位 tool_result\n for msg in cleaned:\n if msg[\"role\"] != \"assistant\" or not isinstance(msg.get(\"content\"), list):\n continue\n for block in msg[\"content\"]:\n if not isinstance(block, dict):\n continue\n if block.get(\"type\") == \"tool_use\" and block.get(\"id\") not in existing_results:\n cleaned.append({\"role\": \"user\", \"content\": [\n {\"type\": \"tool_result\", \"tool_use_id\": block[\"id\"],\n \"content\": \"(cancelled)\"}\n ]})\n\n # 合并连续同角色消息\n if not cleaned:\n return cleaned\n merged = [cleaned[0]]\n for msg in cleaned[1:]:\n if msg[\"role\"] == merged[-1][\"role\"]:\n prev = merged[-1]\n prev_c = prev[\"content\"] if isinstance(prev[\"content\"], list) \\\n else [{\"type\": \"text\", \"text\": str(prev[\"content\"])}]\n curr_c = msg[\"content\"] if isinstance(msg[\"content\"], list) \\\n else [{\"type\": \"text\", \"text\": str(msg[\"content\"])}]\n prev[\"content\"] = prev_c + curr_c\n else:\n merged.append(msg)\n return merged\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM,\n messages=normalize_messages(messages),\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\"> {block.name}:\")\n print(output[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms02 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: tool dispatch -- expanding what the model can reach.\n\"\"\"\ns02_tool_use.py - Tool dispatch + message normalization\n\nThe agent loop from s01 didn't change. We added tools to the dispatch map,\nand a normalize_messages() function that cleans up the message list before\neach API call.\n\nKey insight: \"The loop didn't change at all. I just added tools.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks. Act, don't explain.\"\n\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Concurrency safety classification --\n# Read-only tools can safely run in parallel; mutating tools must be serialized.\nCONCURRENCY_SAFE = {\"read_file\"}\nCONCURRENCY_UNSAFE = {\"write_file\", \"edit_file\"}\n\n# -- The dispatch map: {tool_name: handler} --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": 
{\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\ndef normalize_messages(messages: list) -> list:\n \"\"\"Clean up messages before sending to the API.\n\n Three jobs:\n 1. Strip internal metadata fields the API doesn't understand\n 2. Ensure every tool_use has a matching tool_result (insert placeholder if missing)\n 3. Merge consecutive same-role messages (API requires strict alternation)\n \"\"\"\n cleaned = []\n for msg in messages:\n clean = {\"role\": msg[\"role\"]}\n if isinstance(msg.get(\"content\"), str):\n clean[\"content\"] = msg[\"content\"]\n elif isinstance(msg.get(\"content\"), list):\n clean[\"content\"] = [\n {k: v for k, v in block.items()\n if not k.startswith(\"_\")}\n for block in msg[\"content\"]\n if isinstance(block, dict)\n ]\n else:\n clean[\"content\"] = msg.get(\"content\", \"\")\n cleaned.append(clean)\n\n # Collect existing tool_result IDs\n existing_results = set()\n for msg in cleaned:\n if isinstance(msg.get(\"content\"), list):\n for block in msg[\"content\"]:\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n existing_results.add(block.get(\"tool_use_id\"))\n\n # Find orphaned tool_use blocks and insert placeholder results\n for msg in cleaned:\n if msg[\"role\"] != \"assistant\" or not isinstance(msg.get(\"content\"), list):\n continue\n for block in msg[\"content\"]:\n if not isinstance(block, dict):\n continue\n if block.get(\"type\") == \"tool_use\" and block.get(\"id\") not in existing_results:\n cleaned.append({\"role\": \"user\", \"content\": [\n {\"type\": \"tool_result\", \"tool_use_id\": block[\"id\"],\n \"content\": \"(cancelled)\"}\n ]})\n\n # Merge consecutive same-role messages\n if not cleaned:\n return cleaned\n merged = [cleaned[0]]\n for msg in cleaned[1:]:\n if msg[\"role\"] == merged[-1][\"role\"]:\n prev = merged[-1]\n prev_c = prev[\"content\"] if isinstance(prev[\"content\"], list) \\\n else [{\"type\": \"text\", \"text\": str(prev[\"content\"])}]\n curr_c = msg[\"content\"] if isinstance(msg[\"content\"], list) \\\n else [{\"type\": \"text\", \"text\": str(msg[\"content\"])}]\n prev[\"content\"] = prev_c + curr_c\n else:\n merged.append(msg)\n return merged\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM,\n messages=normalize_messages(messages),\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\"> {block.name}:\")\n print(output[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms02 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n 
break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: tool dispatch -- expanding what the model can reach.\n\"\"\"\ns02_tool_use.py - Tool dispatch + message normalization\n\nThe agent loop from s01 didn't change. We added tools to the dispatch map,\nand a normalize_messages() function that cleans up the message list before\neach API call.\n\nKey insight: \"The loop didn't change at all. I just added tools.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks. Act, don't explain.\"\n\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Concurrency safety classification --\n# Read-only tools can safely run in parallel; mutating tools must be serialized.\nCONCURRENCY_SAFE = {\"read_file\"}\nCONCURRENCY_UNSAFE = {\"write_file\", \"edit_file\"}\n\n# -- The dispatch map: {tool_name: handler} --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": 
{\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\ndef normalize_messages(messages: list) -> list:\n \"\"\"Clean up messages before sending to the API.\n\n Three jobs:\n 1. Strip internal metadata fields the API doesn't understand\n 2. Ensure every tool_use has a matching tool_result (insert placeholder if missing)\n 3. Merge consecutive same-role messages (API requires strict alternation)\n \"\"\"\n cleaned = []\n for msg in messages:\n clean = {\"role\": msg[\"role\"]}\n if isinstance(msg.get(\"content\"), str):\n clean[\"content\"] = msg[\"content\"]\n elif isinstance(msg.get(\"content\"), list):\n clean[\"content\"] = [\n {k: v for k, v in block.items()\n if not k.startswith(\"_\")}\n for block in msg[\"content\"]\n if isinstance(block, dict)\n ]\n else:\n clean[\"content\"] = msg.get(\"content\", \"\")\n cleaned.append(clean)\n\n # Collect existing tool_result IDs\n existing_results = set()\n for msg in cleaned:\n if isinstance(msg.get(\"content\"), list):\n for block in msg[\"content\"]:\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n existing_results.add(block.get(\"tool_use_id\"))\n\n # Find orphaned tool_use blocks and insert placeholder results\n for msg in cleaned:\n if msg[\"role\"] != \"assistant\" or not isinstance(msg.get(\"content\"), list):\n continue\n for block in msg[\"content\"]:\n if not isinstance(block, dict):\n continue\n if block.get(\"type\") == \"tool_use\" and block.get(\"id\") not in existing_results:\n cleaned.append({\"role\": \"user\", \"content\": [\n {\"type\": \"tool_result\", \"tool_use_id\": block[\"id\"],\n \"content\": \"(cancelled)\"}\n ]})\n\n # Merge consecutive same-role messages\n if not cleaned:\n return cleaned\n merged = [cleaned[0]]\n for msg in cleaned[1:]:\n if msg[\"role\"] == merged[-1][\"role\"]:\n prev = merged[-1]\n prev_c = prev[\"content\"] if isinstance(prev[\"content\"], list) \\\n else [{\"type\": \"text\", \"text\": str(prev[\"content\"])}]\n curr_c = msg[\"content\"] if isinstance(msg[\"content\"], list) \\\n else [{\"type\": \"text\", \"text\": str(msg[\"content\"])}]\n prev[\"content\"] = prev_c + curr_c\n else:\n merged.append(msg)\n return merged\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM,\n messages=normalize_messages(messages),\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\"> {block.name}:\")\n print(output[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms02 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n 
break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
},
{
"id": "s03",
"filename": "s03_todo_write.py",
"title": "TodoWrite",
- "subtitle": "Plan Before You Act",
- "loc": 176,
+ "subtitle": "Session Planning",
+ "loc": 279,
"tools": [
"bash",
"read_file",
@@ -101,56 +137,76 @@
"newTools": [
"todo"
],
- "coreAddition": "TodoManager + nag reminder",
- "keyInsight": "An agent without a plan drifts; list the steps first, then execute",
+ "coreAddition": "PlanningState + reminder loop",
+ "keyInsight": "A visible plan keeps the agent on track when tasks get complex.",
"classes": [
+ {
+ "name": "PlanItem",
+ "startLine": 36,
+ "endLine": 42
+ },
+ {
+ "name": "PlanningState",
+ "startLine": 43,
+ "endLine": 47
+ },
{
"name": "TodoManager",
- "startLine": 51,
- "endLine": 87
+ "startLine": 48,
+ "endLine": 113
}
],
"functions": [
{
"name": "safe_path",
- "signature": "def safe_path(p: str)",
- "startLine": 92
+ "signature": "def safe_path(path_str: str)",
+ "startLine": 117
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
- "startLine": 98
+ "startLine": 124
},
{
"name": "run_read",
- "signature": "def run_read(path: str, limit: int = None)",
- "startLine": 110
+ "signature": "def run_read(path: str, limit: int | None = None)",
+ "startLine": 144
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
- "startLine": 119
+ "startLine": 154
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
- "startLine": 128
+ "startLine": 164
+ },
+ {
+ "name": "extract_text",
+ "signature": "def extract_text(content)",
+ "startLine": 262
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
- "startLine": 163
+ "startLine": 273
}
],
- "layer": "planning",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns03_todo_write.py - TodoWrite\n\nThe model tracks its own progress via a TodoManager. A nag reminder\nforces it to keep updating when it forgets.\n\n +----------+ +-------+ +---------+\n | User | ---> | LLM | ---> | Tools |\n | prompt | | | | + todo |\n +----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject \n\nKey insight: \"The agent can track its own progress -- and I can see it.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse the todo tool to plan multi-step tasks. Mark in_progress before starting, completed when done.\nPrefer tools over prose.\"\"\"\n\n\n# -- TodoManager: structured state the LLM writes to --\nclass TodoManager:\n def __init__(self):\n self.items = []\n\n def update(self, items: list) -> str:\n if len(items) > 20:\n raise ValueError(\"Max 20 todos allowed\")\n validated = []\n in_progress_count = 0\n for i, item in enumerate(items):\n text = str(item.get(\"text\", \"\")).strip()\n status = str(item.get(\"status\", \"pending\")).lower()\n item_id = str(item.get(\"id\", str(i + 1)))\n if not text:\n raise ValueError(f\"Item {item_id}: text required\")\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Item {item_id}: invalid status '{status}'\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item_id, \"text\": text, \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress at a time\")\n self.items = validated\n return self.render()\n\n def render(self) -> str:\n if not self.items:\n return \"No todos.\"\n lines = []\n for item in self.items:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}[item[\"status\"]]\n lines.append(f\"{marker} #{item['id']}: {item['text']}\")\n done = sum(1 for t in self.items if t[\"status\"] == \"completed\")\n lines.append(f\"\\n({done}/{len(self.items)} completed)\")\n return \"\\n\".join(lines)\n\n\nTODO = TodoManager()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"todo\", \"description\": \"Update task list. 
Track progress on multi-step tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"items\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"id\": {\"type\": \"string\"}, \"text\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"id\", \"text\", \"status\"]}}}, \"required\": [\"items\"]}},\n]\n\n\n# -- Agent loop with nag reminder injection --\ndef agent_loop(messages: list):\n rounds_since_todo = 0\n while True:\n # Nag reminder is injected below, alongside tool results\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n used_todo = False\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n if block.name == \"todo\":\n used_todo = True\n rounds_since_todo = 0 if used_todo else rounds_since_todo + 1\n if rounds_since_todo >= 3:\n results.insert(0, {\"type\": \"text\", \"text\": \"Update your todos. \"})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms03 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ "layer": "core",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: planning -- keep the current session plan outside the model's head.\n\"\"\"\ns03_todo_write.py - Session Planning with TodoWrite\n\nThis chapter is about a lightweight session plan, not a durable task graph.\nThe model can rewrite its current plan, keep one active step in focus, and get\nnudged if it stops refreshing the plan for too many rounds.\n\"\"\"\n\nimport os\nimport subprocess\nfrom dataclasses import dataclass, field\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nPLAN_REMINDER_INTERVAL = 3\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse the todo tool for multi-step work.\nKeep exactly one step in_progress when a task has multiple steps.\nRefresh the plan as work advances. Prefer tools over prose.\"\"\"\n\n\n@dataclass\nclass PlanItem:\n content: str\n status: str = \"pending\"\n active_form: str = \"\"\n\n\n@dataclass\nclass PlanningState:\n items: list[PlanItem] = field(default_factory=list)\n rounds_since_update: int = 0\n\n\nclass TodoManager:\n def __init__(self):\n self.state = PlanningState()\n\n def update(self, items: list) -> str:\n if len(items) > 12:\n raise ValueError(\"Keep the session plan short (max 12 items)\")\n\n normalized = []\n in_progress_count = 0\n for index, raw_item in enumerate(items):\n content = str(raw_item.get(\"content\", \"\")).strip()\n status = str(raw_item.get(\"status\", \"pending\")).lower()\n active_form = str(raw_item.get(\"activeForm\", \"\")).strip()\n\n if not content:\n raise ValueError(f\"Item {index}: content required\")\n if status not in {\"pending\", \"in_progress\", \"completed\"}:\n raise ValueError(f\"Item {index}: invalid status '{status}'\")\n if status == \"in_progress\":\n in_progress_count += 1\n\n normalized.append(PlanItem(\n content=content,\n status=status,\n active_form=active_form,\n ))\n\n if in_progress_count > 1:\n raise ValueError(\"Only one plan item can be in_progress\")\n\n self.state.items = normalized\n self.state.rounds_since_update = 0\n return self.render()\n\n def note_round_without_update(self) -> None:\n self.state.rounds_since_update += 1\n\n def reminder(self) -> str | None:\n if not self.state.items:\n return None\n if self.state.rounds_since_update < PLAN_REMINDER_INTERVAL:\n return None\n return \"Refresh your current plan before continuing. 
\"\n\n def render(self) -> str:\n if not self.state.items:\n return \"No session plan yet.\"\n\n lines = []\n for item in self.state.items:\n marker = {\n \"pending\": \"[ ]\",\n \"in_progress\": \"[>]\",\n \"completed\": \"[x]\",\n }[item.status]\n line = f\"{marker} {item.content}\"\n if item.status == \"in_progress\" and item.active_form:\n line += f\" ({item.active_form})\"\n lines.append(line)\n\n completed = sum(1 for item in self.state.items if item.status == \"completed\")\n lines.append(f\"\\n({completed}/{len(self.state.items)} completed)\")\n return \"\\n\".join(lines)\n\n\nTODO = TodoManager()\n\n\ndef safe_path(path_str: str) -> Path:\n path = (WORKDIR / path_str).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {path_str}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n output = (result.stdout + result.stderr).strip()\n return output[:50000] if output else \"(no output)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n content = file_path.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n file_path.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to a file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\n \"type\": 
\"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"todo\",\n \"description\": \"Rewrite the current session plan for multi-step work.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"items\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"content\": {\"type\": \"string\"},\n \"status\": {\n \"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"],\n },\n \"activeForm\": {\n \"type\": \"string\",\n \"description\": \"Optional present-continuous label.\",\n },\n },\n \"required\": [\"content\", \"status\"],\n },\n },\n },\n \"required\": [\"items\"],\n },\n },\n]\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef agent_loop(messages: list) -> None:\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n used_todo = False\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as exc:\n output = f\"Error: {exc}\"\n\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n if block.name == \"todo\":\n used_todo = True\n\n if used_todo:\n TODO.state.rounds_since_update = 0\n else:\n TODO.note_round_without_update()\n reminder = TODO.reminder()\n if reminder:\n results.insert(0, {\"type\": \"text\", \"text\": reminder})\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms03 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): planning(规划)——把当前会话计划从模型“脑内”外置出来。\n\"\"\"\ns03_todo_write.py - 使用 TodoWrite 的会话规划\n\n本章聚焦轻量会话计划(session plan),而非持久任务图(durable task graph)。\n模型可以重写当前计划,在多步任务中始终保持一个激活步骤,\n并在长期不更新计划时收到提醒。\n\"\"\"\n\nimport os\nimport subprocess\nfrom dataclasses import dataclass, field\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\nPLAN_REMINDER_INTERVAL = 3\n\nSYSTEM = f\"\"\"你是位于 {WORKDIR} 的 coding agent(编码智能体)。\n多步骤任务请使用 todo 工具。\n当任务包含多步时,只允许一个步骤处于 in_progress 状态。\n随着工作推进持续刷新计划。优先使用工具,而不是长篇文字。\"\"\"\n\n\n@dataclass\nclass PlanItem:\n content: str\n status: str = \"pending\"\n active_form: str = \"\"\n\n\n@dataclass\nclass PlanningState:\n items: list[PlanItem] = field(default_factory=list)\n rounds_since_update: int = 0\n\n\nclass TodoManager:\n def __init__(self):\n self.state = PlanningState()\n\n def update(self, items: list) -> str:\n if len(items) > 12:\n raise ValueError(\"会话计划请保持简洁(最多 12 项)\")\n\n normalized = []\n in_progress_count = 0\n for index, raw_item in enumerate(items):\n content = str(raw_item.get(\"content\", \"\")).strip()\n status = str(raw_item.get(\"status\", \"pending\")).lower()\n active_form = str(raw_item.get(\"activeForm\", \"\")).strip()\n\n if not content:\n raise ValueError(f\"条目 {index}: content(内容)不能为空\")\n if status not in {\"pending\", \"in_progress\", \"completed\"}:\n raise ValueError(f\"条目 {index}: status(状态)非法:'{status}'\")\n if status == \"in_progress\":\n in_progress_count += 1\n\n normalized.append(PlanItem(\n content=content,\n status=status,\n active_form=active_form,\n ))\n\n if in_progress_count > 1:\n raise ValueError(\"计划中最多只能有一个条目处于 in_progress\")\n\n self.state.items = normalized\n self.state.rounds_since_update = 0\n return self.render()\n\n def note_round_without_update(self) -> None:\n self.state.rounds_since_update += 1\n\n def reminder(self) -> str | None:\n if not self.state.items:\n return None\n if self.state.rounds_since_update < PLAN_REMINDER_INTERVAL:\n return None\n return \"继续前请刷新当前计划。 \"\n\n def render(self) -> str:\n if not self.state.items:\n return \"当前尚无会话计划。\"\n\n lines = []\n for item in self.state.items:\n marker = {\n \"pending\": \"[ ]\",\n \"in_progress\": \"[>]\",\n \"completed\": \"[x]\",\n }[item.status]\n line = f\"{marker} {item.content}\"\n if item.status == \"in_progress\" and item.active_form:\n line += f\" ({item.active_form})\"\n lines.append(line)\n\n completed = sum(1 for item in self.state.items if item.status == \"completed\")\n lines.append(f\"\\n(已完成 {completed}/{len(self.state.items)})\")\n return \"\\n\".join(lines)\n\n\nTODO = TodoManager()\n\n\ndef safe_path(path_str: str) -> Path:\n path = (WORKDIR / path_str).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {path_str}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n output = (result.stdout + result.stderr).strip()\n return output[:50000] if output else \"(no 
output)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n content = file_path.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n file_path.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"读取文件内容。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"向文件写入内容。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"在文件中执行一次精确文本替换。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"todo\",\n \"description\": \"为多步骤任务重写当前会话计划。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"items\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"content\": {\"type\": \"string\"},\n \"status\": {\n \"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"],\n },\n \"activeForm\": {\n \"type\": \"string\",\n \"description\": \"可选:当前进行态标签。\",\n },\n },\n \"required\": [\"content\", \"status\"],\n },\n },\n },\n \"required\": [\"items\"],\n },\n },\n]\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef agent_loop(messages: list) -> None:\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n used_todo = False\n for block in 
response.content:\n if block.type != \"tool_use\":\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as exc:\n output = f\"Error: {exc}\"\n\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n if block.name == \"todo\":\n used_todo = True\n\n if used_todo:\n TODO.state.rounds_since_update = 0\n else:\n TODO.note_round_without_update()\n reminder = TODO.reminder()\n if reminder:\n results.insert(0, {\"type\": \"text\", \"text\": reminder})\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms03 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: planning -- keep the current session plan outside the model's head.\n\"\"\"\ns03_todo_write.py - Session Planning with TodoWrite\n\nThis chapter is about a lightweight session plan, not a durable task graph.\nThe model can rewrite its current plan, keep one active step in focus, and get\nnudged if it stops refreshing the plan for too many rounds.\n\"\"\"\n\nimport os\nimport subprocess\nfrom dataclasses import dataclass, field\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nPLAN_REMINDER_INTERVAL = 3\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse the todo tool for multi-step work.\nKeep exactly one step in_progress when a task has multiple steps.\nRefresh the plan as work advances. Prefer tools over prose.\"\"\"\n\n\n@dataclass\nclass PlanItem:\n content: str\n status: str = \"pending\"\n active_form: str = \"\"\n\n\n@dataclass\nclass PlanningState:\n items: list[PlanItem] = field(default_factory=list)\n rounds_since_update: int = 0\n\n\nclass TodoManager:\n def __init__(self):\n self.state = PlanningState()\n\n def update(self, items: list) -> str:\n if len(items) > 12:\n raise ValueError(\"Keep the session plan short (max 12 items)\")\n\n normalized = []\n in_progress_count = 0\n for index, raw_item in enumerate(items):\n content = str(raw_item.get(\"content\", \"\")).strip()\n status = str(raw_item.get(\"status\", \"pending\")).lower()\n active_form = str(raw_item.get(\"activeForm\", \"\")).strip()\n\n if not content:\n raise ValueError(f\"Item {index}: content required\")\n if status not in {\"pending\", \"in_progress\", \"completed\"}:\n raise ValueError(f\"Item {index}: invalid status '{status}'\")\n if status == \"in_progress\":\n in_progress_count += 1\n\n normalized.append(PlanItem(\n content=content,\n status=status,\n active_form=active_form,\n ))\n\n if in_progress_count > 1:\n raise ValueError(\"Only one plan item can be in_progress\")\n\n self.state.items = normalized\n self.state.rounds_since_update = 0\n return self.render()\n\n def note_round_without_update(self) -> None:\n self.state.rounds_since_update += 1\n\n def reminder(self) -> str | None:\n if not self.state.items:\n return None\n if self.state.rounds_since_update < PLAN_REMINDER_INTERVAL:\n return None\n return \"Refresh your current plan before continuing. 
\"\n\n def render(self) -> str:\n if not self.state.items:\n return \"No session plan yet.\"\n\n lines = []\n for item in self.state.items:\n marker = {\n \"pending\": \"[ ]\",\n \"in_progress\": \"[>]\",\n \"completed\": \"[x]\",\n }[item.status]\n line = f\"{marker} {item.content}\"\n if item.status == \"in_progress\" and item.active_form:\n line += f\" ({item.active_form})\"\n lines.append(line)\n\n completed = sum(1 for item in self.state.items if item.status == \"completed\")\n lines.append(f\"\\n({completed}/{len(self.state.items)} completed)\")\n return \"\\n\".join(lines)\n\n\nTODO = TodoManager()\n\n\ndef safe_path(path_str: str) -> Path:\n path = (WORKDIR / path_str).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {path_str}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n output = (result.stdout + result.stderr).strip()\n return output[:50000] if output else \"(no output)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n content = file_path.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n file_path.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to a file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\n \"type\": 
\"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"todo\",\n \"description\": \"Rewrite the current session plan for multi-step work.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"items\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"content\": {\"type\": \"string\"},\n \"status\": {\n \"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"],\n },\n \"activeForm\": {\n \"type\": \"string\",\n \"description\": \"Optional present-continuous label.\",\n },\n },\n \"required\": [\"content\", \"status\"],\n },\n },\n },\n \"required\": [\"items\"],\n },\n },\n]\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef agent_loop(messages: list) -> None:\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n used_todo = False\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as exc:\n output = f\"Error: {exc}\"\n\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n if block.name == \"todo\":\n used_todo = True\n\n if used_todo:\n TODO.state.rounds_since_update = 0\n else:\n TODO.note_round_without_update()\n reminder = TODO.reminder()\n if reminder:\n results.insert(0, {\"type\": \"text\", \"text\": reminder})\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms03 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: planning -- keep the current session plan outside the model's head.\n\"\"\"\ns03_todo_write.py - Session Planning with TodoWrite\n\nThis chapter is about a lightweight session plan, not a durable task graph.\nThe model can rewrite its current plan, keep one active step in focus, and get\nnudged if it stops refreshing the plan for too many rounds.\n\"\"\"\n\nimport os\nimport subprocess\nfrom dataclasses import dataclass, field\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nPLAN_REMINDER_INTERVAL = 3\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse the todo tool for multi-step work.\nKeep exactly one step in_progress when a task has multiple steps.\nRefresh the plan as work advances. Prefer tools over prose.\"\"\"\n\n\n@dataclass\nclass PlanItem:\n content: str\n status: str = \"pending\"\n active_form: str = \"\"\n\n\n@dataclass\nclass PlanningState:\n items: list[PlanItem] = field(default_factory=list)\n rounds_since_update: int = 0\n\n\nclass TodoManager:\n def __init__(self):\n self.state = PlanningState()\n\n def update(self, items: list) -> str:\n if len(items) > 12:\n raise ValueError(\"Keep the session plan short (max 12 items)\")\n\n normalized = []\n in_progress_count = 0\n for index, raw_item in enumerate(items):\n content = str(raw_item.get(\"content\", \"\")).strip()\n status = str(raw_item.get(\"status\", \"pending\")).lower()\n active_form = str(raw_item.get(\"activeForm\", \"\")).strip()\n\n if not content:\n raise ValueError(f\"Item {index}: content required\")\n if status not in {\"pending\", \"in_progress\", \"completed\"}:\n raise ValueError(f\"Item {index}: invalid status '{status}'\")\n if status == \"in_progress\":\n in_progress_count += 1\n\n normalized.append(PlanItem(\n content=content,\n status=status,\n active_form=active_form,\n ))\n\n if in_progress_count > 1:\n raise ValueError(\"Only one plan item can be in_progress\")\n\n self.state.items = normalized\n self.state.rounds_since_update = 0\n return self.render()\n\n def note_round_without_update(self) -> None:\n self.state.rounds_since_update += 1\n\n def reminder(self) -> str | None:\n if not self.state.items:\n return None\n if self.state.rounds_since_update < PLAN_REMINDER_INTERVAL:\n return None\n return \"Refresh your current plan before continuing. 
\"\n\n def render(self) -> str:\n if not self.state.items:\n return \"No session plan yet.\"\n\n lines = []\n for item in self.state.items:\n marker = {\n \"pending\": \"[ ]\",\n \"in_progress\": \"[>]\",\n \"completed\": \"[x]\",\n }[item.status]\n line = f\"{marker} {item.content}\"\n if item.status == \"in_progress\" and item.active_form:\n line += f\" ({item.active_form})\"\n lines.append(line)\n\n completed = sum(1 for item in self.state.items if item.status == \"completed\")\n lines.append(f\"\\n({completed}/{len(self.state.items)} completed)\")\n return \"\\n\".join(lines)\n\n\nTODO = TodoManager()\n\n\ndef safe_path(path_str: str) -> Path:\n path = (WORKDIR / path_str).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {path_str}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n output = (result.stdout + result.stderr).strip()\n return output[:50000] if output else \"(no output)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n content = file_path.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n file_path.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to a file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\n \"type\": 
\"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"todo\",\n \"description\": \"Rewrite the current session plan for multi-step work.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"items\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"content\": {\"type\": \"string\"},\n \"status\": {\n \"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"],\n },\n \"activeForm\": {\n \"type\": \"string\",\n \"description\": \"Optional present-continuous label.\",\n },\n },\n \"required\": [\"content\", \"status\"],\n },\n },\n },\n \"required\": [\"items\"],\n },\n },\n]\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef agent_loop(messages: list) -> None:\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n used_todo = False\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as exc:\n output = f\"Error: {exc}\"\n\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n if block.name == \"todo\":\n used_todo = True\n\n if used_todo:\n TODO.state.rounds_since_update = 0\n else:\n TODO.note_round_without_update()\n reminder = TODO.reminder()\n if reminder:\n results.insert(0, {\"type\": \"text\", \"text\": reminder})\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms03 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n"
},
{
"id": "s04",
"filename": "s04_subagent.py",
- "title": "Subagents",
- "subtitle": "Clean Context Per Subtask",
- "loc": 151,
+ "title": "Subagent",
+ "subtitle": "Fresh Context per Subtask",
+ "loc": 200,
"tools": [
"bash",
"read_file",
@@ -161,184 +217,613 @@
"newTools": [
"task"
],
- "coreAddition": "Subagent spawn with isolated messages[]",
- "keyInsight": "Subagents use independent messages[], keeping the main conversation clean",
- "classes": [],
+ "coreAddition": "Delegation with isolated message history",
+ "keyInsight": "A subagent is mainly a context boundary, not a process trick.",
+ "classes": [
+ {
+ "name": "AgentTemplate",
+ "startLine": 67,
+ "endLine": 98
+ }
+ ],
"functions": [
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
- "startLine": 46
+ "startLine": 99
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
- "startLine": 52
+ "startLine": 105
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int = None)",
- "startLine": 64
+ "startLine": 119
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
- "startLine": 73
+ "startLine": 128
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
- "startLine": 82
+ "startLine": 137
},
{
"name": "run_subagent",
"signature": "def run_subagent(prompt: str)",
- "startLine": 115
+ "startLine": 170
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
- "startLine": 143
+ "startLine": 198
+ }
+ ],
+ "layer": "core",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: context isolation -- protecting the model's clarity of thought.\n\"\"\"\ns04_subagent.py - Subagents\n\nSpawn a child agent with fresh messages=[]. The child works in its own\ncontext, sharing the filesystem, then returns only a summary to the parent.\n\n Parent agent Subagent\n +------------------+ +------------------+\n | messages=[...] | | messages=[] | <-- fresh\n | | dispatch | |\n | tool: task | ---------->| while tool_use: |\n | prompt=\"...\" | | call tools |\n | description=\"\" | | append results |\n | | summary | |\n | result = \"...\" | <--------- | return last text |\n +------------------+ +------------------+\n |\n Parent context stays clean.\n Subagent context is discarded.\n\nKey insight: \"Fresh messages=[] gives context isolation. The parent stays clean.\"\n\nNote: Real Claude Code also uses in-process isolation (not OS-level process\nforking). The child runs in the same process with a fresh message array and\nisolated tool context -- same pattern as this teaching implementation.\n\n Comparison with real Claude Code:\n +-------------------+------------------+----------------------------------+\n | Aspect | This demo | Real Claude Code |\n +-------------------+------------------+----------------------------------+\n | Backend | in-process only | 5 backends: in-process, tmux, |\n | | | iTerm2, fork, remote |\n | Context isolation | fresh messages=[]| createSubagentContext() isolates |\n | | | ~20 fields (tools, permissions, |\n | | | cwd, env, hooks, etc.) |\n | Tool filtering | manually curated | resolveAgentTools() filters from |\n | | | parent pool; allowedTools |\n | | | replaces all allow rules |\n | Agent definition | hardcoded system | .claude/agents/*.md with YAML |\n | | prompt | frontmatter (AgentTemplate) |\n +-------------------+------------------+----------------------------------+\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use the task tool to delegate exploration or subtasks.\"\nSUBAGENT_SYSTEM = f\"You are a coding subagent at {WORKDIR}. 
Complete the given task, then summarize your findings.\"\n\n\nclass AgentTemplate:\n \"\"\"\n Parse agent definition from markdown frontmatter.\n\n Real Claude Code loads agent definitions from .claude/agents/*.md.\n Frontmatter fields: name, tools, disallowedTools, skills, hooks,\n model, effort, permissionMode, maxTurns, memory, isolation, color,\n background, initialPrompt, mcpServers.\n 3 sources: built-in, custom (.claude/agents/), plugin-provided.\n \"\"\"\n def __init__(self, path):\n self.path = Path(path)\n self.name = self.path.stem\n self.config = {}\n self.system_prompt = \"\"\n self._parse()\n\n def _parse(self):\n text = self.path.read_text()\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n(.*)\", text, re.DOTALL)\n if not match:\n self.system_prompt = text\n return\n for line in match.group(1).splitlines():\n if \":\" in line:\n k, _, v = line.partition(\":\")\n self.config[k.strip()] = v.strip()\n self.system_prompt = match.group(2).strip()\n self.name = self.config.get(\"name\", self.name)\n\n\n# -- Tool implementations shared by parent and child --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n except (FileNotFoundError, OSError) as e:\n return f\"Error: {e}\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\n# Child gets all base tools except task (no recursive spawning)\nCHILD_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\n# -- Subagent: fresh context, filtered tools, summary-only return --\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}] # fresh context\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n # Only the final text returns to the parent -- child context is discarded\n return \"\".join(b.text for b in response.content if hasattr(b, \"text\")) or \"(no summary)\"\n\n\n# -- Parent tools: base tools + task dispatcher --\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\", \"description\": \"Spawn a subagent with fresh context. 
It shares the filesystem but not conversation history.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"prompt\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\", \"description\": \"Short description of the task\"}}, \"required\": [\"prompt\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=PARENT_TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"task\":\n desc = block.input.get(\"description\", \"subtask\")\n prompt = block.input.get(\"prompt\", \"\")\n print(f\"> task ({desc}): {prompt[:80]}\")\n output = run_subagent(prompt)\n else:\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\" {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms04 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): context isolation(上下文隔离)——保护模型思路清晰度。\n\"\"\"\ns04_subagent.py - Subagent(子智能体)\n\n通过 fresh `messages=[]` 启动 child agent(子智能体)。\n子智能体在独立上下文中工作、共享同一文件系统,最后只把摘要返回给父智能体。\n\n 父智能体(Parent agent) 子智能体(Subagent)\n +------------------+ +------------------+\n | messages=[...] | | messages=[] | <-- fresh(新上下文)\n | | dispatch(派发)| |\n | 工具: task | ---------->| while tool_use(工具调用): |\n | prompt=\"...\" | | call tools(调用工具) |\n | description=\"\" | | append results(追加结果) |\n | | summary(汇总) | |\n | result = \"...\" | <--------- | return last text(返回摘要) |\n +------------------+ +------------------+\n |\n 父上下文保持干净,子上下文任务结束后丢弃。\n\n关键洞察:\n\"fresh messages=[] 就是上下文隔离,父上下文不会被污染。\"\n\n注意:真实 Claude Code 也使用 in-process isolation(进程内隔离),\n并非操作系统级 fork。子智能体与父智能体在同一进程中运行,\n但拥有新的消息数组和隔离的工具上下文,这与本教学实现一致。\n\n 与真实 Claude Code 的对比:\n +-------------------+----------------------+---------------------------------------------+\n | 维度(Aspect) | 教学实现(This demo) | 真实 Claude Code(Real Claude Code) |\n +-------------------+----------------------+---------------------------------------------+\n | 后端(Backend) | 仅 in-process | 5 种后端:in-process、tmux、iTerm2、fork、remote |\n | 上下文隔离 | fresh messages=[] | createSubagentContext() 隔离约 20 个字段(tools、 |\n | (Context isolation)| | permissions、cwd、env、hooks 等) |\n | 工具过滤 | 手工挑选 | resolveAgentTools() 从父工具池过滤;allowedTools |\n | (Tool filtering) | | 可替代所有 allow 规则 |\n | 智能体定义 | 代码内硬编码 prompt | `.claude/agents/*.md` + YAML frontmatter |\n | (Agent definition)| | (模板 AgentTemplate) |\n +-------------------+----------------------+---------------------------------------------+\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = (\n f\"你是位于 {WORKDIR} 的 coding agent(编码智能体)。\"\n \"使用 task 工具委派探索或子任务。\"\n)\nSUBAGENT_SYSTEM = (\n f\"你是位于 {WORKDIR} 的 coding subagent(子智能体)。\"\n \"完成给定任务后,返回清晰摘要。\"\n)\n\n\nclass AgentTemplate:\n \"\"\"\n 从 markdown frontmatter 解析 agent 定义。\n\n 真实 Claude Code 会从 `.claude/agents/*.md` 读取 agent 定义。\n frontmatter 字段包括:name、tools、disallowedTools、skills、hooks、\n model(模型)、effort(推理力度)、permissionMode(权限模式)、maxTurns(轮次上限)、\n memory(记忆)、isolation(隔离)、color(颜色)、background(后台)、\n initialPrompt(初始提示)、mcpServers(MCP 服务配置)。\n 来源通常有三类:built-in、custom(`.claude/agents/`)、plugin-provided。\n \"\"\"\n def __init__(self, path):\n self.path = Path(path)\n self.name = self.path.stem\n self.config = {}\n self.system_prompt = \"\"\n self._parse()\n\n def _parse(self):\n text = self.path.read_text()\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n(.*)\", text, re.DOTALL)\n if not match:\n self.system_prompt = text\n return\n for line in match.group(1).splitlines():\n if \":\" in line:\n k, _, v = line.partition(\":\")\n self.config[k.strip()] = v.strip()\n self.system_prompt = match.group(2).strip()\n self.name = self.config.get(\"name\", self.name)\n\n\n# -- 父子共用工具实现 --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n r = 
subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n except (FileNotFoundError, OSError) as e:\n return f\"Error: {e}\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\n# 子智能体使用基础工具,但不含 task(禁止递归再派生)。\nCHILD_TOOLS = [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\n# -- 子智能体:新上下文、过滤工具、仅返回摘要 --\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}] # fresh context(新上下文)\n for _ in range(30): # safety limit(安全轮次上限)\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n # 仅把最终文本回传父智能体,子上下文整体丢弃。\n return \"\".join(b.text for b in response.content if hasattr(b, \"text\")) or \"(no summary)\"\n\n\n# -- 父智能体工具:基础工具 + task 分发器 --\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": 
\"task\", \"description\": \"创建具有新上下文的子智能体。共享文件系统,但不共享会话历史。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"prompt\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\", \"description\": \"任务的简短说明\"}}, \"required\": [\"prompt\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=PARENT_TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"task\":\n desc = block.input.get(\"description\", \"subtask\")\n prompt = block.input.get(\"prompt\", \"\")\n print(f\"> task ({desc}): {prompt[:80]}\")\n output = run_subagent(prompt)\n else:\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\" {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms04 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: context isolation -- protecting the model's clarity of thought.\n\"\"\"\ns04_subagent.py - Subagents\n\nSpawn a child agent with fresh messages=[]. The child works in its own\ncontext, sharing the filesystem, then returns only a summary to the parent.\n\n Parent agent Subagent\n +------------------+ +------------------+\n | messages=[...] | | messages=[] | <-- fresh\n | | dispatch | |\n | tool: task | ---------->| while tool_use: |\n | prompt=\"...\" | | call tools |\n | description=\"\" | | append results |\n | | summary | |\n | result = \"...\" | <--------- | return last text |\n +------------------+ +------------------+\n |\n Parent context stays clean.\n Subagent context is discarded.\n\nKey insight: \"Fresh messages=[] gives context isolation. The parent stays clean.\"\n\nNote: Real Claude Code also uses in-process isolation (not OS-level process\nforking). The child runs in the same process with a fresh message array and\nisolated tool context -- same pattern as this teaching implementation.\n\n Comparison with real Claude Code:\n +-------------------+------------------+----------------------------------+\n | Aspect | This demo | Real Claude Code |\n +-------------------+------------------+----------------------------------+\n | Backend | in-process only | 5 backends: in-process, tmux, |\n | | | iTerm2, fork, remote |\n | Context isolation | fresh messages=[]| createSubagentContext() isolates |\n | | | ~20 fields (tools, permissions, |\n | | | cwd, env, hooks, etc.) |\n | Tool filtering | manually curated | resolveAgentTools() filters from |\n | | | parent pool; allowedTools |\n | | | replaces all allow rules |\n | Agent definition | hardcoded system | .claude/agents/*.md with YAML |\n | | prompt | frontmatter (AgentTemplate) |\n +-------------------+------------------+----------------------------------+\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use the task tool to delegate exploration or subtasks.\"\nSUBAGENT_SYSTEM = f\"You are a coding subagent at {WORKDIR}. 
Complete the given task, then summarize your findings.\"\n\n\nclass AgentTemplate:\n \"\"\"\n Parse agent definition from markdown frontmatter.\n\n Real Claude Code loads agent definitions from .claude/agents/*.md.\n Frontmatter fields: name, tools, disallowedTools, skills, hooks,\n model, effort, permissionMode, maxTurns, memory, isolation, color,\n background, initialPrompt, mcpServers.\n 3 sources: built-in, custom (.claude/agents/), plugin-provided.\n \"\"\"\n def __init__(self, path):\n self.path = Path(path)\n self.name = self.path.stem\n self.config = {}\n self.system_prompt = \"\"\n self._parse()\n\n def _parse(self):\n text = self.path.read_text()\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n(.*)\", text, re.DOTALL)\n if not match:\n self.system_prompt = text\n return\n for line in match.group(1).splitlines():\n if \":\" in line:\n k, _, v = line.partition(\":\")\n self.config[k.strip()] = v.strip()\n self.system_prompt = match.group(2).strip()\n self.name = self.config.get(\"name\", self.name)\n\n\n# -- Tool implementations shared by parent and child --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n except (FileNotFoundError, OSError) as e:\n return f\"Error: {e}\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\n# Child gets all base tools except task (no recursive spawning)\nCHILD_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\n# -- Subagent: fresh context, filtered tools, summary-only return --\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}] # fresh context\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n # Only the final text returns to the parent -- child context is discarded\n return \"\".join(b.text for b in response.content if hasattr(b, \"text\")) or \"(no summary)\"\n\n\n# -- Parent tools: base tools + task dispatcher --\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\", \"description\": \"Spawn a subagent with fresh context. 
It shares the filesystem but not conversation history.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"prompt\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\", \"description\": \"Short description of the task\"}}, \"required\": [\"prompt\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=PARENT_TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"task\":\n desc = block.input.get(\"description\", \"subtask\")\n prompt = block.input.get(\"prompt\", \"\")\n print(f\"> task ({desc}): {prompt[:80]}\")\n output = run_subagent(prompt)\n else:\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\" {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms04 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: context isolation -- protecting the model's clarity of thought.\n\"\"\"\ns04_subagent.py - Subagents\n\nSpawn a child agent with fresh messages=[]. The child works in its own\ncontext, sharing the filesystem, then returns only a summary to the parent.\n\n Parent agent Subagent\n +------------------+ +------------------+\n | messages=[...] | | messages=[] | <-- fresh\n | | dispatch | |\n | tool: task | ---------->| while tool_use: |\n | prompt=\"...\" | | call tools |\n | description=\"\" | | append results |\n | | summary | |\n | result = \"...\" | <--------- | return last text |\n +------------------+ +------------------+\n |\n Parent context stays clean.\n Subagent context is discarded.\n\nKey insight: \"Fresh messages=[] gives context isolation. The parent stays clean.\"\n\nNote: Real Claude Code also uses in-process isolation (not OS-level process\nforking). The child runs in the same process with a fresh message array and\nisolated tool context -- same pattern as this teaching implementation.\n\n Comparison with real Claude Code:\n +-------------------+------------------+----------------------------------+\n | Aspect | This demo | Real Claude Code |\n +-------------------+------------------+----------------------------------+\n | Backend | in-process only | 5 backends: in-process, tmux, |\n | | | iTerm2, fork, remote |\n | Context isolation | fresh messages=[]| createSubagentContext() isolates |\n | | | ~20 fields (tools, permissions, |\n | | | cwd, env, hooks, etc.) |\n | Tool filtering | manually curated | resolveAgentTools() filters from |\n | | | parent pool; allowedTools |\n | | | replaces all allow rules |\n | Agent definition | hardcoded system | .claude/agents/*.md with YAML |\n | | prompt | frontmatter (AgentTemplate) |\n +-------------------+------------------+----------------------------------+\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use the task tool to delegate exploration or subtasks.\"\nSUBAGENT_SYSTEM = f\"You are a coding subagent at {WORKDIR}. 
Complete the given task, then summarize your findings.\"\n\n\nclass AgentTemplate:\n \"\"\"\n Parse agent definition from markdown frontmatter.\n\n Real Claude Code loads agent definitions from .claude/agents/*.md.\n Frontmatter fields: name, tools, disallowedTools, skills, hooks,\n model, effort, permissionMode, maxTurns, memory, isolation, color,\n background, initialPrompt, mcpServers.\n 3 sources: built-in, custom (.claude/agents/), plugin-provided.\n \"\"\"\n def __init__(self, path):\n self.path = Path(path)\n self.name = self.path.stem\n self.config = {}\n self.system_prompt = \"\"\n self._parse()\n\n def _parse(self):\n text = self.path.read_text()\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n(.*)\", text, re.DOTALL)\n if not match:\n self.system_prompt = text\n return\n for line in match.group(1).splitlines():\n if \":\" in line:\n k, _, v = line.partition(\":\")\n self.config[k.strip()] = v.strip()\n self.system_prompt = match.group(2).strip()\n self.name = self.config.get(\"name\", self.name)\n\n\n# -- Tool implementations shared by parent and child --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n except (FileNotFoundError, OSError) as e:\n return f\"Error: {e}\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\n# Child gets all base tools except task (no recursive spawning)\nCHILD_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\n# -- Subagent: fresh context, filtered tools, summary-only return --\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}] # fresh context\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n # Only the final text returns to the parent -- child context is discarded\n return \"\".join(b.text for b in response.content if hasattr(b, \"text\")) or \"(no summary)\"\n\n\n# -- Parent tools: base tools + task dispatcher --\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\", \"description\": \"Spawn a subagent with fresh context. 
It shares the filesystem but not conversation history.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"prompt\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\", \"description\": \"Short description of the task\"}}, \"required\": [\"prompt\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=PARENT_TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"task\":\n desc = block.input.get(\"description\", \"subtask\")\n prompt = block.input.get(\"prompt\", \"\")\n print(f\"> task ({desc}): {prompt[:80]}\")\n output = run_subagent(prompt)\n else:\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\" {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms04 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ {
+ "id": "s05",
+ "filename": "s05_skill_loading.py",
+ "title": "Skills",
+ "subtitle": "Discover Cheap, Load Deep",
+ "loc": 244,
+ "tools": [
+ "bash",
+ "read_file",
+ "write_file",
+ "edit_file",
+ "load_skill"
+ ],
+ "newTools": [
+ "load_skill"
+ ],
+ "coreAddition": "Skill registry + on-demand injection",
+ "keyInsight": "Discover cheaply, load deeply -- only when needed.",
+ "classes": [
+ {
+ "name": "SkillManifest",
+ "startLine": 36,
+ "endLine": 42
+ },
+ {
+ "name": "SkillDocument",
+ "startLine": 43,
+ "endLine": 47
+ },
+ {
+ "name": "SkillRegistry",
+ "startLine": 48,
+ "endLine": 99
+ }
+ ],
+ "functions": [
+ {
+ "name": "safe_path",
+ "signature": "def safe_path(path_str: str)",
+ "startLine": 110
+ },
+ {
+ "name": "run_bash",
+ "signature": "def run_bash(command: str)",
+ "startLine": 117
+ },
+ {
+ "name": "run_read",
+ "signature": "def run_read(path: str, limit: int | None = None)",
+ "startLine": 137
+ },
+ {
+ "name": "run_write",
+ "signature": "def run_write(path: str, content: str)",
+ "startLine": 147
+ },
+ {
+ "name": "run_edit",
+ "signature": "def run_edit(path: str, old_text: str, new_text: str)",
+ "startLine": 157
+ },
+ {
+ "name": "extract_text",
+ "signature": "def extract_text(content)",
+ "startLine": 236
+ },
+ {
+ "name": "agent_loop",
+ "signature": "def agent_loop(messages: list)",
+ "startLine": 247
+ }
+ ],
+ "layer": "core",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: on-demand knowledge -- discover skills cheaply, load them only when needed.\n\"\"\"\ns05_skill_loading.py - Skills\n\nThis chapter teaches a two-layer skill model:\n\n1. Put a cheap skill catalog in the system prompt.\n2. Load the full skill body only when the model asks for it.\n\nThat keeps the prompt small while still giving the model access to reusable,\ntask-specific guidance.\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nfrom dataclasses import dataclass\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nSKILLS_DIR = WORKDIR / \"skills\"\n\n\n@dataclass\nclass SkillManifest:\n name: str\n description: str\n path: Path\n\n\n@dataclass\nclass SkillDocument:\n manifest: SkillManifest\n body: str\n\n\nclass SkillRegistry:\n def __init__(self, skills_dir: Path):\n self.skills_dir = skills_dir\n self.documents: dict[str, SkillDocument] = {}\n self._load_all()\n\n def _load_all(self) -> None:\n if not self.skills_dir.exists():\n return\n\n for path in sorted(self.skills_dir.rglob(\"SKILL.md\")):\n meta, body = self._parse_frontmatter(path.read_text())\n name = meta.get(\"name\", path.parent.name)\n description = meta.get(\"description\", \"No description\")\n manifest = SkillManifest(name=name, description=description, path=path)\n self.documents[name] = SkillDocument(manifest=manifest, body=body.strip())\n\n def _parse_frontmatter(self, text: str) -> tuple[dict, str]:\n match = re.match(r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL)\n if not match:\n return {}, text\n\n meta = {}\n for line in match.group(1).strip().splitlines():\n if \":\" not in line:\n continue\n key, value = line.split(\":\", 1)\n meta[key.strip()] = value.strip()\n return meta, match.group(2)\n\n def describe_available(self) -> str:\n if not self.documents:\n return \"(no skills available)\"\n lines = []\n for name in sorted(self.documents):\n manifest = self.documents[name].manifest\n lines.append(f\"- {manifest.name}: {manifest.description}\")\n return \"\\n\".join(lines)\n\n def load_full_text(self, name: str) -> str:\n document = self.documents.get(name)\n if not document:\n known = \", \".join(sorted(self.documents)) or \"(none)\"\n return f\"Error: Unknown skill '{name}'. 
Available skills: {known}\"\n\n return (\n f\"\\n\"\n f\"{document.body}\\n\"\n \" \"\n )\n\n\nSKILL_REGISTRY = SkillRegistry(SKILLS_DIR)\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse load_skill when a task needs specialized instructions before you act.\n\nSkills available:\n{SKILL_REGISTRY.describe_available()}\n\"\"\"\n\n\ndef safe_path(path_str: str) -> Path:\n path = (WORKDIR / path_str).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {path_str}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n output = (result.stdout + result.stderr).strip()\n return output[:50000] if output else \"(no output)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n content = file_path.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n file_path.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"load_skill\": lambda **kw: SKILL_REGISTRY.load_full_text(kw[\"name\"]),\n}\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to a file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"load_skill\",\n 
\"description\": \"Load the full body of a named skill into the current context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n]\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef agent_loop(messages: list) -> None:\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as exc:\n output = f\"Error: {exc}\"\n\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms05 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): on-demand knowledge(按需知识)——低成本发现技能,仅在需要时加载。\n\"\"\"\ns05_skill_loading.py - Skills(技能系统)\n\n本章讲解两层技能模型:\n\n1. 在系统提示中放入轻量 skill catalog(技能目录)。\n2. 仅当模型请求时,再加载 skill 正文。\n\n这样既能控制提示长度,又能让模型按需获得可复用、任务相关的指导。\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nfrom dataclasses import dataclass\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\nSKILLS_DIR = WORKDIR / \"skills\"\n\n\n@dataclass\nclass SkillManifest:\n name: str\n description: str\n path: Path\n\n\n@dataclass\nclass SkillDocument:\n manifest: SkillManifest\n body: str\n\n\nclass SkillRegistry:\n def __init__(self, skills_dir: Path):\n self.skills_dir = skills_dir\n self.documents: dict[str, SkillDocument] = {}\n self._load_all()\n\n def _load_all(self) -> None:\n if not self.skills_dir.exists():\n return\n\n for path in sorted(self.skills_dir.rglob(\"SKILL.md\")):\n meta, body = self._parse_frontmatter(path.read_text())\n name = meta.get(\"name\", path.parent.name)\n description = meta.get(\"description\", \"No description\")\n manifest = SkillManifest(name=name, description=description, path=path)\n self.documents[name] = SkillDocument(manifest=manifest, body=body.strip())\n\n def _parse_frontmatter(self, text: str) -> tuple[dict, str]:\n match = re.match(r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL)\n if not match:\n return {}, text\n\n meta = {}\n for line in match.group(1).strip().splitlines():\n if \":\" not in line:\n continue\n key, value = line.split(\":\", 1)\n meta[key.strip()] = value.strip()\n return meta, match.group(2)\n\n def describe_available(self) -> str:\n if not self.documents:\n return \"(no skills available)\"\n lines = []\n for name in sorted(self.documents):\n manifest = self.documents[name].manifest\n lines.append(f\"- {manifest.name}: {manifest.description}\")\n return \"\\n\".join(lines)\n\n def load_full_text(self, name: str) -> str:\n document = self.documents.get(name)\n if not document:\n known = \", \".join(sorted(self.documents)) or \"(none)\"\n return f\"Error: Unknown skill '{name}'. Available skills: {known}\"\n\n return (\n f\"\\n\"\n f\"{document.body}\\n\"\n \" \"\n )\n\n\nSKILL_REGISTRY = SkillRegistry(SKILLS_DIR)\n\nSYSTEM = f\"\"\"你是位于 {WORKDIR} 的 coding agent(编码智能体)。\n当任务在执行前需要专门指引时,使用 load_skill。\n\n当前可用技能:\n{SKILL_REGISTRY.describe_available()}\n\"\"\"\n\n\ndef safe_path(path_str: str) -> Path:\n path = (WORKDIR / path_str).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {path_str}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n output = (result.stdout + result.stderr).strip()\n return output[:50000] if output else \"(no output)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n content = file_path.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n file_path.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"load_skill\": lambda **kw: SKILL_REGISTRY.load_full_text(kw[\"name\"]),\n}\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"读取文件内容。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"向文件写入内容。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"在文件中执行一次精确文本替换。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"load_skill\",\n \"description\": \"将指定技能的完整正文加载到当前上下文。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n]\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef agent_loop(messages: list) -> None:\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as exc:\n output = f\"Error: {exc}\"\n\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n 
try:\n query = input(\"\\033[36ms05 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: on-demand knowledge -- discover skills cheaply, load them only when needed.\n\"\"\"\ns05_skill_loading.py - Skills\n\nThis chapter teaches a two-layer skill model:\n\n1. Put a cheap skill catalog in the system prompt.\n2. Load the full skill body only when the model asks for it.\n\nThat keeps the prompt small while still giving the model access to reusable,\ntask-specific guidance.\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nfrom dataclasses import dataclass\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nSKILLS_DIR = WORKDIR / \"skills\"\n\n\n@dataclass\nclass SkillManifest:\n name: str\n description: str\n path: Path\n\n\n@dataclass\nclass SkillDocument:\n manifest: SkillManifest\n body: str\n\n\nclass SkillRegistry:\n def __init__(self, skills_dir: Path):\n self.skills_dir = skills_dir\n self.documents: dict[str, SkillDocument] = {}\n self._load_all()\n\n def _load_all(self) -> None:\n if not self.skills_dir.exists():\n return\n\n for path in sorted(self.skills_dir.rglob(\"SKILL.md\")):\n meta, body = self._parse_frontmatter(path.read_text())\n name = meta.get(\"name\", path.parent.name)\n description = meta.get(\"description\", \"No description\")\n manifest = SkillManifest(name=name, description=description, path=path)\n self.documents[name] = SkillDocument(manifest=manifest, body=body.strip())\n\n def _parse_frontmatter(self, text: str) -> tuple[dict, str]:\n match = re.match(r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL)\n if not match:\n return {}, text\n\n meta = {}\n for line in match.group(1).strip().splitlines():\n if \":\" not in line:\n continue\n key, value = line.split(\":\", 1)\n meta[key.strip()] = value.strip()\n return meta, match.group(2)\n\n def describe_available(self) -> str:\n if not self.documents:\n return \"(no skills available)\"\n lines = []\n for name in sorted(self.documents):\n manifest = self.documents[name].manifest\n lines.append(f\"- {manifest.name}: {manifest.description}\")\n return \"\\n\".join(lines)\n\n def load_full_text(self, name: str) -> str:\n document = self.documents.get(name)\n if not document:\n known = \", \".join(sorted(self.documents)) or \"(none)\"\n return f\"Error: Unknown skill '{name}'. 
Available skills: {known}\"\n\n return (\n f\"\\n\"\n f\"{document.body}\\n\"\n \" \"\n )\n\n\nSKILL_REGISTRY = SkillRegistry(SKILLS_DIR)\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse load_skill when a task needs specialized instructions before you act.\n\nSkills available:\n{SKILL_REGISTRY.describe_available()}\n\"\"\"\n\n\ndef safe_path(path_str: str) -> Path:\n path = (WORKDIR / path_str).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {path_str}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n output = (result.stdout + result.stderr).strip()\n return output[:50000] if output else \"(no output)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n content = file_path.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n file_path.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"load_skill\": lambda **kw: SKILL_REGISTRY.load_full_text(kw[\"name\"]),\n}\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to a file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"load_skill\",\n 
\"description\": \"Load the full body of a named skill into the current context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n]\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef agent_loop(messages: list) -> None:\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as exc:\n output = f\"Error: {exc}\"\n\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms05 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: on-demand knowledge -- discover skills cheaply, load them only when needed.\n\"\"\"\ns05_skill_loading.py - Skills\n\nThis chapter teaches a two-layer skill model:\n\n1. Put a cheap skill catalog in the system prompt.\n2. Load the full skill body only when the model asks for it.\n\nThat keeps the prompt small while still giving the model access to reusable,\ntask-specific guidance.\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nfrom dataclasses import dataclass\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nSKILLS_DIR = WORKDIR / \"skills\"\n\n\n@dataclass\nclass SkillManifest:\n name: str\n description: str\n path: Path\n\n\n@dataclass\nclass SkillDocument:\n manifest: SkillManifest\n body: str\n\n\nclass SkillRegistry:\n def __init__(self, skills_dir: Path):\n self.skills_dir = skills_dir\n self.documents: dict[str, SkillDocument] = {}\n self._load_all()\n\n def _load_all(self) -> None:\n if not self.skills_dir.exists():\n return\n\n for path in sorted(self.skills_dir.rglob(\"SKILL.md\")):\n meta, body = self._parse_frontmatter(path.read_text())\n name = meta.get(\"name\", path.parent.name)\n description = meta.get(\"description\", \"No description\")\n manifest = SkillManifest(name=name, description=description, path=path)\n self.documents[name] = SkillDocument(manifest=manifest, body=body.strip())\n\n def _parse_frontmatter(self, text: str) -> tuple[dict, str]:\n match = re.match(r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL)\n if not match:\n return {}, text\n\n meta = {}\n for line in match.group(1).strip().splitlines():\n if \":\" not in line:\n continue\n key, value = line.split(\":\", 1)\n meta[key.strip()] = value.strip()\n return meta, match.group(2)\n\n def describe_available(self) -> str:\n if not self.documents:\n return \"(no skills available)\"\n lines = []\n for name in sorted(self.documents):\n manifest = self.documents[name].manifest\n lines.append(f\"- {manifest.name}: {manifest.description}\")\n return \"\\n\".join(lines)\n\n def load_full_text(self, name: str) -> str:\n document = self.documents.get(name)\n if not document:\n known = \", \".join(sorted(self.documents)) or \"(none)\"\n return f\"Error: Unknown skill '{name}'. 
Available skills: {known}\"\n\n return (\n f\"\\n\"\n f\"{document.body}\\n\"\n \" \"\n )\n\n\nSKILL_REGISTRY = SkillRegistry(SKILLS_DIR)\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse load_skill when a task needs specialized instructions before you act.\n\nSkills available:\n{SKILL_REGISTRY.describe_available()}\n\"\"\"\n\n\ndef safe_path(path_str: str) -> Path:\n path = (WORKDIR / path_str).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {path_str}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n output = (result.stdout + result.stderr).strip()\n return output[:50000] if output else \"(no output)\"\n\n\ndef run_read(path: str, limit: int | None = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n content = file_path.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n file_path.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"load_skill\": lambda **kw: SKILL_REGISTRY.load_full_text(kw[\"name\"]),\n}\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to a file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"load_skill\",\n 
\"description\": \"Load the full body of a named skill into the current context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n]\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef agent_loop(messages: list) -> None:\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as exc:\n output = f\"Error: {exc}\"\n\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms05 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n"
+ },
+ {
+ "id": "s06",
+ "filename": "s06_context_compact.py",
+ "title": "Context Compact",
+ "subtitle": "Keep the Active Context Small",
+ "loc": 308,
+ "tools": [
+ "bash",
+ "read_file",
+ "write_file",
+ "edit_file",
+ "compact"
+ ],
+ "newTools": [
+ "compact"
+ ],
+ "coreAddition": "Persist markers + micro compact + summary compact",
+ "keyInsight": "Compaction isn't deleting history -- it's relocating detail so the agent can keep working.",
+ "classes": [
+ {
+ "name": "CompactState",
+ "startLine": 50,
+ "endLine": 55
+ }
+ ],
+ "functions": [
+ {
+ "name": "estimate_context_size",
+ "signature": "def estimate_context_size(messages: list)",
+ "startLine": 56
+ },
+ {
+ "name": "track_recent_file",
+ "signature": "def track_recent_file(state: CompactState, path: str)",
+ "startLine": 60
+ },
+ {
+ "name": "safe_path",
+ "signature": "def safe_path(path_str: str)",
+ "startLine": 68
+ },
+ {
+ "name": "persist_large_output",
+ "signature": "def persist_large_output(tool_use_id: str, output: str)",
+ "startLine": 75
+ },
+ {
+ "name": "collect_tool_result_blocks",
+ "signature": "def collect_tool_result_blocks(messages: list)",
+ "startLine": 95
+ },
+ {
+ "name": "micro_compact",
+ "signature": "def micro_compact(messages: list)",
+ "startLine": 107
+ },
+ {
+ "name": "write_transcript",
+ "signature": "def write_transcript(messages: list)",
+ "startLine": 120
+ },
+ {
+ "name": "summarize_history",
+ "signature": "def summarize_history(messages: list)",
+ "startLine": 129
+ },
+ {
+ "name": "compact_history",
+ "signature": "def compact_history(messages: list, state: CompactState, focus: str | None = None)",
+ "startLine": 150
+ },
+ {
+ "name": "run_bash",
+ "signature": "def run_bash(command: str, tool_use_id: str)",
+ "startLine": 173
+ },
+ {
+ "name": "run_read",
+ "signature": "def run_read(path: str, tool_use_id: str, state: CompactState, limit: int | None = None)",
+ "startLine": 193
+ },
+ {
+ "name": "run_write",
+ "signature": "def run_write(path: str, content: str)",
+ "startLine": 205
+ },
+ {
+ "name": "run_edit",
+ "signature": "def run_edit(path: str, old_text: str, new_text: str)",
+ "startLine": 215
+ },
+ {
+ "name": "extract_text",
+ "signature": "def extract_text(content)",
+ "startLine": 287
+ },
+ {
+ "name": "execute_tool",
+ "signature": "def execute_tool(block, state: CompactState)",
+ "startLine": 298
+ },
+ {
+ "name": "agent_loop",
+ "signature": "def agent_loop(messages: list, state: CompactState)",
+ "startLine": 312
+ }
+ ],
+ "layer": "core",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: compression -- keep the active context small enough to keep working.\n\"\"\"\ns06_context_compact.py - Context Compact\n\nThis teaching version keeps the compact model intentionally small:\n\n1. Large tool output is persisted to disk and replaced with a preview marker.\n2. Older tool results are micro-compacted into short placeholders.\n3. When the whole conversation gets too large, the agent summarizes it and\n continues from that summary.\n\nThe goal is not to model every production branch. The goal is to make the\nactive-context idea explicit and teachable.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport time\nfrom dataclasses import dataclass, field\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Keep working step by step, and use compact if the conversation gets too long.\"\n)\n\nCONTEXT_LIMIT = 50000\nKEEP_RECENT_TOOL_RESULTS = 3\nPERSIST_THRESHOLD = 30000\nPREVIEW_CHARS = 2000\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\n\n\n@dataclass\nclass CompactState:\n has_compacted: bool = False\n last_summary: str = \"\"\n recent_files: list[str] = field(default_factory=list)\n\n\ndef estimate_context_size(messages: list) -> int:\n return len(str(messages))\n\n\ndef track_recent_file(state: CompactState, path: str) -> None:\n if path in state.recent_files:\n state.recent_files.remove(path)\n state.recent_files.append(path)\n if len(state.recent_files) > 5:\n state.recent_files[:] = state.recent_files[-5:]\n\n\ndef safe_path(path_str: str) -> Path:\n path = (WORKDIR / path_str).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {path_str}\")\n return path\n\n\ndef persist_large_output(tool_use_id: str, output: str) -> str:\n if len(output) <= PERSIST_THRESHOLD:\n return output\n\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n stored_path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not stored_path.exists():\n stored_path.write_text(output)\n\n preview = output[:PREVIEW_CHARS]\n rel_path = stored_path.relative_to(WORKDIR)\n return (\n \"\\n\"\n f\"Full output saved to: {rel_path}\\n\"\n \"Preview:\\n\"\n f\"{preview}\\n\"\n \" \"\n )\n\n\ndef collect_tool_result_blocks(messages: list) -> list[tuple[int, int, dict]]:\n blocks = []\n for message_index, message in enumerate(messages):\n content = message.get(\"content\")\n if message.get(\"role\") != \"user\" or not isinstance(content, list):\n continue\n for block_index, block in enumerate(content):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n blocks.append((message_index, block_index, block))\n return blocks\n\n\ndef micro_compact(messages: list) -> list:\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n content = block.get(\"content\", \"\")\n if not isinstance(content, str) or len(content) <= 120:\n continue\n block[\"content\"] = \"[Earlier tool result compacted. 
Re-run the tool if you need full detail.]\"\n return messages\n\n\ndef write_transcript(messages: list) -> Path:\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as handle:\n for message in messages:\n handle.write(json.dumps(message, default=str) + \"\\n\")\n return path\n\n\ndef summarize_history(messages: list) -> str:\n conversation = json.dumps(messages, default=str)[:80000]\n prompt = (\n \"Summarize this coding-agent conversation so work can continue.\\n\"\n \"Preserve:\\n\"\n \"1. The current goal\\n\"\n \"2. Important findings and decisions\\n\"\n \"3. Files read or changed\\n\"\n \"4. Remaining work\\n\"\n \"5. User constraints and preferences\\n\"\n \"Be compact but concrete.\\n\\n\"\n f\"{conversation}\"\n )\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=2000,\n )\n return response.content[0].text.strip()\n\n\ndef compact_history(messages: list, state: CompactState, focus: str | None = None) -> list:\n transcript_path = write_transcript(messages)\n print(f\"[transcript saved: {transcript_path}]\")\n\n summary = summarize_history(messages)\n if focus:\n summary += f\"\\n\\nFocus to preserve next: {focus}\"\n if state.recent_files:\n recent_lines = \"\\n\".join(f\"- {path}\" for path in state.recent_files)\n summary += f\"\\n\\nRecent files to reopen if needed:\\n{recent_lines}\"\n\n state.has_compacted = True\n state.last_summary = summary\n\n return [{\n \"role\": \"user\",\n \"content\": (\n \"This conversation was compacted so the agent can continue working.\\n\\n\"\n f\"{summary}\"\n ),\n }]\n\n\ndef run_bash(command: str, tool_use_id: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n output = (result.stdout + result.stderr).strip() or \"(no output)\"\n return persist_large_output(tool_use_id, output)\n\n\ndef run_read(path: str, tool_use_id: str, state: CompactState, limit: int | None = None) -> str:\n try:\n track_recent_file(state, path)\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n output = \"\\n\".join(lines)\n return persist_large_output(tool_use_id, output)\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n content = file_path.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n file_path.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to a file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"compact\",\n \"description\": \"Summarize earlier conversation so work can continue in a smaller context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"focus\": {\"type\": \"string\"},\n },\n },\n },\n]\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef execute_tool(block, state: CompactState) -> str:\n if block.name == \"bash\":\n return run_bash(block.input[\"command\"], block.id)\n if block.name == \"read_file\":\n return run_read(block.input[\"path\"], block.id, state, block.input.get(\"limit\"))\n if block.name == \"write_file\":\n return run_write(block.input[\"path\"], block.input[\"content\"])\n if block.name == \"edit_file\":\n return run_edit(block.input[\"path\"], block.input[\"old_text\"], block.input[\"new_text\"])\n if block.name == \"compact\":\n return \"Compacting conversation...\"\n return f\"Unknown tool: {block.name}\"\n\n\ndef agent_loop(messages: list, state: CompactState) -> None:\n while True:\n messages[:] = micro_compact(messages)\n\n if estimate_context_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages, state)\n\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n 
manual_compact = False\n compact_focus = None\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n output = execute_tool(block, state)\n if block.name == \"compact\":\n manual_compact = True\n compact_focus = (block.input or {}).get(\"focus\")\n\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n if manual_compact:\n print(\"[manual compact]\")\n messages[:] = compact_history(messages, state, focus=compact_focus)\n\n\nif __name__ == \"__main__\":\n history = []\n compact_state = CompactState()\n\n while True:\n try:\n query = input(\"\\033[36ms06 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, compact_state)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): compression(压缩)——把活跃上下文控制在可持续工作范围内。\n\"\"\"\ns06_context_compact.py - Context Compact(上下文压缩)\n\n本教学版刻意把 compact 模型保持在最小可讲解规模:\n\n1. 大体积工具输出落盘保存,并在消息中替换为预览标记。\n2. 较早的工具结果会进行 micro-compact(微压缩),变为短占位文本。\n3. 当整段会话过大时,智能体先摘要,再从摘要继续执行。\n\n目标不是覆盖生产系统所有分支,而是把 active-context(活跃上下文)机制\n清晰、可教学地显式化。\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport time\nfrom dataclasses import dataclass, field\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = (\n f\"你是位于 {WORKDIR} 的 coding agent(编码智能体)。\"\n \"请按步骤持续工作;当会话过长时,使用 compact 进行压缩。\"\n)\n\nCONTEXT_LIMIT = 50000\nKEEP_RECENT_TOOL_RESULTS = 3\nPERSIST_THRESHOLD = 30000\nPREVIEW_CHARS = 2000\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\n\n\n@dataclass\nclass CompactState:\n has_compacted: bool = False\n last_summary: str = \"\"\n recent_files: list[str] = field(default_factory=list)\n\n\ndef estimate_context_size(messages: list) -> int:\n return len(str(messages))\n\n\ndef track_recent_file(state: CompactState, path: str) -> None:\n if path in state.recent_files:\n state.recent_files.remove(path)\n state.recent_files.append(path)\n if len(state.recent_files) > 5:\n state.recent_files[:] = state.recent_files[-5:]\n\n\ndef safe_path(path_str: str) -> Path:\n path = (WORKDIR / path_str).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {path_str}\")\n return path\n\n\ndef persist_large_output(tool_use_id: str, output: str) -> str:\n if len(output) <= PERSIST_THRESHOLD:\n return output\n\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n stored_path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not stored_path.exists():\n stored_path.write_text(output)\n\n preview = output[:PREVIEW_CHARS]\n rel_path = stored_path.relative_to(WORKDIR)\n return (\n \"\\n\"\n f\"完整输出已保存至:{rel_path}\\n\"\n \"预览:\\n\"\n f\"{preview}\\n\"\n \" \"\n )\n\n\ndef collect_tool_result_blocks(messages: list) -> list[tuple[int, int, dict]]:\n blocks = []\n for message_index, message in enumerate(messages):\n content = message.get(\"content\")\n if message.get(\"role\") != \"user\" or not isinstance(content, list):\n continue\n for block_index, block in enumerate(content):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n blocks.append((message_index, block_index, block))\n return blocks\n\n\ndef micro_compact(messages: list) -> list:\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n content = block.get(\"content\", \"\")\n if not isinstance(content, str) or len(content) <= 120:\n continue\n block[\"content\"] = \"[较早工具结果已压缩;若需完整细节请重新执行该工具。]\"\n return messages\n\n\ndef write_transcript(messages: list) -> Path:\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as handle:\n for message in messages:\n handle.write(json.dumps(message, default=str) + \"\\n\")\n return path\n\n\ndef summarize_history(messages: list) -> str:\n conversation = json.dumps(messages, default=str)[:80000]\n prompt = (\n \"请总结这段 
coding-agent 会话,以便后续继续工作。\\n\"\n \"请保留:\\n\"\n \"1. 当前目标\\n\"\n \"2. 关键发现与决策\\n\"\n \"3. 已读取或修改的文件\\n\"\n \"4. 尚未完成的工作\\n\"\n \"5. 用户约束与偏好\\n\"\n \"要求精简但具体。\\n\\n\"\n f\"{conversation}\"\n )\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=2000,\n )\n return response.content[0].text.strip()\n\n\ndef compact_history(messages: list, state: CompactState, focus: str | None = None) -> list:\n transcript_path = write_transcript(messages)\n print(f\"[transcript saved: {transcript_path}]\")\n\n summary = summarize_history(messages)\n if focus:\n summary += f\"\\n\\n下一阶段需重点保留:{focus}\"\n if state.recent_files:\n recent_lines = \"\\n\".join(f\"- {path}\" for path in state.recent_files)\n summary += f\"\\n\\n如有需要可优先重开这些近期文件:\\n{recent_lines}\"\n\n state.has_compacted = True\n state.last_summary = summary\n\n return [{\n \"role\": \"user\",\n \"content\": (\n \"会话已执行 compact 压缩,以便智能体继续工作。\\n\\n\"\n f\"{summary}\"\n ),\n }]\n\n\ndef run_bash(command: str, tool_use_id: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n output = (result.stdout + result.stderr).strip() or \"(no output)\"\n return persist_large_output(tool_use_id, output)\n\n\ndef run_read(path: str, tool_use_id: str, state: CompactState, limit: int | None = None) -> str:\n try:\n track_recent_file(state, path)\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n output = \"\\n\".join(lines)\n return persist_large_output(tool_use_id, output)\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n content = file_path.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n file_path.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"读取文件内容。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"向文件写入内容。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"在文件中执行一次精确文本替换。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"compact\",\n \"description\": \"总结较早会话内容,让后续可在更小上下文中继续执行。\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"focus\": {\"type\": \"string\"},\n },\n },\n },\n]\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef execute_tool(block, state: CompactState) -> str:\n if block.name == \"bash\":\n return run_bash(block.input[\"command\"], block.id)\n if block.name == \"read_file\":\n return run_read(block.input[\"path\"], block.id, state, block.input.get(\"limit\"))\n if block.name == \"write_file\":\n return run_write(block.input[\"path\"], block.input[\"content\"])\n if block.name == \"edit_file\":\n return run_edit(block.input[\"path\"], block.input[\"old_text\"], block.input[\"new_text\"])\n if block.name == \"compact\":\n return \"正在压缩会话...\"\n return f\"Unknown tool: {block.name}\"\n\n\ndef agent_loop(messages: list, state: CompactState) -> None:\n while True:\n messages[:] = micro_compact(messages)\n\n if estimate_context_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages, state)\n\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n manual_compact = False\n compact_focus = None\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n 
output = execute_tool(block, state)\n if block.name == \"compact\":\n manual_compact = True\n compact_focus = (block.input or {}).get(\"focus\")\n\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n if manual_compact:\n print(\"[manual compact]\")\n messages[:] = compact_history(messages, state, focus=compact_focus)\n\n\nif __name__ == \"__main__\":\n history = []\n compact_state = CompactState()\n\n while True:\n try:\n query = input(\"\\033[36ms06 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, compact_state)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: compression -- keep the active context small enough to keep working.\n\"\"\"\ns06_context_compact.py - Context Compact\n\nThis teaching version keeps the compact model intentionally small:\n\n1. Large tool output is persisted to disk and replaced with a preview marker.\n2. Older tool results are micro-compacted into short placeholders.\n3. When the whole conversation gets too large, the agent summarizes it and\n continues from that summary.\n\nThe goal is not to model every production branch. The goal is to make the\nactive-context idea explicit and teachable.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport time\nfrom dataclasses import dataclass, field\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Keep working step by step, and use compact if the conversation gets too long.\"\n)\n\nCONTEXT_LIMIT = 50000\nKEEP_RECENT_TOOL_RESULTS = 3\nPERSIST_THRESHOLD = 30000\nPREVIEW_CHARS = 2000\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\n\n\n@dataclass\nclass CompactState:\n has_compacted: bool = False\n last_summary: str = \"\"\n recent_files: list[str] = field(default_factory=list)\n\n\ndef estimate_context_size(messages: list) -> int:\n return len(str(messages))\n\n\ndef track_recent_file(state: CompactState, path: str) -> None:\n if path in state.recent_files:\n state.recent_files.remove(path)\n state.recent_files.append(path)\n if len(state.recent_files) > 5:\n state.recent_files[:] = state.recent_files[-5:]\n\n\ndef safe_path(path_str: str) -> Path:\n path = (WORKDIR / path_str).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {path_str}\")\n return path\n\n\ndef persist_large_output(tool_use_id: str, output: str) -> str:\n if len(output) <= PERSIST_THRESHOLD:\n return output\n\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n stored_path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not stored_path.exists():\n stored_path.write_text(output)\n\n preview = output[:PREVIEW_CHARS]\n rel_path = stored_path.relative_to(WORKDIR)\n return (\n \"\\n\"\n f\"Full output saved to: {rel_path}\\n\"\n \"Preview:\\n\"\n f\"{preview}\\n\"\n \" \"\n )\n\n\ndef collect_tool_result_blocks(messages: list) -> list[tuple[int, int, dict]]:\n blocks = []\n for message_index, message in enumerate(messages):\n content = message.get(\"content\")\n if message.get(\"role\") != \"user\" or not isinstance(content, list):\n continue\n for block_index, block in enumerate(content):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n blocks.append((message_index, block_index, block))\n return blocks\n\n\ndef micro_compact(messages: list) -> list:\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n content = block.get(\"content\", \"\")\n if not isinstance(content, str) or len(content) <= 120:\n continue\n block[\"content\"] = \"[Earlier tool result compacted. 
Re-run the tool if you need full detail.]\"\n return messages\n\n\ndef write_transcript(messages: list) -> Path:\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as handle:\n for message in messages:\n handle.write(json.dumps(message, default=str) + \"\\n\")\n return path\n\n\ndef summarize_history(messages: list) -> str:\n conversation = json.dumps(messages, default=str)[:80000]\n prompt = (\n \"Summarize this coding-agent conversation so work can continue.\\n\"\n \"Preserve:\\n\"\n \"1. The current goal\\n\"\n \"2. Important findings and decisions\\n\"\n \"3. Files read or changed\\n\"\n \"4. Remaining work\\n\"\n \"5. User constraints and preferences\\n\"\n \"Be compact but concrete.\\n\\n\"\n f\"{conversation}\"\n )\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=2000,\n )\n return response.content[0].text.strip()\n\n\ndef compact_history(messages: list, state: CompactState, focus: str | None = None) -> list:\n transcript_path = write_transcript(messages)\n print(f\"[transcript saved: {transcript_path}]\")\n\n summary = summarize_history(messages)\n if focus:\n summary += f\"\\n\\nFocus to preserve next: {focus}\"\n if state.recent_files:\n recent_lines = \"\\n\".join(f\"- {path}\" for path in state.recent_files)\n summary += f\"\\n\\nRecent files to reopen if needed:\\n{recent_lines}\"\n\n state.has_compacted = True\n state.last_summary = summary\n\n return [{\n \"role\": \"user\",\n \"content\": (\n \"This conversation was compacted so the agent can continue working.\\n\\n\"\n f\"{summary}\"\n ),\n }]\n\n\ndef run_bash(command: str, tool_use_id: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n output = (result.stdout + result.stderr).strip() or \"(no output)\"\n return persist_large_output(tool_use_id, output)\n\n\ndef run_read(path: str, tool_use_id: str, state: CompactState, limit: int | None = None) -> str:\n try:\n track_recent_file(state, path)\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n output = \"\\n\".join(lines)\n return persist_large_output(tool_use_id, output)\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n content = file_path.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n file_path.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to a file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"compact\",\n \"description\": \"Summarize earlier conversation so work can continue in a smaller context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"focus\": {\"type\": \"string\"},\n },\n },\n },\n]\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef execute_tool(block, state: CompactState) -> str:\n if block.name == \"bash\":\n return run_bash(block.input[\"command\"], block.id)\n if block.name == \"read_file\":\n return run_read(block.input[\"path\"], block.id, state, block.input.get(\"limit\"))\n if block.name == \"write_file\":\n return run_write(block.input[\"path\"], block.input[\"content\"])\n if block.name == \"edit_file\":\n return run_edit(block.input[\"path\"], block.input[\"old_text\"], block.input[\"new_text\"])\n if block.name == \"compact\":\n return \"Compacting conversation...\"\n return f\"Unknown tool: {block.name}\"\n\n\ndef agent_loop(messages: list, state: CompactState) -> None:\n while True:\n messages[:] = micro_compact(messages)\n\n if estimate_context_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages, state)\n\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n 
manual_compact = False\n compact_focus = None\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n output = execute_tool(block, state)\n if block.name == \"compact\":\n manual_compact = True\n compact_focus = (block.input or {}).get(\"focus\")\n\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n if manual_compact:\n print(\"[manual compact]\")\n messages[:] = compact_history(messages, state, focus=compact_focus)\n\n\nif __name__ == \"__main__\":\n history = []\n compact_state = CompactState()\n\n while True:\n try:\n query = input(\"\\033[36ms06 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, compact_state)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: compression -- keep the active context small enough to keep working.\n\"\"\"\ns06_context_compact.py - Context Compact\n\nThis teaching version keeps the compact model intentionally small:\n\n1. Large tool output is persisted to disk and replaced with a preview marker.\n2. Older tool results are micro-compacted into short placeholders.\n3. When the whole conversation gets too large, the agent summarizes it and\n continues from that summary.\n\nThe goal is not to model every production branch. The goal is to make the\nactive-context idea explicit and teachable.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport time\nfrom dataclasses import dataclass, field\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Keep working step by step, and use compact if the conversation gets too long.\"\n)\n\nCONTEXT_LIMIT = 50000\nKEEP_RECENT_TOOL_RESULTS = 3\nPERSIST_THRESHOLD = 30000\nPREVIEW_CHARS = 2000\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nTOOL_RESULTS_DIR = WORKDIR / \".task_outputs\" / \"tool-results\"\n\n\n@dataclass\nclass CompactState:\n has_compacted: bool = False\n last_summary: str = \"\"\n recent_files: list[str] = field(default_factory=list)\n\n\ndef estimate_context_size(messages: list) -> int:\n return len(str(messages))\n\n\ndef track_recent_file(state: CompactState, path: str) -> None:\n if path in state.recent_files:\n state.recent_files.remove(path)\n state.recent_files.append(path)\n if len(state.recent_files) > 5:\n state.recent_files[:] = state.recent_files[-5:]\n\n\ndef safe_path(path_str: str) -> Path:\n path = (WORKDIR / path_str).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {path_str}\")\n return path\n\n\ndef persist_large_output(tool_use_id: str, output: str) -> str:\n if len(output) <= PERSIST_THRESHOLD:\n return output\n\n TOOL_RESULTS_DIR.mkdir(parents=True, exist_ok=True)\n stored_path = TOOL_RESULTS_DIR / f\"{tool_use_id}.txt\"\n if not stored_path.exists():\n stored_path.write_text(output)\n\n preview = output[:PREVIEW_CHARS]\n rel_path = stored_path.relative_to(WORKDIR)\n return (\n \"\\n\"\n f\"Full output saved to: {rel_path}\\n\"\n \"Preview:\\n\"\n f\"{preview}\\n\"\n \" \"\n )\n\n\ndef collect_tool_result_blocks(messages: list) -> list[tuple[int, int, dict]]:\n blocks = []\n for message_index, message in enumerate(messages):\n content = message.get(\"content\")\n if message.get(\"role\") != \"user\" or not isinstance(content, list):\n continue\n for block_index, block in enumerate(content):\n if isinstance(block, dict) and block.get(\"type\") == \"tool_result\":\n blocks.append((message_index, block_index, block))\n return blocks\n\n\ndef micro_compact(messages: list) -> list:\n tool_results = collect_tool_result_blocks(messages)\n if len(tool_results) <= KEEP_RECENT_TOOL_RESULTS:\n return messages\n\n for _, _, block in tool_results[:-KEEP_RECENT_TOOL_RESULTS]:\n content = block.get(\"content\", \"\")\n if not isinstance(content, str) or len(content) <= 120:\n continue\n block[\"content\"] = \"[Earlier tool result compacted. 
Re-run the tool if you need full detail.]\"\n return messages\n\n\ndef write_transcript(messages: list) -> Path:\n TRANSCRIPT_DIR.mkdir(parents=True, exist_ok=True)\n path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with path.open(\"w\") as handle:\n for message in messages:\n handle.write(json.dumps(message, default=str) + \"\\n\")\n return path\n\n\ndef summarize_history(messages: list) -> str:\n conversation = json.dumps(messages, default=str)[:80000]\n prompt = (\n \"Summarize this coding-agent conversation so work can continue.\\n\"\n \"Preserve:\\n\"\n \"1. The current goal\\n\"\n \"2. Important findings and decisions\\n\"\n \"3. Files read or changed\\n\"\n \"4. Remaining work\\n\"\n \"5. User constraints and preferences\\n\"\n \"Be compact but concrete.\\n\\n\"\n f\"{conversation}\"\n )\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=2000,\n )\n return response.content[0].text.strip()\n\n\ndef compact_history(messages: list, state: CompactState, focus: str | None = None) -> list:\n transcript_path = write_transcript(messages)\n print(f\"[transcript saved: {transcript_path}]\")\n\n summary = summarize_history(messages)\n if focus:\n summary += f\"\\n\\nFocus to preserve next: {focus}\"\n if state.recent_files:\n recent_lines = \"\\n\".join(f\"- {path}\" for path in state.recent_files)\n summary += f\"\\n\\nRecent files to reopen if needed:\\n{recent_lines}\"\n\n state.has_compacted = True\n state.last_summary = summary\n\n return [{\n \"role\": \"user\",\n \"content\": (\n \"This conversation was compacted so the agent can continue working.\\n\\n\"\n f\"{summary}\"\n ),\n }]\n\n\ndef run_bash(command: str, tool_use_id: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(item in command for item in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n result = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n output = (result.stdout + result.stderr).strip() or \"(no output)\"\n return persist_large_output(tool_use_id, output)\n\n\ndef run_read(path: str, tool_use_id: str, state: CompactState, limit: int | None = None) -> str:\n try:\n track_recent_file(state, path)\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n output = \"\\n\".join(lines)\n return persist_large_output(tool_use_id, output)\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n file_path = safe_path(path)\n file_path.parent.mkdir(parents=True, exist_ok=True)\n file_path.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n file_path = safe_path(path)\n content = file_path.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n file_path.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as exc:\n return f\"Error: {exc}\"\n\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to a file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in a file once.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"compact\",\n \"description\": \"Summarize earlier conversation so work can continue in a smaller context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"focus\": {\"type\": \"string\"},\n },\n },\n },\n]\n\n\ndef extract_text(content) -> str:\n if not isinstance(content, list):\n return \"\"\n texts = []\n for block in content:\n text = getattr(block, \"text\", None)\n if text:\n texts.append(text)\n return \"\\n\".join(texts).strip()\n\n\ndef execute_tool(block, state: CompactState) -> str:\n if block.name == \"bash\":\n return run_bash(block.input[\"command\"], block.id)\n if block.name == \"read_file\":\n return run_read(block.input[\"path\"], block.id, state, block.input.get(\"limit\"))\n if block.name == \"write_file\":\n return run_write(block.input[\"path\"], block.input[\"content\"])\n if block.name == \"edit_file\":\n return run_edit(block.input[\"path\"], block.input[\"old_text\"], block.input[\"new_text\"])\n if block.name == \"compact\":\n return \"Compacting conversation...\"\n return f\"Unknown tool: {block.name}\"\n\n\ndef agent_loop(messages: list, state: CompactState) -> None:\n while True:\n messages[:] = micro_compact(messages)\n\n if estimate_context_size(messages) > CONTEXT_LIMIT:\n print(\"[auto compact]\")\n messages[:] = compact_history(messages, state)\n\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n 
manual_compact = False\n compact_focus = None\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n output = execute_tool(block, state)\n if block.name == \"compact\":\n manual_compact = True\n compact_focus = (block.input or {}).get(\"focus\")\n\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n if manual_compact:\n print(\"[manual compact]\")\n messages[:] = compact_history(messages, state, focus=compact_focus)\n\n\nif __name__ == \"__main__\":\n history = []\n compact_state = CompactState()\n\n while True:\n try:\n query = input(\"\\033[36ms06 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, compact_state)\n\n final_text = extract_text(history[-1][\"content\"])\n if final_text:\n print(final_text)\n print()\n"
+ },
+ {
+ "id": "s07",
+ "filename": "s07_permission_system.py",
+ "title": "Permission System",
+ "subtitle": "Intent Must Pass Safety",
+ "loc": 308,
+ "tools": [
+ "bash",
+ "read_file",
+ "write_file",
+ "edit_file"
+ ],
+ "newTools": [],
+ "coreAddition": "deny / mode / allow / ask pipeline",
+ "keyInsight": "Safety is a pipeline, not a boolean: deny, check mode, allow, then ask.",
+ "classes": [
+ {
+ "name": "BashSecurityValidator",
+ "startLine": 55,
+ "endLine": 98
+ },
+ {
+ "name": "PermissionManager",
+ "startLine": 127,
+ "endLine": 250
+ }
+ ],
+ "functions": [
+ {
+ "name": "is_workspace_trusted",
+ "signature": "def is_workspace_trusted(workspace: Path = None)",
+ "startLine": 99
+ },
+ {
+ "name": "safe_path",
+ "signature": "def safe_path(p: str)",
+ "startLine": 251
+ },
+ {
+ "name": "run_bash",
+ "signature": "def run_bash(command: str)",
+ "startLine": 258
+ },
+ {
+ "name": "run_read",
+ "signature": "def run_read(path: str, limit: int = None)",
+ "startLine": 268
+ },
+ {
+ "name": "run_write",
+ "signature": "def run_write(path: str, content: str)",
+ "startLine": 278
+ },
+ {
+ "name": "run_edit",
+ "signature": "def run_edit(path: str, old_text: str, new_text: str)",
+ "startLine": 288
+ },
+ {
+ "name": "agent_loop",
+ "signature": "def agent_loop(messages: list, perms: PermissionManager)",
+ "startLine": 322
+ }
+ ],
+ "layer": "hardening",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: safety -- the pipeline between intent and execution.\n\"\"\"\ns07_permission_system.py - Permission System\n\nEvery tool call passes through a permission pipeline before execution.\n\nTeaching pipeline:\n 1. deny rules\n 2. mode check\n 3. allow rules\n 4. ask user\n\nThis version intentionally teaches three modes first:\n - default\n - plan\n - auto\n\nThat is enough to build a real, understandable permission system without\nburying readers under every advanced policy branch on day one.\n\nKey insight: \"Safety is a pipeline, not a boolean.\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nfrom fnmatch import fnmatch\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# -- Permission modes --\n# Teaching version starts with three clear modes first.\nMODES = (\"default\", \"plan\", \"auto\")\n\nREAD_ONLY_TOOLS = {\"read_file\", \"bash_readonly\"}\n\n# Tools that modify state\nWRITE_TOOLS = {\"write_file\", \"edit_file\", \"bash\"}\n\n\n# -- Bash security validation --\nclass BashSecurityValidator:\n \"\"\"\n Validate bash commands for obviously dangerous patterns.\n\n The teaching version deliberately keeps this small and easy to read.\n First catch a few high-risk patterns, then let the permission pipeline\n decide whether to deny or ask the user.\n \"\"\"\n\n VALIDATORS = [\n (\"shell_metachar\", r\"[;&|`$]\"), # shell metacharacters\n (\"sudo\", r\"\\bsudo\\b\"), # privilege escalation\n (\"rm_rf\", r\"\\brm\\s+(-[a-zA-Z]*)?r\"), # recursive delete\n (\"cmd_substitution\", r\"\\$\\(\"), # command substitution\n (\"ifs_injection\", r\"\\bIFS\\s*=\"), # IFS manipulation\n ]\n\n def validate(self, command: str) -> list:\n \"\"\"\n Check a bash command against all validators.\n\n Returns list of (validator_name, matched_pattern) tuples for failures.\n An empty list means the command passed all validators.\n \"\"\"\n failures = []\n for name, pattern in self.VALIDATORS:\n if re.search(pattern, command):\n failures.append((name, pattern))\n return failures\n\n def is_safe(self, command: str) -> bool:\n \"\"\"Convenience: returns True only if no validators triggered.\"\"\"\n return len(self.validate(command)) == 0\n\n def describe_failures(self, command: str) -> str:\n \"\"\"Human-readable summary of validation failures.\"\"\"\n failures = self.validate(command)\n if not failures:\n return \"No issues detected\"\n parts = [f\"{name} (pattern: {pattern})\" for name, pattern in failures]\n return \"Security flags: \" + \", \".join(parts)\n\n\n# -- Workspace trust --\ndef is_workspace_trusted(workspace: Path = None) -> bool:\n \"\"\"\n Check if a workspace has been explicitly marked as trusted.\n\n The teaching version uses a simple marker file. 
A more complete system\n can layer richer trust flows on top of the same idea.\n \"\"\"\n ws = workspace or WORKDIR\n trust_marker = ws / \".claude\" / \".claude_trusted\"\n return trust_marker.exists()\n\n\n# Singleton validator instance used by the permission pipeline\nbash_validator = BashSecurityValidator()\n\n\n# -- Permission rules --\n# Rules are checked in order: first match wins.\n# Format: {\"tool\": \"\", \"path\": \"\", \"behavior\": \"allow|deny|ask\"}\nDEFAULT_RULES = [\n # Always deny dangerous patterns\n {\"tool\": \"bash\", \"content\": \"rm -rf /\", \"behavior\": \"deny\"},\n {\"tool\": \"bash\", \"content\": \"sudo *\", \"behavior\": \"deny\"},\n # Allow reading anything\n {\"tool\": \"read_file\", \"path\": \"*\", \"behavior\": \"allow\"},\n]\n\n\nclass PermissionManager:\n \"\"\"\n Manages permission decisions for tool calls.\n\n Pipeline: deny_rules -> mode_check -> allow_rules -> ask_user\n\n The teaching version keeps the decision path short on purpose so readers\n can implement it themselves before adding more advanced policy layers.\n \"\"\"\n\n def __init__(self, mode: str = \"default\", rules: list = None):\n if mode not in MODES:\n raise ValueError(f\"Unknown mode: {mode}. Choose from {MODES}\")\n self.mode = mode\n self.rules = rules or list(DEFAULT_RULES)\n # Simple denial tracking helps surface when the agent is repeatedly\n # asking for actions the system will not allow.\n self.consecutive_denials = 0\n self.max_consecutive_denials = 3\n\n def check(self, tool_name: str, tool_input: dict) -> dict:\n \"\"\"\n Returns: {\"behavior\": \"allow\"|\"deny\"|\"ask\", \"reason\": str}\n \"\"\"\n # Step 0: Bash security validation (before deny rules)\n # Teaching version checks early for clarity.\n if tool_name == \"bash\":\n command = tool_input.get(\"command\", \"\")\n failures = bash_validator.validate(command)\n if failures:\n # Severe patterns (sudo, rm_rf) get immediate deny\n severe = {\"sudo\", \"rm_rf\"}\n severe_hits = [f for f in failures if f[0] in severe]\n if severe_hits:\n desc = bash_validator.describe_failures(command)\n return {\"behavior\": \"deny\",\n \"reason\": f\"Bash validator: {desc}\"}\n # Other patterns escalate to ask (user can still approve)\n desc = bash_validator.describe_failures(command)\n return {\"behavior\": \"ask\",\n \"reason\": f\"Bash validator flagged: {desc}\"}\n\n # Step 1: Deny rules (bypass-immune, checked first always)\n for rule in self.rules:\n if rule[\"behavior\"] != \"deny\":\n continue\n if self._matches(rule, tool_name, tool_input):\n return {\"behavior\": \"deny\",\n \"reason\": f\"Blocked by deny rule: {rule}\"}\n\n # Step 2: Mode-based decisions\n if self.mode == \"plan\":\n # Plan mode: deny all write operations, allow reads\n if tool_name in WRITE_TOOLS:\n return {\"behavior\": \"deny\",\n \"reason\": \"Plan mode: write operations are blocked\"}\n return {\"behavior\": \"allow\", \"reason\": \"Plan mode: read-only allowed\"}\n\n if self.mode == \"auto\":\n # Auto mode: auto-allow read-only tools, ask for writes\n if tool_name in READ_ONLY_TOOLS or tool_name == \"read_file\":\n return {\"behavior\": \"allow\",\n \"reason\": \"Auto mode: read-only tool auto-approved\"}\n # Teaching: fall through to allow rules, then ask\n pass\n\n # Step 3: Allow rules\n for rule in self.rules:\n if rule[\"behavior\"] != \"allow\":\n continue\n if self._matches(rule, tool_name, tool_input):\n self.consecutive_denials = 0\n return {\"behavior\": \"allow\",\n \"reason\": f\"Matched allow rule: {rule}\"}\n\n # Step 4: Ask user 
(default behavior for unmatched tools)\n return {\"behavior\": \"ask\",\n \"reason\": f\"No rule matched for {tool_name}, asking user\"}\n\n def ask_user(self, tool_name: str, tool_input: dict) -> bool:\n \"\"\"Interactive approval prompt. Returns True if approved.\"\"\"\n preview = json.dumps(tool_input, ensure_ascii=False)[:200]\n print(f\"\\n [Permission] {tool_name}: {preview}\")\n try:\n answer = input(\" Allow? (y/n/always): \").strip().lower()\n except (EOFError, KeyboardInterrupt):\n return False\n\n if answer == \"always\":\n # Add permanent allow rule for this tool\n self.rules.append({\"tool\": tool_name, \"path\": \"*\", \"behavior\": \"allow\"})\n self.consecutive_denials = 0\n return True\n if answer in (\"y\", \"yes\"):\n self.consecutive_denials = 0\n return True\n\n # Track denials for circuit breaker\n self.consecutive_denials += 1\n if self.consecutive_denials >= self.max_consecutive_denials:\n print(f\" [{self.consecutive_denials} consecutive denials -- \"\n \"consider switching to plan mode]\")\n return False\n\n def _matches(self, rule: dict, tool_name: str, tool_input: dict) -> bool:\n \"\"\"Check if a rule matches the tool call.\"\"\"\n # Tool name match\n if rule.get(\"tool\") and rule[\"tool\"] != \"*\":\n if rule[\"tool\"] != tool_name:\n return False\n # Path pattern match\n if \"path\" in rule and rule[\"path\"] != \"*\":\n path = tool_input.get(\"path\", \"\")\n if not fnmatch(path, rule[\"path\"]):\n return False\n # Content pattern match (for bash commands)\n if \"content\" in rule:\n command = tool_input.get(\"command\", \"\")\n if not fnmatch(command, rule[\"content\"]):\n return False\n return True\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\nThe user controls permissions. Some tool calls may be denied.\"\"\"\n\n\ndef agent_loop(messages: list, perms: PermissionManager):\n \"\"\"\n The permission-aware agent loop.\n\n For each tool call:\n 1. LLM requests tool use\n 2. Permission pipeline checks: deny_rules -> mode -> allow_rules -> ask\n 3. If allowed: execute tool, return result\n 4. 
If denied: return rejection message to LLM\n \"\"\"\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n # -- Permission check --\n decision = perms.check(block.name, block.input or {})\n\n if decision[\"behavior\"] == \"deny\":\n output = f\"Permission denied: {decision['reason']}\"\n print(f\" [DENIED] {block.name}: {decision['reason']}\")\n\n elif decision[\"behavior\"] == \"ask\":\n if perms.ask_user(block.name, block.input or {}):\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n else:\n output = f\"Permission denied by user for {block.name}\"\n print(f\" [USER DENIED] {block.name}\")\n\n else: # allow\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n # Choose permission mode at startup\n print(\"Permission modes: default, plan, auto\")\n mode_input = input(\"Mode (default): \").strip().lower() or \"default\"\n if mode_input not in MODES:\n mode_input = \"default\"\n\n perms = PermissionManager(mode=mode_input)\n print(f\"[Permission mode: {mode_input}]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms07 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n # /mode command to switch modes at runtime\n if query.startswith(\"/mode\"):\n parts = query.split()\n if len(parts) == 2 and parts[1] in MODES:\n perms.mode = parts[1]\n print(f\"[Switched to {parts[1]} mode]\")\n else:\n print(f\"Usage: /mode <{'|'.join(MODES)}>\")\n continue\n\n # /rules command to show current rules\n if query.strip() == \"/rules\":\n for i, rule in enumerate(perms.rules):\n print(f\" {i}: {rule}\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, perms)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): safety(安全)——连接“意图”和“执行”的权限管线。\n\"\"\"\ns07_permission_system.py - Permission System(权限系统)\n\n每一次工具调用在执行前都必须经过权限管线(permission pipeline)。\n\n教学版管线:\n 1. deny rules(拒绝规则)\n 2. mode check(模式检查)\n 3. allow rules(放行规则)\n 4. ask user(询问用户)\n\n本版本先聚焦三种模式:\n - default(默认)\n - plan(规划)\n - auto(自动)\n\n这已足够搭建可用且可理解的权限系统,不会在起步阶段被复杂策略分支淹没。\n\n关键洞察:\n\"安全是管线,不是布尔开关。\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nfrom fnmatch import fnmatch\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\n\n# -- 权限模式(permission modes) --\n# 教学版先从三个清晰模式入手。\nMODES = (\"default\", \"plan\", \"auto\")\n\nREAD_ONLY_TOOLS = {\"read_file\", \"bash_readonly\"}\n\n# 具有状态副作用的工具\nWRITE_TOOLS = {\"write_file\", \"edit_file\", \"bash\"}\n\n\n# -- Bash 安全校验 --\nclass BashSecurityValidator:\n \"\"\"\n 校验 bash 命令中明显危险的模式。\n\n 教学版刻意保持规则小而清晰:\n 先识别高风险模式,再由权限管线决定“拒绝”或“询问用户”。\n \"\"\"\n\n VALIDATORS = [\n (\"shell_metachar\", r\"[;&|`$]\"), # shell 元字符\n (\"sudo\", r\"\\bsudo\\b\"), # 提权\n (\"rm_rf\", r\"\\brm\\s+(-[a-zA-Z]*)?r\"), # 递归删除\n (\"cmd_substitution\", r\"\\$\\(\"), # 命令替换\n (\"ifs_injection\", r\"\\bIFS\\s*=\"), # IFS 注入\n ]\n\n def validate(self, command: str) -> list:\n \"\"\"\n 使用全部校验器检查 bash 命令。\n\n 返回失败列表:`(validator_name, matched_pattern)` 元组。\n 返回空列表表示全部通过。\n \"\"\"\n failures = []\n for name, pattern in self.VALIDATORS:\n if re.search(pattern, command):\n failures.append((name, pattern))\n return failures\n\n def is_safe(self, command: str) -> bool:\n \"\"\"便捷接口:仅当无命中规则时返回 True。\"\"\"\n return len(self.validate(command)) == 0\n\n def describe_failures(self, command: str) -> str:\n \"\"\"输出可读的校验失败摘要。\"\"\"\n failures = self.validate(command)\n if not failures:\n return \"未发现安全问题\"\n parts = [f\"{name} (pattern: {pattern})\" for name, pattern in failures]\n return \"安全标记: \" + \", \".join(parts)\n\n\n# -- Workspace 信任状态 --\ndef is_workspace_trusted(workspace: Path = None) -> bool:\n \"\"\"\n 检查工作区是否被显式标记为 trusted(可信)。\n\n 教学版使用简单标记文件。生产系统可在同一思路上叠加更丰富的信任流程。\n \"\"\"\n ws = workspace or WORKDIR\n trust_marker = ws / \".claude\" / \".claude_trusted\"\n return trust_marker.exists()\n\n\n# 权限管线复用的单例校验器\nbash_validator = BashSecurityValidator()\n\n\n# -- 权限规则 --\n# 规则按顺序匹配:first match wins(首条命中生效)。\n# 结构:{\"tool\": \"\", \"path\": \"\", \"behavior\": \"allow|deny|ask\"}\nDEFAULT_RULES = [\n # 永久拒绝高危模式\n {\"tool\": \"bash\", \"content\": \"rm -rf /\", \"behavior\": \"deny\"},\n {\"tool\": \"bash\", \"content\": \"sudo *\", \"behavior\": \"deny\"},\n # 允许任意读取\n {\"tool\": \"read_file\", \"path\": \"*\", \"behavior\": \"allow\"},\n]\n\n\nclass PermissionManager:\n \"\"\"\n 管理工具调用的权限决策。\n\n 决策管线:deny_rules -> mode_check -> allow_rules -> ask_user\n\n 教学版故意保持路径精简,便于读者先自行实现,再叠加进阶策略层。\n \"\"\"\n\n def __init__(self, mode: str = \"default\", rules: list = None):\n if mode not in MODES:\n raise ValueError(f\"未知模式: {mode}。可选值:{MODES}\")\n self.mode = mode\n self.rules = rules or list(DEFAULT_RULES)\n # 连续拒绝计数可暴露“模型持续请求被禁止动作”的状态。\n self.consecutive_denials = 0\n self.max_consecutive_denials = 3\n\n def check(self, tool_name: str, tool_input: dict) -> dict:\n \"\"\"\n 返回:{\"behavior\": \"allow\"|\"deny\"|\"ask\", \"reason\": str}\n \"\"\"\n # Step 0: Bash 安全校验(先于 deny 规则)\n # 教学版前置校验,保证流程可读性。\n if tool_name == \"bash\":\n 
command = tool_input.get(\"command\", \"\")\n failures = bash_validator.validate(command)\n if failures:\n # 严重模式(sudo, rm_rf)立即拒绝\n severe = {\"sudo\", \"rm_rf\"}\n severe_hits = [f for f in failures if f[0] in severe]\n if severe_hits:\n desc = bash_validator.describe_failures(command)\n return {\"behavior\": \"deny\",\n \"reason\": f\"Bash 校验器: {desc}\"}\n # 其他模式升级为 ask(仍允许用户批准)\n desc = bash_validator.describe_failures(command)\n return {\"behavior\": \"ask\",\n \"reason\": f\"Bash 校验命中:{desc}\"}\n\n # Step 1: Deny 规则(不可绕过,永远最先检查)\n for rule in self.rules:\n if rule[\"behavior\"] != \"deny\":\n continue\n if self._matches(rule, tool_name, tool_input):\n return {\"behavior\": \"deny\",\n \"reason\": f\"命中 deny 规则并被拦截:{rule}\"}\n\n # Step 2: 基于 mode 的决策\n if self.mode == \"plan\":\n # Plan 模式:拒绝写操作,仅允许读取\n if tool_name in WRITE_TOOLS:\n return {\"behavior\": \"deny\",\n \"reason\": \"Plan 模式:写操作被阻止\"}\n return {\"behavior\": \"allow\", \"reason\": \"Plan 模式:允许只读操作\"}\n\n if self.mode == \"auto\":\n # Auto 模式:只读自动放行,写入请求走询问\n if tool_name in READ_ONLY_TOOLS or tool_name == \"read_file\":\n return {\"behavior\": \"allow\",\n \"reason\": \"Auto 模式:只读工具自动批准\"}\n # 教学版:继续走 allow 规则,最后再 ask\n pass\n\n # Step 3: Allow 规则\n for rule in self.rules:\n if rule[\"behavior\"] != \"allow\":\n continue\n if self._matches(rule, tool_name, tool_input):\n self.consecutive_denials = 0\n return {\"behavior\": \"allow\",\n \"reason\": f\"命中 allow 规则:{rule}\"}\n\n # Step 4: Ask user(未命中规则时的默认行为)\n return {\"behavior\": \"ask\",\n \"reason\": f\"{tool_name} 未命中任何规则,转为询问用户\"}\n\n def ask_user(self, tool_name: str, tool_input: dict) -> bool:\n \"\"\"交互式批准流程:用户批准返回 True。\"\"\"\n preview = json.dumps(tool_input, ensure_ascii=False)[:200]\n print(f\"\\n [Permission] {tool_name}: {preview}\")\n try:\n answer = input(\" 是否允许?(y/n/always): \").strip().lower()\n except (EOFError, KeyboardInterrupt):\n return False\n\n if answer == \"always\":\n # 为该工具添加持久 allow 规则\n self.rules.append({\"tool\": tool_name, \"path\": \"*\", \"behavior\": \"allow\"})\n self.consecutive_denials = 0\n return True\n if answer in (\"y\", \"yes\"):\n self.consecutive_denials = 0\n return True\n\n # 连续拒绝计数(可视作简化断路器)\n self.consecutive_denials += 1\n if self.consecutive_denials >= self.max_consecutive_denials:\n print(f\" [{self.consecutive_denials} consecutive denials -- \"\n \"建议切换到 plan 模式]\")\n return False\n\n def _matches(self, rule: dict, tool_name: str, tool_input: dict) -> bool:\n \"\"\"检查规则是否匹配当前工具调用。\"\"\"\n # 工具名匹配\n if rule.get(\"tool\") and rule[\"tool\"] != \"*\":\n if rule[\"tool\"] != tool_name:\n return False\n # 路径匹配\n if \"path\" in rule and rule[\"path\"] != \"*\":\n path = tool_input.get(\"path\", \"\")\n if not fnmatch(path, rule[\"path\"]):\n return False\n # 内容匹配(主要用于 bash 命令)\n if \"content\" in rule:\n command = tool_input.get(\"command\", \"\")\n if not fnmatch(command, rule[\"content\"]):\n return False\n return True\n\n\n# -- 工具实现 --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n 
if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\nSYSTEM = f\"\"\"你是位于 {WORKDIR} 的 coding agent(编码智能体),请使用工具解决任务。\n权限由用户控制,部分工具调用可能会被拒绝。\"\"\"\n\n\ndef agent_loop(messages: list, perms: PermissionManager):\n \"\"\"\n 带权限感知的智能体循环。\n\n 每次工具调用都遵循:\n 1. LLM 发起 tool_use(工具调用)请求;\n 2. 权限管线检查:deny_rules -> mode -> allow_rules -> ask;\n 3. 若允许:执行工具并返回结果;\n 4. 
若拒绝:向 LLM 返回拒绝信息。\n \"\"\"\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n # -- 权限检查 --\n decision = perms.check(block.name, block.input or {})\n\n if decision[\"behavior\"] == \"deny\":\n output = f\"Permission denied: {decision['reason']}\"\n print(f\" [DENIED] {block.name}: {decision['reason']}\")\n\n elif decision[\"behavior\"] == \"ask\":\n if perms.ask_user(block.name, block.input or {}):\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n else:\n output = f\"用户拒绝了工具:{block.name}\"\n print(f\" [USER DENIED] {block.name}\")\n\n else: # allow(允许执行)\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n # 启动时选择权限模式\n print(\"权限模式:default, plan, auto\")\n mode_input = input(\"模式(默认 default): \").strip().lower() or \"default\"\n if mode_input not in MODES:\n mode_input = \"default\"\n\n perms = PermissionManager(mode=mode_input)\n print(f\"[当前权限模式: {mode_input}]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms07 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n # /mode 命令:运行中切换模式\n if query.startswith(\"/mode\"):\n parts = query.split()\n if len(parts) == 2 and parts[1] in MODES:\n perms.mode = parts[1]\n print(f\"[已切换到 {parts[1]} 模式]\")\n else:\n print(f\"用法: /mode <{'|'.join(MODES)}>\")\n continue\n\n # /rules 命令:查看当前规则\n if query.strip() == \"/rules\":\n for i, rule in enumerate(perms.rules):\n print(f\" {i}: {rule}\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, perms)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: safety -- the pipeline between intent and execution.\n\"\"\"\ns07_permission_system.py - Permission System\n\nEvery tool call passes through a permission pipeline before execution.\n\nTeaching pipeline:\n 1. deny rules\n 2. mode check\n 3. allow rules\n 4. ask user\n\nThis version intentionally teaches three modes first:\n - default\n - plan\n - auto\n\nThat is enough to build a real, understandable permission system without\nburying readers under every advanced policy branch on day one.\n\nKey insight: \"Safety is a pipeline, not a boolean.\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nfrom fnmatch import fnmatch\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# -- Permission modes --\n# Teaching version starts with three clear modes first.\nMODES = (\"default\", \"plan\", \"auto\")\n\nREAD_ONLY_TOOLS = {\"read_file\", \"bash_readonly\"}\n\n# Tools that modify state\nWRITE_TOOLS = {\"write_file\", \"edit_file\", \"bash\"}\n\n\n# -- Bash security validation --\nclass BashSecurityValidator:\n \"\"\"\n Validate bash commands for obviously dangerous patterns.\n\n The teaching version deliberately keeps this small and easy to read.\n First catch a few high-risk patterns, then let the permission pipeline\n decide whether to deny or ask the user.\n \"\"\"\n\n VALIDATORS = [\n (\"shell_metachar\", r\"[;&|`$]\"), # shell metacharacters\n (\"sudo\", r\"\\bsudo\\b\"), # privilege escalation\n (\"rm_rf\", r\"\\brm\\s+(-[a-zA-Z]*)?r\"), # recursive delete\n (\"cmd_substitution\", r\"\\$\\(\"), # command substitution\n (\"ifs_injection\", r\"\\bIFS\\s*=\"), # IFS manipulation\n ]\n\n def validate(self, command: str) -> list:\n \"\"\"\n Check a bash command against all validators.\n\n Returns list of (validator_name, matched_pattern) tuples for failures.\n An empty list means the command passed all validators.\n \"\"\"\n failures = []\n for name, pattern in self.VALIDATORS:\n if re.search(pattern, command):\n failures.append((name, pattern))\n return failures\n\n def is_safe(self, command: str) -> bool:\n \"\"\"Convenience: returns True only if no validators triggered.\"\"\"\n return len(self.validate(command)) == 0\n\n def describe_failures(self, command: str) -> str:\n \"\"\"Human-readable summary of validation failures.\"\"\"\n failures = self.validate(command)\n if not failures:\n return \"No issues detected\"\n parts = [f\"{name} (pattern: {pattern})\" for name, pattern in failures]\n return \"Security flags: \" + \", \".join(parts)\n\n\n# -- Workspace trust --\ndef is_workspace_trusted(workspace: Path = None) -> bool:\n \"\"\"\n Check if a workspace has been explicitly marked as trusted.\n\n The teaching version uses a simple marker file. 
A more complete system\n can layer richer trust flows on top of the same idea.\n \"\"\"\n ws = workspace or WORKDIR\n trust_marker = ws / \".claude\" / \".claude_trusted\"\n return trust_marker.exists()\n\n\n# Singleton validator instance used by the permission pipeline\nbash_validator = BashSecurityValidator()\n\n\n# -- Permission rules --\n# Rules are checked in order: first match wins.\n# Format: {\"tool\": \"\", \"path\": \"\", \"behavior\": \"allow|deny|ask\"}\nDEFAULT_RULES = [\n # Always deny dangerous patterns\n {\"tool\": \"bash\", \"content\": \"rm -rf /\", \"behavior\": \"deny\"},\n {\"tool\": \"bash\", \"content\": \"sudo *\", \"behavior\": \"deny\"},\n # Allow reading anything\n {\"tool\": \"read_file\", \"path\": \"*\", \"behavior\": \"allow\"},\n]\n\n\nclass PermissionManager:\n \"\"\"\n Manages permission decisions for tool calls.\n\n Pipeline: deny_rules -> mode_check -> allow_rules -> ask_user\n\n The teaching version keeps the decision path short on purpose so readers\n can implement it themselves before adding more advanced policy layers.\n \"\"\"\n\n def __init__(self, mode: str = \"default\", rules: list = None):\n if mode not in MODES:\n raise ValueError(f\"Unknown mode: {mode}. Choose from {MODES}\")\n self.mode = mode\n self.rules = rules or list(DEFAULT_RULES)\n # Simple denial tracking helps surface when the agent is repeatedly\n # asking for actions the system will not allow.\n self.consecutive_denials = 0\n self.max_consecutive_denials = 3\n\n def check(self, tool_name: str, tool_input: dict) -> dict:\n \"\"\"\n Returns: {\"behavior\": \"allow\"|\"deny\"|\"ask\", \"reason\": str}\n \"\"\"\n # Step 0: Bash security validation (before deny rules)\n # Teaching version checks early for clarity.\n if tool_name == \"bash\":\n command = tool_input.get(\"command\", \"\")\n failures = bash_validator.validate(command)\n if failures:\n # Severe patterns (sudo, rm_rf) get immediate deny\n severe = {\"sudo\", \"rm_rf\"}\n severe_hits = [f for f in failures if f[0] in severe]\n if severe_hits:\n desc = bash_validator.describe_failures(command)\n return {\"behavior\": \"deny\",\n \"reason\": f\"Bash validator: {desc}\"}\n # Other patterns escalate to ask (user can still approve)\n desc = bash_validator.describe_failures(command)\n return {\"behavior\": \"ask\",\n \"reason\": f\"Bash validator flagged: {desc}\"}\n\n # Step 1: Deny rules (bypass-immune, checked first always)\n for rule in self.rules:\n if rule[\"behavior\"] != \"deny\":\n continue\n if self._matches(rule, tool_name, tool_input):\n return {\"behavior\": \"deny\",\n \"reason\": f\"Blocked by deny rule: {rule}\"}\n\n # Step 2: Mode-based decisions\n if self.mode == \"plan\":\n # Plan mode: deny all write operations, allow reads\n if tool_name in WRITE_TOOLS:\n return {\"behavior\": \"deny\",\n \"reason\": \"Plan mode: write operations are blocked\"}\n return {\"behavior\": \"allow\", \"reason\": \"Plan mode: read-only allowed\"}\n\n if self.mode == \"auto\":\n # Auto mode: auto-allow read-only tools, ask for writes\n if tool_name in READ_ONLY_TOOLS or tool_name == \"read_file\":\n return {\"behavior\": \"allow\",\n \"reason\": \"Auto mode: read-only tool auto-approved\"}\n # Teaching: fall through to allow rules, then ask\n pass\n\n # Step 3: Allow rules\n for rule in self.rules:\n if rule[\"behavior\"] != \"allow\":\n continue\n if self._matches(rule, tool_name, tool_input):\n self.consecutive_denials = 0\n return {\"behavior\": \"allow\",\n \"reason\": f\"Matched allow rule: {rule}\"}\n\n # Step 4: Ask user 
(default behavior for unmatched tools)\n return {\"behavior\": \"ask\",\n \"reason\": f\"No rule matched for {tool_name}, asking user\"}\n\n def ask_user(self, tool_name: str, tool_input: dict) -> bool:\n \"\"\"Interactive approval prompt. Returns True if approved.\"\"\"\n preview = json.dumps(tool_input, ensure_ascii=False)[:200]\n print(f\"\\n [Permission] {tool_name}: {preview}\")\n try:\n answer = input(\" Allow? (y/n/always): \").strip().lower()\n except (EOFError, KeyboardInterrupt):\n return False\n\n if answer == \"always\":\n # Add permanent allow rule for this tool\n self.rules.append({\"tool\": tool_name, \"path\": \"*\", \"behavior\": \"allow\"})\n self.consecutive_denials = 0\n return True\n if answer in (\"y\", \"yes\"):\n self.consecutive_denials = 0\n return True\n\n # Track denials for circuit breaker\n self.consecutive_denials += 1\n if self.consecutive_denials >= self.max_consecutive_denials:\n print(f\" [{self.consecutive_denials} consecutive denials -- \"\n \"consider switching to plan mode]\")\n return False\n\n def _matches(self, rule: dict, tool_name: str, tool_input: dict) -> bool:\n \"\"\"Check if a rule matches the tool call.\"\"\"\n # Tool name match\n if rule.get(\"tool\") and rule[\"tool\"] != \"*\":\n if rule[\"tool\"] != tool_name:\n return False\n # Path pattern match\n if \"path\" in rule and rule[\"path\"] != \"*\":\n path = tool_input.get(\"path\", \"\")\n if not fnmatch(path, rule[\"path\"]):\n return False\n # Content pattern match (for bash commands)\n if \"content\" in rule:\n command = tool_input.get(\"command\", \"\")\n if not fnmatch(command, rule[\"content\"]):\n return False\n return True\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\nThe user controls permissions. Some tool calls may be denied.\"\"\"\n\n\ndef agent_loop(messages: list, perms: PermissionManager):\n \"\"\"\n The permission-aware agent loop.\n\n For each tool call:\n 1. LLM requests tool use\n 2. Permission pipeline checks: deny_rules -> mode -> allow_rules -> ask\n 3. If allowed: execute tool, return result\n 4. 
If denied: return rejection message to LLM\n \"\"\"\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n # -- Permission check --\n decision = perms.check(block.name, block.input or {})\n\n if decision[\"behavior\"] == \"deny\":\n output = f\"Permission denied: {decision['reason']}\"\n print(f\" [DENIED] {block.name}: {decision['reason']}\")\n\n elif decision[\"behavior\"] == \"ask\":\n if perms.ask_user(block.name, block.input or {}):\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n else:\n output = f\"Permission denied by user for {block.name}\"\n print(f\" [USER DENIED] {block.name}\")\n\n else: # allow\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n # Choose permission mode at startup\n print(\"Permission modes: default, plan, auto\")\n mode_input = input(\"Mode (default): \").strip().lower() or \"default\"\n if mode_input not in MODES:\n mode_input = \"default\"\n\n perms = PermissionManager(mode=mode_input)\n print(f\"[Permission mode: {mode_input}]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms07 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n # /mode command to switch modes at runtime\n if query.startswith(\"/mode\"):\n parts = query.split()\n if len(parts) == 2 and parts[1] in MODES:\n perms.mode = parts[1]\n print(f\"[Switched to {parts[1]} mode]\")\n else:\n print(f\"Usage: /mode <{'|'.join(MODES)}>\")\n continue\n\n # /rules command to show current rules\n if query.strip() == \"/rules\":\n for i, rule in enumerate(perms.rules):\n print(f\" {i}: {rule}\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, perms)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: safety -- the pipeline between intent and execution.\n\"\"\"\ns07_permission_system.py - Permission System\n\nEvery tool call passes through a permission pipeline before execution.\n\nTeaching pipeline:\n 1. deny rules\n 2. mode check\n 3. allow rules\n 4. ask user\n\nThis version intentionally teaches three modes first:\n - default\n - plan\n - auto\n\nThat is enough to build a real, understandable permission system without\nburying readers under every advanced policy branch on day one.\n\nKey insight: \"Safety is a pipeline, not a boolean.\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nfrom fnmatch import fnmatch\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# -- Permission modes --\n# Teaching version starts with three clear modes first.\nMODES = (\"default\", \"plan\", \"auto\")\n\nREAD_ONLY_TOOLS = {\"read_file\", \"bash_readonly\"}\n\n# Tools that modify state\nWRITE_TOOLS = {\"write_file\", \"edit_file\", \"bash\"}\n\n\n# -- Bash security validation --\nclass BashSecurityValidator:\n \"\"\"\n Validate bash commands for obviously dangerous patterns.\n\n The teaching version deliberately keeps this small and easy to read.\n First catch a few high-risk patterns, then let the permission pipeline\n decide whether to deny or ask the user.\n \"\"\"\n\n VALIDATORS = [\n (\"shell_metachar\", r\"[;&|`$]\"), # shell metacharacters\n (\"sudo\", r\"\\bsudo\\b\"), # privilege escalation\n (\"rm_rf\", r\"\\brm\\s+(-[a-zA-Z]*)?r\"), # recursive delete\n (\"cmd_substitution\", r\"\\$\\(\"), # command substitution\n (\"ifs_injection\", r\"\\bIFS\\s*=\"), # IFS manipulation\n ]\n\n def validate(self, command: str) -> list:\n \"\"\"\n Check a bash command against all validators.\n\n Returns list of (validator_name, matched_pattern) tuples for failures.\n An empty list means the command passed all validators.\n \"\"\"\n failures = []\n for name, pattern in self.VALIDATORS:\n if re.search(pattern, command):\n failures.append((name, pattern))\n return failures\n\n def is_safe(self, command: str) -> bool:\n \"\"\"Convenience: returns True only if no validators triggered.\"\"\"\n return len(self.validate(command)) == 0\n\n def describe_failures(self, command: str) -> str:\n \"\"\"Human-readable summary of validation failures.\"\"\"\n failures = self.validate(command)\n if not failures:\n return \"No issues detected\"\n parts = [f\"{name} (pattern: {pattern})\" for name, pattern in failures]\n return \"Security flags: \" + \", \".join(parts)\n\n\n# -- Workspace trust --\ndef is_workspace_trusted(workspace: Path = None) -> bool:\n \"\"\"\n Check if a workspace has been explicitly marked as trusted.\n\n The teaching version uses a simple marker file. 
A more complete system\n can layer richer trust flows on top of the same idea.\n \"\"\"\n ws = workspace or WORKDIR\n trust_marker = ws / \".claude\" / \".claude_trusted\"\n return trust_marker.exists()\n\n\n# Singleton validator instance used by the permission pipeline\nbash_validator = BashSecurityValidator()\n\n\n# -- Permission rules --\n# Rules are checked in order: first match wins.\n# Format: {\"tool\": \"\", \"path\": \"\", \"behavior\": \"allow|deny|ask\"}\nDEFAULT_RULES = [\n # Always deny dangerous patterns\n {\"tool\": \"bash\", \"content\": \"rm -rf /\", \"behavior\": \"deny\"},\n {\"tool\": \"bash\", \"content\": \"sudo *\", \"behavior\": \"deny\"},\n # Allow reading anything\n {\"tool\": \"read_file\", \"path\": \"*\", \"behavior\": \"allow\"},\n]\n\n\nclass PermissionManager:\n \"\"\"\n Manages permission decisions for tool calls.\n\n Pipeline: deny_rules -> mode_check -> allow_rules -> ask_user\n\n The teaching version keeps the decision path short on purpose so readers\n can implement it themselves before adding more advanced policy layers.\n \"\"\"\n\n def __init__(self, mode: str = \"default\", rules: list = None):\n if mode not in MODES:\n raise ValueError(f\"Unknown mode: {mode}. Choose from {MODES}\")\n self.mode = mode\n self.rules = rules or list(DEFAULT_RULES)\n # Simple denial tracking helps surface when the agent is repeatedly\n # asking for actions the system will not allow.\n self.consecutive_denials = 0\n self.max_consecutive_denials = 3\n\n def check(self, tool_name: str, tool_input: dict) -> dict:\n \"\"\"\n Returns: {\"behavior\": \"allow\"|\"deny\"|\"ask\", \"reason\": str}\n \"\"\"\n # Step 0: Bash security validation (before deny rules)\n # Teaching version checks early for clarity.\n if tool_name == \"bash\":\n command = tool_input.get(\"command\", \"\")\n failures = bash_validator.validate(command)\n if failures:\n # Severe patterns (sudo, rm_rf) get immediate deny\n severe = {\"sudo\", \"rm_rf\"}\n severe_hits = [f for f in failures if f[0] in severe]\n if severe_hits:\n desc = bash_validator.describe_failures(command)\n return {\"behavior\": \"deny\",\n \"reason\": f\"Bash validator: {desc}\"}\n # Other patterns escalate to ask (user can still approve)\n desc = bash_validator.describe_failures(command)\n return {\"behavior\": \"ask\",\n \"reason\": f\"Bash validator flagged: {desc}\"}\n\n # Step 1: Deny rules (bypass-immune, checked first always)\n for rule in self.rules:\n if rule[\"behavior\"] != \"deny\":\n continue\n if self._matches(rule, tool_name, tool_input):\n return {\"behavior\": \"deny\",\n \"reason\": f\"Blocked by deny rule: {rule}\"}\n\n # Step 2: Mode-based decisions\n if self.mode == \"plan\":\n # Plan mode: deny all write operations, allow reads\n if tool_name in WRITE_TOOLS:\n return {\"behavior\": \"deny\",\n \"reason\": \"Plan mode: write operations are blocked\"}\n return {\"behavior\": \"allow\", \"reason\": \"Plan mode: read-only allowed\"}\n\n if self.mode == \"auto\":\n # Auto mode: auto-allow read-only tools, ask for writes\n if tool_name in READ_ONLY_TOOLS or tool_name == \"read_file\":\n return {\"behavior\": \"allow\",\n \"reason\": \"Auto mode: read-only tool auto-approved\"}\n # Teaching: fall through to allow rules, then ask\n pass\n\n # Step 3: Allow rules\n for rule in self.rules:\n if rule[\"behavior\"] != \"allow\":\n continue\n if self._matches(rule, tool_name, tool_input):\n self.consecutive_denials = 0\n return {\"behavior\": \"allow\",\n \"reason\": f\"Matched allow rule: {rule}\"}\n\n # Step 4: Ask user 
(default behavior for unmatched tools)\n return {\"behavior\": \"ask\",\n \"reason\": f\"No rule matched for {tool_name}, asking user\"}\n\n def ask_user(self, tool_name: str, tool_input: dict) -> bool:\n \"\"\"Interactive approval prompt. Returns True if approved.\"\"\"\n preview = json.dumps(tool_input, ensure_ascii=False)[:200]\n print(f\"\\n [Permission] {tool_name}: {preview}\")\n try:\n answer = input(\" Allow? (y/n/always): \").strip().lower()\n except (EOFError, KeyboardInterrupt):\n return False\n\n if answer == \"always\":\n # Add permanent allow rule for this tool\n self.rules.append({\"tool\": tool_name, \"path\": \"*\", \"behavior\": \"allow\"})\n self.consecutive_denials = 0\n return True\n if answer in (\"y\", \"yes\"):\n self.consecutive_denials = 0\n return True\n\n # Track denials for circuit breaker\n self.consecutive_denials += 1\n if self.consecutive_denials >= self.max_consecutive_denials:\n print(f\" [{self.consecutive_denials} consecutive denials -- \"\n \"consider switching to plan mode]\")\n return False\n\n def _matches(self, rule: dict, tool_name: str, tool_input: dict) -> bool:\n \"\"\"Check if a rule matches the tool call.\"\"\"\n # Tool name match\n if rule.get(\"tool\") and rule[\"tool\"] != \"*\":\n if rule[\"tool\"] != tool_name:\n return False\n # Path pattern match\n if \"path\" in rule and rule[\"path\"] != \"*\":\n path = tool_input.get(\"path\", \"\")\n if not fnmatch(path, rule[\"path\"]):\n return False\n # Content pattern match (for bash commands)\n if \"content\" in rule:\n command = tool_input.get(\"command\", \"\")\n if not fnmatch(command, rule[\"content\"]):\n return False\n return True\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\nThe user controls permissions. Some tool calls may be denied.\"\"\"\n\n\ndef agent_loop(messages: list, perms: PermissionManager):\n \"\"\"\n The permission-aware agent loop.\n\n For each tool call:\n 1. LLM requests tool use\n 2. Permission pipeline checks: deny_rules -> mode -> allow_rules -> ask\n 3. If allowed: execute tool, return result\n 4. 
If denied: return rejection message to LLM\n \"\"\"\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n # -- Permission check --\n decision = perms.check(block.name, block.input or {})\n\n if decision[\"behavior\"] == \"deny\":\n output = f\"Permission denied: {decision['reason']}\"\n print(f\" [DENIED] {block.name}: {decision['reason']}\")\n\n elif decision[\"behavior\"] == \"ask\":\n if perms.ask_user(block.name, block.input or {}):\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n else:\n output = f\"Permission denied by user for {block.name}\"\n print(f\" [USER DENIED] {block.name}\")\n\n else: # allow\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n # Choose permission mode at startup\n print(\"Permission modes: default, plan, auto\")\n mode_input = input(\"Mode (default): \").strip().lower() or \"default\"\n if mode_input not in MODES:\n mode_input = \"default\"\n\n perms = PermissionManager(mode=mode_input)\n print(f\"[Permission mode: {mode_input}]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms07 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n # /mode command to switch modes at runtime\n if query.startswith(\"/mode\"):\n parts = query.split()\n if len(parts) == 2 and parts[1] in MODES:\n perms.mode = parts[1]\n print(f\"[Switched to {parts[1]} mode]\")\n else:\n print(f\"Usage: /mode <{'|'.join(MODES)}>\")\n continue\n\n # /rules command to show current rules\n if query.strip() == \"/rules\":\n for i, rule in enumerate(perms.rules):\n print(f\" {i}: {rule}\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, perms)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
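[Editor's note -- illustrative sketch, not part of the patch. The s07 entry above teaches one fixed decision order: deny rules, then mode check, then allow rules, then ask the user. Below is a condensed, self-contained Python rendering of that order; the rule shape mirrors DEFAULT_RULES in the embedded source, while the names RULES, matches, and check are invented here purely for illustration.]

from fnmatch import fnmatch

# Demo rules in the same shape as DEFAULT_RULES: first match wins.
RULES = [
    {"tool": "bash", "content": "sudo *", "behavior": "deny"},
    {"tool": "read_file", "path": "*", "behavior": "allow"},
]

def matches(rule, tool, tool_input):
    # Tool name, path glob, and command glob must all agree with the rule.
    if rule.get("tool") not in (None, "*", tool):
        return False
    if "path" in rule and rule["path"] != "*" and not fnmatch(tool_input.get("path", ""), rule["path"]):
        return False
    if "content" in rule and not fnmatch(tool_input.get("command", ""), rule["content"]):
        return False
    return True

def check(tool, tool_input, mode="default"):
    # 1. Deny rules are checked first and cannot be bypassed.
    for rule in RULES:
        if rule["behavior"] == "deny" and matches(rule, tool, tool_input):
            return "deny"
    # 2. Mode check: plan mode blocks anything that writes.
    if mode == "plan" and tool in {"write_file", "edit_file", "bash"}:
        return "deny"
    # 3. Allow rules.
    for rule in RULES:
        if rule["behavior"] == "allow" and matches(rule, tool, tool_input):
            return "allow"
    # 4. Nothing matched: escalate to the user.
    return "ask"

print(check("bash", {"command": "sudo rm -rf /"}))  # deny
print(check("read_file", {"path": "README.md"}))    # allow
print(check("write_file", {"path": "notes.txt"}))   # ask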
+ {
+ "id": "s08",
+ "filename": "s08_hook_system.py",
+ "title": "Hook System",
+ "subtitle": "Extend Without Rewriting the Loop",
+ "loc": 252,
+ "tools": [
+ "bash",
+ "read_file",
+ "write_file",
+ "edit_file"
+ ],
+ "newTools": [],
+ "coreAddition": "Lifecycle events + side-effect hooks",
+ "keyInsight": "The loop owns control flow; hooks only observe, block, or annotate at named moments.",
+ "classes": [
+ {
+ "name": "HookManager",
+ "startLine": 56,
+ "endLine": 177
+ }
+ ],
+ "functions": [
+ {
+ "name": "safe_path",
+ "signature": "def safe_path(p: str)",
+ "startLine": 178
+ },
+ {
+ "name": "run_bash",
+ "signature": "def run_bash(command: str)",
+ "startLine": 185
+ },
+ {
+ "name": "run_read",
+ "signature": "def run_read(path: str, limit: int = None)",
+ "startLine": 198
+ },
+ {
+ "name": "run_write",
+ "signature": "def run_write(path: str, content: str)",
+ "startLine": 208
+ },
+ {
+ "name": "run_edit",
+ "signature": "def run_edit(path: str, old_text: str, new_text: str)",
+ "startLine": 218
+ },
+ {
+ "name": "agent_loop",
+ "signature": "def agent_loop(messages: list, hooks: HookManager)",
+ "startLine": 251
+ }
+ ],
+ "layer": "hardening",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: extensibility -- injecting behavior without touching the loop.\n\"\"\"\ns08_hook_system.py - Hook System\n\nHooks are extension points around the main loop.\nThey let readers add behavior without rewriting the loop itself.\n\nTeaching version:\n - SessionStart\n - PreToolUse\n - PostToolUse\n\nTeaching exit-code contract:\n - 0 -> continue\n - 1 -> block\n - 2 -> inject a message\n\nThis is intentionally simpler than a production system. The goal here is to\nteach the extension pattern clearly before introducing event-specific edge\ncases.\n\nKey insight: \"Extend the agent without touching the loop.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# The teaching version keeps only the three clearest events. More complete\n# systems can grow the event surface later.\n\nHOOK_EVENTS = (\"PreToolUse\", \"PostToolUse\", \"SessionStart\")\nHOOK_TIMEOUT = 30 # seconds\n# Real CC timeouts:\n# TOOL_HOOK_EXECUTION_TIMEOUT_MS = 600000 (10 minutes for tool hooks)\n# SESSION_END_HOOK_TIMEOUT_MS = 1500 (1.5 seconds for SessionEnd hooks)\n\n# Workspace trust marker. Hooks only run if this file exists (or SDK mode).\nTRUST_MARKER = WORKDIR / \".claude\" / \".claude_trusted\"\n\n\nclass HookManager:\n \"\"\"\n Load and execute hooks from .hooks.json configuration.\n\n The hook manager does three simple jobs:\n - load hook definitions\n - run matching commands for an event\n - aggregate block / message results for the caller\n \"\"\"\n\n def __init__(self, config_path: Path = None, sdk_mode: bool = False):\n self.hooks = {\"PreToolUse\": [], \"PostToolUse\": [], \"SessionStart\": []}\n self._sdk_mode = sdk_mode\n config_path = config_path or (WORKDIR / \".hooks.json\")\n if config_path.exists():\n try:\n config = json.loads(config_path.read_text())\n for event in HOOK_EVENTS:\n self.hooks[event] = config.get(\"hooks\", {}).get(event, [])\n print(f\"[Hooks loaded from {config_path}]\")\n except Exception as e:\n print(f\"[Hook config error: {e}]\")\n\n def _check_workspace_trust(self) -> bool:\n \"\"\"\n Check whether the current workspace is trusted.\n\n The teaching version uses a simple trust marker file.\n In SDK mode, trust is treated as implicit.\n \"\"\"\n if self._sdk_mode:\n return True\n return TRUST_MARKER.exists()\n\n def run_hooks(self, event: str, context: dict = None) -> dict:\n \"\"\"\n Execute all hooks for an event.\n\n Returns: {\"blocked\": bool, \"messages\": list[str]}\n - blocked: True if any hook returned exit code 1\n - messages: stderr content from exit-code-2 hooks (to inject)\n \"\"\"\n result = {\"blocked\": False, \"messages\": []}\n\n # Trust gate: refuse to run hooks in untrusted workspaces\n if not self._check_workspace_trust():\n return result\n\n hooks = self.hooks.get(event, [])\n\n for hook_def in hooks:\n # Check matcher (tool name filter for PreToolUse/PostToolUse)\n matcher = hook_def.get(\"matcher\")\n if matcher and context:\n tool_name = context.get(\"tool_name\", \"\")\n if matcher != \"*\" and matcher != tool_name:\n continue\n\n command = hook_def.get(\"command\", \"\")\n if not command:\n continue\n\n # Build environment with hook context\n env = dict(os.environ)\n if context:\n 
env[\"HOOK_EVENT\"] = event\n env[\"HOOK_TOOL_NAME\"] = context.get(\"tool_name\", \"\")\n env[\"HOOK_TOOL_INPUT\"] = json.dumps(\n context.get(\"tool_input\", {}), ensure_ascii=False)[:10000]\n if \"tool_output\" in context:\n env[\"HOOK_TOOL_OUTPUT\"] = str(\n context[\"tool_output\"])[:10000]\n\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR, env=env,\n capture_output=True, text=True, timeout=HOOK_TIMEOUT,\n )\n\n if r.returncode == 0:\n # Continue silently\n if r.stdout.strip():\n print(f\" [hook:{event}] {r.stdout.strip()[:100]}\")\n\n # Optional structured stdout: small extension point that\n # keeps the teaching contract simple.\n try:\n hook_output = json.loads(r.stdout)\n if \"updatedInput\" in hook_output and context:\n context[\"tool_input\"] = hook_output[\"updatedInput\"]\n if \"additionalContext\" in hook_output:\n result[\"messages\"].append(\n hook_output[\"additionalContext\"])\n if \"permissionDecision\" in hook_output:\n result[\"permission_override\"] = (\n hook_output[\"permissionDecision\"])\n except (json.JSONDecodeError, TypeError):\n pass # stdout was not JSON -- normal for simple hooks\n\n elif r.returncode == 1:\n # Block execution\n result[\"blocked\"] = True\n reason = r.stderr.strip() or \"Blocked by hook\"\n result[\"block_reason\"] = reason\n print(f\" [hook:{event}] BLOCKED: {reason[:200]}\")\n\n elif r.returncode == 2:\n # Inject message\n msg = r.stderr.strip()\n if msg:\n result[\"messages\"].append(msg)\n print(f\" [hook:{event}] INJECT: {msg[:200]}\")\n\n except subprocess.TimeoutExpired:\n print(f\" [hook:{event}] Timeout ({HOOK_TIMEOUT}s)\")\n except Exception as e:\n print(f\" [hook:{event}] Error: {e}\")\n\n return result\n\n\n# -- Tool implementations (same as s02) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
Use tools to solve tasks.\"\n\n\ndef agent_loop(messages: list, hooks: HookManager):\n \"\"\"\n The hook-aware agent loop.\n\n The teaching version keeps only the clearest integration points:\n SessionStart, PreToolUse, execute tool, PostToolUse.\n \"\"\"\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n tool_input = dict(block.input or {})\n ctx = {\"tool_name\": block.name, \"tool_input\": tool_input}\n\n # -- PreToolUse hooks --\n pre_result = hooks.run_hooks(\"PreToolUse\", ctx)\n\n # Inject hook messages into results\n for msg in pre_result.get(\"messages\", []):\n results.append({\n \"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": f\"[Hook message]: {msg}\",\n })\n\n if pre_result.get(\"blocked\"):\n reason = pre_result.get(\"block_reason\", \"Blocked by hook\")\n output = f\"Tool blocked by PreToolUse hook: {reason}\"\n results.append({\n \"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output,\n })\n continue\n\n # -- Execute tool --\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**tool_input) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n\n # -- PostToolUse hooks --\n ctx[\"tool_output\"] = output\n post_result = hooks.run_hooks(\"PostToolUse\", ctx)\n\n # Inject post-hook messages\n for msg in post_result.get(\"messages\", []):\n output += f\"\\n[Hook note]: {msg}\"\n\n results.append({\n \"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n hooks = HookManager()\n\n # Fire SessionStart hooks\n hooks.run_hooks(\"SessionStart\", {\"tool_name\": \"\", \"tool_input\": {}})\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms08 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, hooks)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): extensibility(可扩展性)——不改主循环即可注入行为。\n\"\"\"\ns08_hook_system.py - Hook System(钩子系统)\n\nHook(钩子)是主循环周边的扩展点。\n它允许在不重写循环的前提下增量添加行为。\n\n教学版包含:\n - SessionStart(会话开始)\n - PreToolUse(工具调用前)\n - PostToolUse(工具调用后)\n\n教学版退出码约定:\n - 0 -> continue(继续)\n - 1 -> block(阻断)\n - 2 -> inject a message(注入消息)\n\n这里刻意简化于生产系统,先把扩展模式讲清楚,再进入事件级边界细节。\n\n关键洞察:\n\"不改主循环,也能扩展智能体。\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\n\n# 教学版仅保留最清晰的三个事件;完整系统可继续扩展事件面。\n\nHOOK_EVENTS = (\"PreToolUse\", \"PostToolUse\", \"SessionStart\")\nHOOK_TIMEOUT = 30 # 秒\n# 真实 Claude Code 的超时配置:\n# TOOL_HOOK_EXECUTION_TIMEOUT_MS = 600000(工具 hook 10 分钟)\n# SESSION_END_HOOK_TIMEOUT_MS = 1500(SessionEnd hook 1.5 秒)\n\n# 工作区信任标记。仅在该文件存在(或 SDK 模式)时运行 hooks。\nTRUST_MARKER = WORKDIR / \".claude\" / \".claude_trusted\"\n\n\nclass HookManager:\n \"\"\"\n 从 `.hooks.json` 加载并执行 hooks。\n\n hook 管理器的三个核心职责:\n - 加载 hook 定义\n - 按事件执行匹配命令\n - 聚合 block/message 结果供调用方处理\n \"\"\"\n\n def __init__(self, config_path: Path = None, sdk_mode: bool = False):\n self.hooks = {\"PreToolUse\": [], \"PostToolUse\": [], \"SessionStart\": []}\n self._sdk_mode = sdk_mode\n config_path = config_path or (WORKDIR / \".hooks.json\")\n if config_path.exists():\n try:\n config = json.loads(config_path.read_text())\n for event in HOOK_EVENTS:\n self.hooks[event] = config.get(\"hooks\", {}).get(event, [])\n print(f\"[Hooks loaded from {config_path}]\")\n except Exception as e:\n print(f\"[Hook config error: {e}]\")\n\n def _check_workspace_trust(self) -> bool:\n \"\"\"\n 检查当前工作区是否 trusted(可信)。\n\n 教学版使用简单信任标记文件;\n SDK 模式下默认视为可信。\n \"\"\"\n if self._sdk_mode:\n return True\n return TRUST_MARKER.exists()\n\n def run_hooks(self, event: str, context: dict = None) -> dict:\n \"\"\"\n 执行某事件对应的所有 hooks。\n\n 返回:{\"blocked\": bool, \"messages\": list[str]}\n - blocked: 任一 hook 返回退出码 1 时为 True\n - messages: 收集退出码 2 的 stderr 内容(可注入会话)\n \"\"\"\n result = {\"blocked\": False, \"messages\": []}\n\n # 信任门控:不可信工作区不执行 hooks\n if not self._check_workspace_trust():\n return result\n\n hooks = self.hooks.get(event, [])\n\n for hook_def in hooks:\n # 检查 matcher(主要用于 PreToolUse/PostToolUse 的工具名过滤)\n matcher = hook_def.get(\"matcher\")\n if matcher and context:\n tool_name = context.get(\"tool_name\", \"\")\n if matcher != \"*\" and matcher != tool_name:\n continue\n\n command = hook_def.get(\"command\", \"\")\n if not command:\n continue\n\n # 构建 hook 执行环境变量\n env = dict(os.environ)\n if context:\n env[\"HOOK_EVENT\"] = event\n env[\"HOOK_TOOL_NAME\"] = context.get(\"tool_name\", \"\")\n env[\"HOOK_TOOL_INPUT\"] = json.dumps(\n context.get(\"tool_input\", {}), ensure_ascii=False)[:10000]\n if \"tool_output\" in context:\n env[\"HOOK_TOOL_OUTPUT\"] = str(\n context[\"tool_output\"])[:10000]\n\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR, env=env,\n capture_output=True, text=True, timeout=HOOK_TIMEOUT,\n )\n\n if r.returncode == 0:\n # 正常继续\n if r.stdout.strip():\n print(f\" [hook:{event}] {r.stdout.strip()[:100]}\")\n\n # 可选结构化 stdout:在保持教学契约简洁的前提下提供扩展点。\n try:\n hook_output = json.loads(r.stdout)\n if \"updatedInput\" in hook_output and context:\n context[\"tool_input\"] = hook_output[\"updatedInput\"]\n if \"additionalContext\" in 
hook_output:\n result[\"messages\"].append(\n hook_output[\"additionalContext\"])\n if \"permissionDecision\" in hook_output:\n result[\"permission_override\"] = (\n hook_output[\"permissionDecision\"])\n except (json.JSONDecodeError, TypeError):\n pass # stdout 非 JSON,属于常见简化 hook 形态\n\n elif r.returncode == 1:\n # 阻断执行\n result[\"blocked\"] = True\n reason = r.stderr.strip() or \"Blocked by hook\"\n result[\"block_reason\"] = reason\n print(f\" [hook:{event}] BLOCKED: {reason[:200]}\")\n\n elif r.returncode == 2:\n # 注入消息\n msg = r.stderr.strip()\n if msg:\n result[\"messages\"].append(msg)\n print(f\" [hook:{event}] INJECT: {msg[:200]}\")\n\n except subprocess.TimeoutExpired:\n print(f\" [hook:{event}] Timeout ({HOOK_TIMEOUT}s)\")\n except Exception as e:\n print(f\" [hook:{event}] Error: {e}\")\n\n return result\n\n\n# -- 工具实现(与 s02 相同) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": 
\"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\nSYSTEM = f\"你是位于 {WORKDIR} 的 coding agent(编码智能体),请使用工具解决任务。\"\n\n\ndef agent_loop(messages: list, hooks: HookManager):\n \"\"\"\n 带 hook 感知的智能体循环。\n\n 教学版只保留最清晰的接入点:\n SessionStart、PreToolUse、工具执行、PostToolUse。\n \"\"\"\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n tool_input = dict(block.input or {})\n ctx = {\"tool_name\": block.name, \"tool_input\": tool_input}\n\n # -- PreToolUse hooks(前置工具 hook) --\n pre_result = hooks.run_hooks(\"PreToolUse\", ctx)\n\n # 将 hook 消息注入 tool_result\n for msg in pre_result.get(\"messages\", []):\n results.append({\n \"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": f\"[Hook 消息]: {msg}\",\n })\n\n if pre_result.get(\"blocked\"):\n reason = pre_result.get(\"block_reason\", \"被 hook 拦截\")\n output = f\"工具被 PreToolUse hook 拦截:{reason}\"\n results.append({\n \"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output,\n })\n continue\n\n # -- 执行工具 --\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**tool_input) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n\n # -- PostToolUse hooks(后置工具 hook) --\n ctx[\"tool_output\"] = output\n post_result = hooks.run_hooks(\"PostToolUse\", ctx)\n\n # 注入 post-hook 消息\n for msg in post_result.get(\"messages\", []):\n output += f\"\\n[Hook note]: {msg}\"\n\n results.append({\n \"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n hooks = HookManager()\n\n # 触发 SessionStart hooks\n hooks.run_hooks(\"SessionStart\", {\"tool_name\": \"\", \"tool_input\": {}})\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms08 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, hooks)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: extensibility -- injecting behavior without touching the loop.\n\"\"\"\ns08_hook_system.py - Hook System\n\nHooks are extension points around the main loop.\nThey let readers add behavior without rewriting the loop itself.\n\nTeaching version:\n - SessionStart\n - PreToolUse\n - PostToolUse\n\nTeaching exit-code contract:\n - 0 -> continue\n - 1 -> block\n - 2 -> inject a message\n\nThis is intentionally simpler than a production system. The goal here is to\nteach the extension pattern clearly before introducing event-specific edge\ncases.\n\nKey insight: \"Extend the agent without touching the loop.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# The teaching version keeps only the three clearest events. More complete\n# systems can grow the event surface later.\n\nHOOK_EVENTS = (\"PreToolUse\", \"PostToolUse\", \"SessionStart\")\nHOOK_TIMEOUT = 30 # seconds\n# Real CC timeouts:\n# TOOL_HOOK_EXECUTION_TIMEOUT_MS = 600000 (10 minutes for tool hooks)\n# SESSION_END_HOOK_TIMEOUT_MS = 1500 (1.5 seconds for SessionEnd hooks)\n\n# Workspace trust marker. Hooks only run if this file exists (or SDK mode).\nTRUST_MARKER = WORKDIR / \".claude\" / \".claude_trusted\"\n\n\nclass HookManager:\n \"\"\"\n Load and execute hooks from .hooks.json configuration.\n\n The hook manager does three simple jobs:\n - load hook definitions\n - run matching commands for an event\n - aggregate block / message results for the caller\n \"\"\"\n\n def __init__(self, config_path: Path = None, sdk_mode: bool = False):\n self.hooks = {\"PreToolUse\": [], \"PostToolUse\": [], \"SessionStart\": []}\n self._sdk_mode = sdk_mode\n config_path = config_path or (WORKDIR / \".hooks.json\")\n if config_path.exists():\n try:\n config = json.loads(config_path.read_text())\n for event in HOOK_EVENTS:\n self.hooks[event] = config.get(\"hooks\", {}).get(event, [])\n print(f\"[Hooks loaded from {config_path}]\")\n except Exception as e:\n print(f\"[Hook config error: {e}]\")\n\n def _check_workspace_trust(self) -> bool:\n \"\"\"\n Check whether the current workspace is trusted.\n\n The teaching version uses a simple trust marker file.\n In SDK mode, trust is treated as implicit.\n \"\"\"\n if self._sdk_mode:\n return True\n return TRUST_MARKER.exists()\n\n def run_hooks(self, event: str, context: dict = None) -> dict:\n \"\"\"\n Execute all hooks for an event.\n\n Returns: {\"blocked\": bool, \"messages\": list[str]}\n - blocked: True if any hook returned exit code 1\n - messages: stderr content from exit-code-2 hooks (to inject)\n \"\"\"\n result = {\"blocked\": False, \"messages\": []}\n\n # Trust gate: refuse to run hooks in untrusted workspaces\n if not self._check_workspace_trust():\n return result\n\n hooks = self.hooks.get(event, [])\n\n for hook_def in hooks:\n # Check matcher (tool name filter for PreToolUse/PostToolUse)\n matcher = hook_def.get(\"matcher\")\n if matcher and context:\n tool_name = context.get(\"tool_name\", \"\")\n if matcher != \"*\" and matcher != tool_name:\n continue\n\n command = hook_def.get(\"command\", \"\")\n if not command:\n continue\n\n # Build environment with hook context\n env = dict(os.environ)\n if context:\n 
env[\"HOOK_EVENT\"] = event\n env[\"HOOK_TOOL_NAME\"] = context.get(\"tool_name\", \"\")\n env[\"HOOK_TOOL_INPUT\"] = json.dumps(\n context.get(\"tool_input\", {}), ensure_ascii=False)[:10000]\n if \"tool_output\" in context:\n env[\"HOOK_TOOL_OUTPUT\"] = str(\n context[\"tool_output\"])[:10000]\n\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR, env=env,\n capture_output=True, text=True, timeout=HOOK_TIMEOUT,\n )\n\n if r.returncode == 0:\n # Continue silently\n if r.stdout.strip():\n print(f\" [hook:{event}] {r.stdout.strip()[:100]}\")\n\n # Optional structured stdout: small extension point that\n # keeps the teaching contract simple.\n try:\n hook_output = json.loads(r.stdout)\n if \"updatedInput\" in hook_output and context:\n context[\"tool_input\"] = hook_output[\"updatedInput\"]\n if \"additionalContext\" in hook_output:\n result[\"messages\"].append(\n hook_output[\"additionalContext\"])\n if \"permissionDecision\" in hook_output:\n result[\"permission_override\"] = (\n hook_output[\"permissionDecision\"])\n except (json.JSONDecodeError, TypeError):\n pass # stdout was not JSON -- normal for simple hooks\n\n elif r.returncode == 1:\n # Block execution\n result[\"blocked\"] = True\n reason = r.stderr.strip() or \"Blocked by hook\"\n result[\"block_reason\"] = reason\n print(f\" [hook:{event}] BLOCKED: {reason[:200]}\")\n\n elif r.returncode == 2:\n # Inject message\n msg = r.stderr.strip()\n if msg:\n result[\"messages\"].append(msg)\n print(f\" [hook:{event}] INJECT: {msg[:200]}\")\n\n except subprocess.TimeoutExpired:\n print(f\" [hook:{event}] Timeout ({HOOK_TIMEOUT}s)\")\n except Exception as e:\n print(f\" [hook:{event}] Error: {e}\")\n\n return result\n\n\n# -- Tool implementations (same as s02) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
Use tools to solve tasks.\"\n\n\ndef agent_loop(messages: list, hooks: HookManager):\n \"\"\"\n The hook-aware agent loop.\n\n The teaching version keeps only the clearest integration points:\n SessionStart, PreToolUse, execute tool, PostToolUse.\n \"\"\"\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n tool_input = dict(block.input or {})\n ctx = {\"tool_name\": block.name, \"tool_input\": tool_input}\n\n # -- PreToolUse hooks --\n pre_result = hooks.run_hooks(\"PreToolUse\", ctx)\n\n # Inject hook messages into results\n for msg in pre_result.get(\"messages\", []):\n results.append({\n \"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": f\"[Hook message]: {msg}\",\n })\n\n if pre_result.get(\"blocked\"):\n reason = pre_result.get(\"block_reason\", \"Blocked by hook\")\n output = f\"Tool blocked by PreToolUse hook: {reason}\"\n results.append({\n \"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output,\n })\n continue\n\n # -- Execute tool --\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**tool_input) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n\n # -- PostToolUse hooks --\n ctx[\"tool_output\"] = output\n post_result = hooks.run_hooks(\"PostToolUse\", ctx)\n\n # Inject post-hook messages\n for msg in post_result.get(\"messages\", []):\n output += f\"\\n[Hook note]: {msg}\"\n\n results.append({\n \"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n hooks = HookManager()\n\n # Fire SessionStart hooks\n hooks.run_hooks(\"SessionStart\", {\"tool_name\": \"\", \"tool_input\": {}})\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms08 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, hooks)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: extensibility -- injecting behavior without touching the loop.\n\"\"\"\ns08_hook_system.py - Hook System\n\nHooks are extension points around the main loop.\nThey let readers add behavior without rewriting the loop itself.\n\nTeaching version:\n - SessionStart\n - PreToolUse\n - PostToolUse\n\nTeaching exit-code contract:\n - 0 -> continue\n - 1 -> block\n - 2 -> inject a message\n\nThis is intentionally simpler than a production system. The goal here is to\nteach the extension pattern clearly before introducing event-specific edge\ncases.\n\nKey insight: \"Extend the agent without touching the loop.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# The teaching version keeps only the three clearest events. More complete\n# systems can grow the event surface later.\n\nHOOK_EVENTS = (\"PreToolUse\", \"PostToolUse\", \"SessionStart\")\nHOOK_TIMEOUT = 30 # seconds\n# Real CC timeouts:\n# TOOL_HOOK_EXECUTION_TIMEOUT_MS = 600000 (10 minutes for tool hooks)\n# SESSION_END_HOOK_TIMEOUT_MS = 1500 (1.5 seconds for SessionEnd hooks)\n\n# Workspace trust marker. Hooks only run if this file exists (or SDK mode).\nTRUST_MARKER = WORKDIR / \".claude\" / \".claude_trusted\"\n\n\nclass HookManager:\n \"\"\"\n Load and execute hooks from .hooks.json configuration.\n\n The hook manager does three simple jobs:\n - load hook definitions\n - run matching commands for an event\n - aggregate block / message results for the caller\n \"\"\"\n\n def __init__(self, config_path: Path = None, sdk_mode: bool = False):\n self.hooks = {\"PreToolUse\": [], \"PostToolUse\": [], \"SessionStart\": []}\n self._sdk_mode = sdk_mode\n config_path = config_path or (WORKDIR / \".hooks.json\")\n if config_path.exists():\n try:\n config = json.loads(config_path.read_text())\n for event in HOOK_EVENTS:\n self.hooks[event] = config.get(\"hooks\", {}).get(event, [])\n print(f\"[Hooks loaded from {config_path}]\")\n except Exception as e:\n print(f\"[Hook config error: {e}]\")\n\n def _check_workspace_trust(self) -> bool:\n \"\"\"\n Check whether the current workspace is trusted.\n\n The teaching version uses a simple trust marker file.\n In SDK mode, trust is treated as implicit.\n \"\"\"\n if self._sdk_mode:\n return True\n return TRUST_MARKER.exists()\n\n def run_hooks(self, event: str, context: dict = None) -> dict:\n \"\"\"\n Execute all hooks for an event.\n\n Returns: {\"blocked\": bool, \"messages\": list[str]}\n - blocked: True if any hook returned exit code 1\n - messages: stderr content from exit-code-2 hooks (to inject)\n \"\"\"\n result = {\"blocked\": False, \"messages\": []}\n\n # Trust gate: refuse to run hooks in untrusted workspaces\n if not self._check_workspace_trust():\n return result\n\n hooks = self.hooks.get(event, [])\n\n for hook_def in hooks:\n # Check matcher (tool name filter for PreToolUse/PostToolUse)\n matcher = hook_def.get(\"matcher\")\n if matcher and context:\n tool_name = context.get(\"tool_name\", \"\")\n if matcher != \"*\" and matcher != tool_name:\n continue\n\n command = hook_def.get(\"command\", \"\")\n if not command:\n continue\n\n # Build environment with hook context\n env = dict(os.environ)\n if 
context:\n env[\"HOOK_EVENT\"] = event\n env[\"HOOK_TOOL_NAME\"] = context.get(\"tool_name\", \"\")\n env[\"HOOK_TOOL_INPUT\"] = json.dumps(\n context.get(\"tool_input\", {}), ensure_ascii=False)[:10000]\n if \"tool_output\" in context:\n env[\"HOOK_TOOL_OUTPUT\"] = str(\n context[\"tool_output\"])[:10000]\n\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR, env=env,\n capture_output=True, text=True, timeout=HOOK_TIMEOUT,\n )\n\n if r.returncode == 0:\n # Continue silently\n if r.stdout.strip():\n print(f\" [hook:{event}] {r.stdout.strip()[:100]}\")\n\n # Optional structured stdout: small extension point that\n # keeps the teaching contract simple.\n try:\n hook_output = json.loads(r.stdout)\n if \"updatedInput\" in hook_output and context:\n context[\"tool_input\"] = hook_output[\"updatedInput\"]\n if \"additionalContext\" in hook_output:\n result[\"messages\"].append(\n hook_output[\"additionalContext\"])\n if \"permissionDecision\" in hook_output:\n result[\"permission_override\"] = (\n hook_output[\"permissionDecision\"])\n except (json.JSONDecodeError, TypeError):\n pass # stdout was not JSON -- normal for simple hooks\n\n elif r.returncode == 1:\n # Block execution\n result[\"blocked\"] = True\n reason = r.stderr.strip() or \"Blocked by hook\"\n result[\"block_reason\"] = reason\n print(f\" [hook:{event}] BLOCKED: {reason[:200]}\")\n\n elif r.returncode == 2:\n # Inject message\n msg = r.stderr.strip()\n if msg:\n result[\"messages\"].append(msg)\n print(f\" [hook:{event}] INJECT: {msg[:200]}\")\n\n except subprocess.TimeoutExpired:\n print(f\" [hook:{event}] Timeout ({HOOK_TIMEOUT}s)\")\n except Exception as e:\n print(f\" [hook:{event}] Error: {e}\")\n\n return result\n\n\n# -- Tool implementations (same as s02) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
Use tools to solve tasks.\"\n\n\ndef agent_loop(messages: list, hooks: HookManager):\n \"\"\"\n The hook-aware agent loop.\n\n The teaching version keeps only the clearest integration points:\n SessionStart, PreToolUse, execute tool, PostToolUse.\n \"\"\"\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n\n tool_input = dict(block.input or {})\n ctx = {\"tool_name\": block.name, \"tool_input\": tool_input}\n\n # -- PreToolUse hooks --\n pre_result = hooks.run_hooks(\"PreToolUse\", ctx)\n\n # Inject hook messages into results\n for msg in pre_result.get(\"messages\", []):\n results.append({\n \"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": f\"[Hook message]: {msg}\",\n })\n\n if pre_result.get(\"blocked\"):\n reason = pre_result.get(\"block_reason\", \"Blocked by hook\")\n output = f\"Tool blocked by PreToolUse hook: {reason}\"\n results.append({\n \"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output,\n })\n continue\n\n # -- Execute tool --\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**tool_input) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n\n # -- PostToolUse hooks --\n ctx[\"tool_output\"] = output\n post_result = hooks.run_hooks(\"PostToolUse\", ctx)\n\n # Inject post-hook messages\n for msg in post_result.get(\"messages\", []):\n output += f\"\\n[Hook note]: {msg}\"\n\n results.append({\n \"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n hooks = HookManager()\n\n # Fire SessionStart hooks\n hooks.run_hooks(\"SessionStart\", {\"tool_name\": \"\", \"tool_input\": {}})\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms08 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history, hooks)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
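[Editor's note -- illustrative sketch, not part of the patch. The s08 entry above loads hook definitions from .hooks.json and applies a three-value exit-code contract: 0 continue, 1 block (stderr becomes the reason), 2 inject (stderr becomes a message). The config keys ("hooks", "matcher", "command") and the HOOK_* environment variables are taken from the HookManager source embedded above; the file name block_env_writes.py and the policy it enforces are invented for the example, and the .claude/.claude_trusted trust marker must exist before any hook runs.]

# block_env_writes.py -- a PreToolUse hook wired up via a .hooks.json like:
#   {"hooks": {"PreToolUse": [{"matcher": "*",
#                              "command": "python block_env_writes.py"}]}}
# Exit code 1 blocks the tool call (stderr becomes the reason); 0 continues.
import json
import os
import sys

tool = os.environ.get("HOOK_TOOL_NAME", "")
tool_input = json.loads(os.environ.get("HOOK_TOOL_INPUT", "") or "{}")

if tool in ("write_file", "edit_file") and ".env" in tool_input.get("path", ""):
    print("writes to .env files are blocked by policy", file=sys.stderr)
    sys.exit(1)
sys.exit(0)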
+ {
+ "id": "s09",
+ "filename": "s09_memory_system.py",
+ "title": "Memory System",
+ "subtitle": "Keep Only What Survives Sessions",
+ "loc": 414,
+ "tools": [
+ "bash",
+ "read_file",
+ "write_file",
+ "edit_file",
+ "save_memory"
+ ],
+ "newTools": [
+ "save_memory"
+ ],
+ "coreAddition": "Typed memory records + reload path",
+ "keyInsight": "Memory gives direction; current observation gives truth.",
+ "classes": [
+ {
+ "name": "MemoryManager",
+ "startLine": 64,
+ "endLine": 189
+ },
+ {
+ "name": "DreamConsolidator",
+ "startLine": 190,
+ "endLine": 345
+ }
+ ],
+ "functions": [
+ {
+ "name": "safe_path",
+ "signature": "def safe_path(p: str)",
+ "startLine": 346
+ },
+ {
+ "name": "run_bash",
+ "signature": "def run_bash(command: str)",
+ "startLine": 353
+ },
+ {
+ "name": "run_read",
+ "signature": "def run_read(path: str, limit: int = None)",
+ "startLine": 366
+ },
+ {
+ "name": "run_write",
+ "signature": "def run_write(path: str, content: str)",
+ "startLine": 376
+ },
+ {
+ "name": "run_edit",
+ "signature": "def run_edit(path: str, old_text: str, new_text: str)",
+ "startLine": 386
+ },
+ {
+ "name": "run_save_memory",
+ "signature": "def run_save_memory(name: str, description: str, mem_type: str, content: str)",
+ "startLine": 402
+ },
+ {
+ "name": "build_system_prompt",
+ "signature": "def build_system_prompt()",
+ "startLine": 450
+ },
+ {
+ "name": "agent_loop",
+ "signature": "def agent_loop(messages: list)",
+ "startLine": 463
}
],
- "layer": "planning",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns04_subagent.py - Subagents\n\nSpawn a child agent with fresh messages=[]. The child works in its own\ncontext, sharing the filesystem, then returns only a summary to the parent.\n\n Parent agent Subagent\n +------------------+ +------------------+\n | messages=[...] | | messages=[] | <-- fresh\n | | dispatch | |\n | tool: task | ---------->| while tool_use: |\n | prompt=\"...\" | | call tools |\n | description=\"\" | | append results |\n | | summary | |\n | result = \"...\" | <--------- | return last text |\n +------------------+ +------------------+\n |\n Parent context stays clean.\n Subagent context is discarded.\n\nKey insight: \"Process isolation gives context isolation for free.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use the task tool to delegate exploration or subtasks.\"\nSUBAGENT_SYSTEM = f\"You are a coding subagent at {WORKDIR}. Complete the given task, then summarize your findings.\"\n\n\n# -- Tool implementations shared by parent and child --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\n# Child gets all base tools except task (no recursive spawning)\nCHILD_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\n# -- Subagent: fresh context, filtered tools, summary-only return --\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}] # fresh context\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n # Only the final text returns to the parent -- child context is discarded\n return \"\".join(b.text for b in response.content if hasattr(b, \"text\")) or \"(no summary)\"\n\n\n# -- Parent tools: base tools + task dispatcher --\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\", \"description\": \"Spawn a subagent with fresh context. 
It shares the filesystem but not conversation history.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"prompt\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\", \"description\": \"Short description of the task\"}}, \"required\": [\"prompt\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=PARENT_TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"task\":\n desc = block.input.get(\"description\", \"subtask\")\n print(f\"> task ({desc}): {block.input['prompt'][:80]}\")\n output = run_subagent(block.input[\"prompt\"])\n else:\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\" {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms04 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ "layer": "hardening",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: persistence -- remembering across the session boundary.\n\"\"\"\ns09_memory_system.py - Memory System\n\nThis teaching version focuses on one core idea:\nsome information should survive the current conversation, but not everything\nbelongs in memory.\n\nUse memory for:\n - user preferences\n - repeated user feedback\n - project facts that are NOT obvious from the current code\n - pointers to external resources\n\nDo NOT use memory for:\n - code structure that can be re-read from the repo\n - temporary task state\n - secrets\n\nStorage layout:\n .memory/\n MEMORY.md\n prefer_tabs.md\n review_style.md\n incident_board.md\n\nEach memory is a small Markdown file with frontmatter.\nThe agent can save a memory through save_memory(), and the memory index\nis rebuilt after each write.\n\nAn optional \"Dream\" pass can later consolidate, deduplicate, and prune\nstored memories. It is useful, but it is not the first thing readers need\nto understand.\n\nKey insight: \"Memory only stores cross-session information that is still\nworth recalling later and is not easy to re-derive from the current repo.\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nMEMORY_TYPES = (\"user\", \"feedback\", \"project\", \"reference\")\nMAX_INDEX_LINES = 200\n\n\nclass MemoryManager:\n \"\"\"\n Load, build, and save persistent memories across sessions.\n\n The teaching version keeps memory explicit:\n one Markdown file per memory, plus one compact index file.\n \"\"\"\n\n def __init__(self, memory_dir: Path = None):\n self.memory_dir = memory_dir or MEMORY_DIR\n self.memories = {} # name -> {description, type, content}\n\n def load_all(self):\n \"\"\"Load MEMORY.md index and all individual memory files.\"\"\"\n self.memories = {}\n if not self.memory_dir.exists():\n return\n\n # Scan all .md files except MEMORY.md\n for md_file in sorted(self.memory_dir.glob(\"*.md\")):\n if md_file.name == \"MEMORY.md\":\n continue\n parsed = self._parse_frontmatter(md_file.read_text())\n if parsed:\n name = parsed.get(\"name\", md_file.stem)\n self.memories[name] = {\n \"description\": parsed.get(\"description\", \"\"),\n \"type\": parsed.get(\"type\", \"project\"),\n \"content\": parsed.get(\"content\", \"\"),\n \"file\": md_file.name,\n }\n\n count = len(self.memories)\n if count > 0:\n print(f\"[Memory loaded: {count} memories from {self.memory_dir}]\")\n\n def load_memory_prompt(self) -> str:\n \"\"\"Build a memory section for injection into the system prompt.\"\"\"\n if not self.memories:\n return \"\"\n\n sections = []\n sections.append(\"# Memories (persistent across sessions)\")\n sections.append(\"\")\n\n # Group by type for readability\n for mem_type in MEMORY_TYPES:\n typed = {k: v for k, v in self.memories.items() if v[\"type\"] == mem_type}\n if not typed:\n continue\n sections.append(f\"## [{mem_type}]\")\n for name, mem in typed.items():\n sections.append(f\"### {name}: {mem['description']}\")\n if mem[\"content\"].strip():\n sections.append(mem[\"content\"].strip())\n sections.append(\"\")\n\n return \"\\n\".join(sections)\n\n def save_memory(self, 
name: str, description: str, mem_type: str, content: str) -> str:\n \"\"\"\n Save a memory to disk and update the index.\n\n Returns a status message.\n \"\"\"\n if mem_type not in MEMORY_TYPES:\n return f\"Error: type must be one of {MEMORY_TYPES}\"\n\n # Sanitize name for filename\n safe_name = re.sub(r\"[^a-zA-Z0-9_-]\", \"_\", name.lower())\n if not safe_name:\n return \"Error: invalid memory name\"\n\n self.memory_dir.mkdir(parents=True, exist_ok=True)\n\n # Write individual memory file with frontmatter\n frontmatter = (\n f\"---\\n\"\n f\"name: {name}\\n\"\n f\"description: {description}\\n\"\n f\"type: {mem_type}\\n\"\n f\"---\\n\"\n f\"{content}\\n\"\n )\n file_name = f\"{safe_name}.md\"\n file_path = self.memory_dir / file_name\n file_path.write_text(frontmatter)\n\n # Update in-memory store\n self.memories[name] = {\n \"description\": description,\n \"type\": mem_type,\n \"content\": content,\n \"file\": file_name,\n }\n\n # Rebuild MEMORY.md index\n self._rebuild_index()\n\n return f\"Saved memory '{name}' [{mem_type}] to {file_path.relative_to(WORKDIR)}\"\n\n def _rebuild_index(self):\n \"\"\"Rebuild MEMORY.md from current in-memory state, capped at 200 lines.\"\"\"\n lines = [\"# Memory Index\", \"\"]\n for name, mem in self.memories.items():\n lines.append(f\"- {name}: {mem['description']} [{mem['type']}]\")\n if len(lines) >= MAX_INDEX_LINES:\n lines.append(f\"... (truncated at {MAX_INDEX_LINES} lines)\")\n break\n self.memory_dir.mkdir(parents=True, exist_ok=True)\n MEMORY_INDEX.write_text(\"\\n\".join(lines) + \"\\n\")\n\n def _parse_frontmatter(self, text: str) -> dict | None:\n \"\"\"Parse --- delimited frontmatter + body content.\"\"\"\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n(.*)\", text, re.DOTALL)\n if not match:\n return None\n header, body = match.group(1), match.group(2)\n result = {\"content\": body.strip()}\n for line in header.splitlines():\n if \":\" in line:\n key, _, value = line.partition(\":\")\n result[key.strip()] = value.strip()\n return result\n\n\nclass DreamConsolidator:\n \"\"\"\n Auto-consolidation of memories between sessions (\"Dream\").\n\n This is an optional later-stage feature. Its job is to prevent the memory\n store from growing into a noisy pile by merging, deduplicating, and\n pruning entries over time.\n \"\"\"\n\n COOLDOWN_SECONDS = 86400 # 24 hours between consolidations\n SCAN_THROTTLE_SECONDS = 600 # 10 minutes between scan attempts\n MIN_SESSION_COUNT = 5 # need enough data to consolidate\n LOCK_STALE_SECONDS = 3600 # PID lock considered stale after 1 hour\n\n PHASES = [\n \"Orient: scan MEMORY.md index for structure and categories\",\n \"Gather: read individual memory files for full content\",\n \"Consolidate: merge related memories, remove stale entries\",\n \"Prune: enforce 200-line limit on MEMORY.md index\",\n ]\n\n def __init__(self, memory_dir: Path = None):\n self.memory_dir = memory_dir or MEMORY_DIR\n self.lock_file = self.memory_dir / \".dream_lock\"\n self.enabled = True\n self.mode = \"default\"\n self.last_consolidation_time = 0.0\n self.last_scan_time = 0.0\n self.session_count = 0\n\n def should_consolidate(self) -> tuple[bool, str]:\n \"\"\"\n Check 7 gates in sequence. 
All must pass.\n Returns (can_run, reason) where reason explains the first failed gate.\n \"\"\"\n import time\n\n now = time.time()\n\n # Gate 1: enabled flag\n if not self.enabled:\n return False, \"Gate 1: consolidation is disabled\"\n\n # Gate 2: memory directory exists and has memory files\n if not self.memory_dir.exists():\n return False, \"Gate 2: memory directory does not exist\"\n memory_files = list(self.memory_dir.glob(\"*.md\"))\n # Exclude MEMORY.md itself from the count\n memory_files = [f for f in memory_files if f.name != \"MEMORY.md\"]\n if not memory_files:\n return False, \"Gate 2: no memory files found\"\n\n # Gate 3: not in plan mode (only consolidate in active modes)\n if self.mode == \"plan\":\n return False, \"Gate 3: plan mode does not allow consolidation\"\n\n # Gate 4: 24-hour cooldown since last consolidation\n time_since_last = now - self.last_consolidation_time\n if time_since_last < self.COOLDOWN_SECONDS:\n remaining = int(self.COOLDOWN_SECONDS - time_since_last)\n return False, f\"Gate 4: cooldown active, {remaining}s remaining\"\n\n # Gate 5: 10-minute throttle since last scan attempt\n time_since_scan = now - self.last_scan_time\n if time_since_scan < self.SCAN_THROTTLE_SECONDS:\n remaining = int(self.SCAN_THROTTLE_SECONDS - time_since_scan)\n return False, f\"Gate 5: scan throttle active, {remaining}s remaining\"\n\n # Gate 6: need at least 5 sessions worth of data\n if self.session_count < self.MIN_SESSION_COUNT:\n return False, f\"Gate 6: only {self.session_count} sessions, need {self.MIN_SESSION_COUNT}\"\n\n # Gate 7: no active lock file (check PID staleness)\n if not self._acquire_lock():\n return False, \"Gate 7: lock held by another process\"\n\n return True, \"All 7 gates passed\"\n\n def consolidate(self) -> list[str]:\n \"\"\"\n Run the 4-phase consolidation process.\n\n The teaching version returns phase descriptions to make the flow\n visible without requiring an extra LLM pass here.\n \"\"\"\n import time\n\n can_run, reason = self.should_consolidate()\n if not can_run:\n print(f\"[Dream] Cannot consolidate: {reason}\")\n return []\n\n print(\"[Dream] Starting consolidation...\")\n self.last_scan_time = time.time()\n\n completed_phases = []\n for i, phase in enumerate(self.PHASES, 1):\n print(f\"[Dream] Phase {i}/4: {phase}\")\n completed_phases.append(phase)\n\n self.last_consolidation_time = time.time()\n self._release_lock()\n print(f\"[Dream] Consolidation complete: {len(completed_phases)} phases executed\")\n return completed_phases\n\n def _acquire_lock(self) -> bool:\n \"\"\"\n Acquire a PID-based lock file. Returns False if locked by another\n live process. 
Stale locks (older than LOCK_STALE_SECONDS) are removed.\n \"\"\"\n import time\n\n if self.lock_file.exists():\n try:\n lock_data = self.lock_file.read_text().strip()\n pid_str, timestamp_str = lock_data.split(\":\", 1)\n pid = int(pid_str)\n lock_time = float(timestamp_str)\n\n # Check if lock is stale\n if (time.time() - lock_time) > self.LOCK_STALE_SECONDS:\n print(f\"[Dream] Removing stale lock from PID {pid}\")\n self.lock_file.unlink()\n else:\n # Check if owning process is still alive\n try:\n os.kill(pid, 0)\n return False # process alive, lock is valid\n except OSError:\n print(f\"[Dream] Removing lock from dead PID {pid}\")\n self.lock_file.unlink()\n except (ValueError, OSError):\n # Corrupted lock file, remove it\n self.lock_file.unlink(missing_ok=True)\n\n # Write new lock\n try:\n self.memory_dir.mkdir(parents=True, exist_ok=True)\n self.lock_file.write_text(f\"{os.getpid()}:{time.time()}\")\n return True\n except OSError:\n return False\n\n def _release_lock(self):\n \"\"\"Release the lock file if we own it.\"\"\"\n try:\n if self.lock_file.exists():\n lock_data = self.lock_file.read_text().strip()\n pid_str = lock_data.split(\":\")[0]\n if int(pid_str) == os.getpid():\n self.lock_file.unlink()\n except (ValueError, OSError):\n pass\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Global memory manager\nmemory_mgr = MemoryManager()\n\n\ndef run_save_memory(name: str, description: str, mem_type: str, content: str) -> str:\n return memory_mgr.save_memory(name, description, mem_type, content)\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"save_memory\": lambda **kw: run_save_memory(kw[\"name\"], kw[\"description\"], kw[\"type\"], kw[\"content\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"save_memory\", \"description\": \"Save a persistent memory that survives across sessions.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\n \"name\": {\"type\": \"string\", \"description\": \"Short identifier (e.g. 
prefer_tabs, db_schema)\"},\n \"description\": {\"type\": \"string\", \"description\": \"One-line summary of what this memory captures\"},\n \"type\": {\"type\": \"string\", \"enum\": [\"user\", \"feedback\", \"project\", \"reference\"],\n \"description\": \"user=preferences, feedback=corrections, project=non-obvious project conventions or decision reasons, reference=external resource pointers\"},\n \"content\": {\"type\": \"string\", \"description\": \"Full memory content (multi-line OK)\"},\n }, \"required\": [\"name\", \"description\", \"type\", \"content\"]}},\n]\n\nMEMORY_GUIDANCE = \"\"\"\nWhen to save memories:\n- User states a preference (\"I like tabs\", \"always use pytest\") -> type: user\n- User corrects you (\"don't do X\", \"that was wrong because...\") -> type: feedback\n- You learn a project fact that is not easy to infer from current code alone\n (for example: a rule exists because of compliance, or a legacy module must\n stay untouched for business reasons) -> type: project\n- You learn where an external resource lives (ticket board, dashboard, docs URL)\n -> type: reference\n\nWhen NOT to save:\n- Anything easily derivable from code (function signatures, file structure, directory layout)\n- Temporary task state (current branch, open PR numbers, current TODOs)\n- Secrets or credentials (API keys, passwords)\n\"\"\"\n\n\ndef build_system_prompt() -> str:\n \"\"\"Assemble system prompt with memory content included.\"\"\"\n parts = [f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\"]\n\n # Inject memory content if available\n memory_section = memory_mgr.load_memory_prompt()\n if memory_section:\n parts.append(memory_section)\n\n parts.append(MEMORY_GUIDANCE)\n return \"\\n\\n\".join(parts)\n\n\ndef agent_loop(messages: list):\n \"\"\"\n Agent loop with memory-aware system prompt.\n\n The system prompt is rebuilt each call so newly saved memories\n are visible in the next LLM turn within the same session.\n \"\"\"\n while True:\n system = build_system_prompt()\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n # Load existing memories at session start\n memory_mgr.load_all()\n mem_count = len(memory_mgr.memories)\n if mem_count:\n print(f\"[{mem_count} memories loaded into context]\")\n else:\n print(\"[No existing memories. 
The agent can create them with save_memory.]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n # /memories command to list current memories\n if query.strip() == \"/memories\":\n if memory_mgr.memories:\n for name, mem in memory_mgr.memories.items():\n print(f\" [{mem['type']}] {name}: {mem['description']}\")\n else:\n print(\" (no memories)\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): persistence(持久化)——跨会话边界保留记忆。\n\"\"\"\ns09_memory_system.py - Memory System(记忆系统)\n\n本教学版聚焦一个核心观点:\n有些信息应该跨会话保留,但并非所有内容都适合进入 memory(记忆)。\n\n建议写入 memory 的内容:\n - user preferences(用户偏好)\n - repeated user feedback(反复出现的用户反馈)\n - project facts that are NOT obvious from the current code(无法直接从当前代码显见的项目事实)\n - pointers to external resources(外部资源指针)\n\n不应写入 memory 的内容:\n - code structure that can be re-read from the repo(可从仓库重新读取的代码结构)\n - temporary task state(临时任务状态)\n - secrets(敏感密钥与口令)\n\n存储结构:\n .memory/\n MEMORY.md\n prefer_tabs.md\n review_style.md\n incident_board.md\n\n每条 memory 是带 frontmatter 的小型 Markdown 文件。\n智能体可通过 `save_memory()` 写入记忆,每次写入后会重建 memory 索引。\n\n可选的 “Dream” 流程可在后续执行归并、去重和清理。\n它很有用,但不是初学阶段第一优先。\n\n关键洞察:\n\"Memory 只保存跨会话仍有价值、且不易从当前仓库直接再推导的信息。\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nMEMORY_TYPES = (\"user\", \"feedback\", \"project\", \"reference\")\nMAX_INDEX_LINES = 200\n\n\nclass MemoryManager:\n \"\"\"\n 跨会话加载、构建并保存持久记忆。\n\n 教学版采用显式结构:\n 每条记忆一个 Markdown 文件,外加一个紧凑索引文件。\n \"\"\"\n\n def __init__(self, memory_dir: Path = None):\n self.memory_dir = memory_dir or MEMORY_DIR\n self.memories = {} # name(名称)-> {description, type, content}\n\n def load_all(self):\n \"\"\"加载 MEMORY.md 索引及全部记忆文件。\"\"\"\n self.memories = {}\n if not self.memory_dir.exists():\n return\n\n # 扫描除 MEMORY.md 外的所有 .md 文件\n for md_file in sorted(self.memory_dir.glob(\"*.md\")):\n if md_file.name == \"MEMORY.md\":\n continue\n parsed = self._parse_frontmatter(md_file.read_text())\n if parsed:\n name = parsed.get(\"name\", md_file.stem)\n self.memories[name] = {\n \"description\": parsed.get(\"description\", \"\"),\n \"type\": parsed.get(\"type\", \"project\"),\n \"content\": parsed.get(\"content\", \"\"),\n \"file\": md_file.name,\n }\n\n count = len(self.memories)\n if count > 0:\n print(f\"[Memory loaded: {count} memories from {self.memory_dir}]\")\n\n def load_memory_prompt(self) -> str:\n \"\"\"构建用于注入 system prompt 的 memory 区段。\"\"\"\n if not self.memories:\n return \"\"\n\n sections = []\n sections.append(\"# 记忆(跨会话持久化)\")\n sections.append(\"\")\n\n # 按类型分组,提升可读性\n for mem_type in MEMORY_TYPES:\n typed = {k: v for k, v in self.memories.items() if v[\"type\"] == mem_type}\n if not typed:\n continue\n sections.append(f\"## [{mem_type}]\")\n for name, mem in typed.items():\n sections.append(f\"### {name}: {mem['description']}\")\n if mem[\"content\"].strip():\n sections.append(mem[\"content\"].strip())\n sections.append(\"\")\n\n return \"\\n\".join(sections)\n\n def save_memory(self, name: str, description: str, mem_type: str, content: str) -> str:\n \"\"\"\n 将记忆写入磁盘并更新索引。\n\n 返回状态文本。\n \"\"\"\n if mem_type not in MEMORY_TYPES:\n return f\"Error: type 必须是 {MEMORY_TYPES} 之一\"\n\n # 文件名安全化\n safe_name = re.sub(r\"[^a-zA-Z0-9_-]\", \"_\", name.lower())\n if not safe_name:\n return \"Error: memory 名称无效\"\n\n self.memory_dir.mkdir(parents=True, exist_ok=True)\n\n # 写入单条记忆文件(frontmatter + 正文)\n frontmatter = (\n f\"---\\n\"\n f\"name: {name}\\n\"\n f\"description: {description}\\n\"\n f\"type: {mem_type}\\n\"\n f\"---\\n\"\n f\"{content}\\n\"\n )\n file_name = 
f\"{safe_name}.md\"\n file_path = self.memory_dir / file_name\n file_path.write_text(frontmatter)\n\n # 更新内存态\n self.memories[name] = {\n \"description\": description,\n \"type\": mem_type,\n \"content\": content,\n \"file\": file_name,\n }\n\n # 重建 MEMORY.md 索引\n self._rebuild_index()\n\n return f\"Saved memory '{name}' [{mem_type}] to {file_path.relative_to(WORKDIR)}\"\n\n def _rebuild_index(self):\n \"\"\"根据当前内存态重建 MEMORY.md,并限制在 200 行内。\"\"\"\n lines = [\"# Memory Index\", \"\"]\n for name, mem in self.memories.items():\n lines.append(f\"- {name}: {mem['description']} [{mem['type']}]\")\n if len(lines) >= MAX_INDEX_LINES:\n lines.append(f\"... (truncated at {MAX_INDEX_LINES} lines)\")\n break\n self.memory_dir.mkdir(parents=True, exist_ok=True)\n MEMORY_INDEX.write_text(\"\\n\".join(lines) + \"\\n\")\n\n def _parse_frontmatter(self, text: str) -> dict | None:\n \"\"\"解析 `---` 分隔的 frontmatter 与正文内容。\"\"\"\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n(.*)\", text, re.DOTALL)\n if not match:\n return None\n header, body = match.group(1), match.group(2)\n result = {\"content\": body.strip()}\n for line in header.splitlines():\n if \":\" in line:\n key, _, value = line.partition(\":\")\n result[key.strip()] = value.strip()\n return result\n\n\nclass DreamConsolidator:\n \"\"\"\n 会话间记忆自动归并(Dream)。\n\n 这是可选的后续能力,用于防止 memory 仓库长期膨胀成噪声集合:\n 通过合并、去重、清理维持记忆质量。\n \"\"\"\n\n COOLDOWN_SECONDS = 86400 # 归并之间至少间隔 24 小时\n SCAN_THROTTLE_SECONDS = 600 # 扫描尝试之间至少间隔 10 分钟\n MIN_SESSION_COUNT = 5 # 至少积累足够会话数据再归并\n LOCK_STALE_SECONDS = 3600 # PID 锁超过 1 小时视为陈旧\n\n PHASES = [\n \"Orient(定向): 扫描 MEMORY.md 索引,识别结构与分类\",\n \"Gather(采集): 读取各记忆文件,获取完整内容\",\n \"Consolidate(归并): 合并相关记忆并移除过期条目\",\n \"Prune(裁剪): 将 MEMORY.md 索引控制在 200 行以内\",\n ]\n\n def __init__(self, memory_dir: Path = None):\n self.memory_dir = memory_dir or MEMORY_DIR\n self.lock_file = self.memory_dir / \".dream_lock\"\n self.enabled = True\n self.mode = \"default\"\n self.last_consolidation_time = 0.0\n self.last_scan_time = 0.0\n self.session_count = 0\n\n def should_consolidate(self) -> tuple[bool, str]:\n \"\"\"\n 顺序检查 7 个 gate(闸门),全部通过才允许执行。\n 返回 `(can_run, reason)`,reason 表示首个未通过闸门。\n \"\"\"\n import time\n\n now = time.time()\n\n # Gate 1: enabled 开关\n if not self.enabled:\n return False, \"Gate 1: consolidation(归并)已禁用\"\n\n # Gate 2: memory 目录存在且包含记忆文件\n if not self.memory_dir.exists():\n return False, \"Gate 2: memory 目录不存在\"\n memory_files = list(self.memory_dir.glob(\"*.md\"))\n # 统计时排除 MEMORY.md 本身\n memory_files = [f for f in memory_files if f.name != \"MEMORY.md\"]\n if not memory_files:\n return False, \"Gate 2: 未找到 memory 文件\"\n\n # Gate 3: 非 plan 模式(仅在活跃模式允许归并)\n if self.mode == \"plan\":\n return False, \"Gate 3: plan 模式不允许归并\"\n\n # Gate 4: 距离上次归并满足 24 小时冷却\n time_since_last = now - self.last_consolidation_time\n if time_since_last < self.COOLDOWN_SECONDS:\n remaining = int(self.COOLDOWN_SECONDS - time_since_last)\n return False, f\"Gate 4: cooldown active, {remaining}s remaining\"\n\n # Gate 5: 距离上次扫描满足 10 分钟节流\n time_since_scan = now - self.last_scan_time\n if time_since_scan < self.SCAN_THROTTLE_SECONDS:\n remaining = int(self.SCAN_THROTTLE_SECONDS - time_since_scan)\n return False, f\"Gate 5: 扫描节流生效,还需等待 {remaining}s\"\n\n # Gate 6: 至少需要 5 个会话的数据积累\n if self.session_count < self.MIN_SESSION_COUNT:\n return False, f\"Gate 6: only {self.session_count} sessions, need {self.MIN_SESSION_COUNT}\"\n\n # Gate 7: 无活跃锁文件(并检查 PID 锁是否过期)\n if not self._acquire_lock():\n return False, \"Gate 7: 锁被其他进程持有\"\n\n return True, \"All 7 
gates passed\"\n\n def consolidate(self) -> list[str]:\n \"\"\"\n 执行 4 阶段归并流程。\n\n 教学版直接返回阶段说明,用于可视化流程,\n 无需额外 LLM 归并调用。\n \"\"\"\n import time\n\n can_run, reason = self.should_consolidate()\n if not can_run:\n print(f\"[Dream] 无法归并:{reason}\")\n return []\n\n print(\"[Dream] 开始执行归并...\")\n self.last_scan_time = time.time()\n\n completed_phases = []\n for i, phase in enumerate(self.PHASES, 1):\n print(f\"[Dream] Phase {i}/4: {phase}\")\n completed_phases.append(phase)\n\n self.last_consolidation_time = time.time()\n self._release_lock()\n print(f\"[Dream] 归并完成:共执行 {len(completed_phases)} 个阶段\")\n return completed_phases\n\n def _acquire_lock(self) -> bool:\n \"\"\"\n 申请基于 PID 的锁文件。\n 若被其他活跃进程持有则返回 False。\n 过期锁(超过 LOCK_STALE_SECONDS)会被清除。\n \"\"\"\n import time\n\n if self.lock_file.exists():\n try:\n lock_data = self.lock_file.read_text().strip()\n pid_str, timestamp_str = lock_data.split(\":\", 1)\n pid = int(pid_str)\n lock_time = float(timestamp_str)\n\n # 检查锁是否过期\n if (time.time() - lock_time) > self.LOCK_STALE_SECONDS:\n print(f\"[Dream] 正在移除 PID {pid} 的陈旧锁\")\n self.lock_file.unlink()\n else:\n # 检查持锁进程是否仍存活\n try:\n os.kill(pid, 0)\n return False # 进程仍存活,锁有效\n except OSError:\n print(f\"[Dream] 正在移除已退出 PID {pid} 的锁\")\n self.lock_file.unlink()\n except (ValueError, OSError):\n # 锁文件损坏,直接删除\n self.lock_file.unlink(missing_ok=True)\n\n # 写入新锁\n try:\n self.memory_dir.mkdir(parents=True, exist_ok=True)\n self.lock_file.write_text(f\"{os.getpid()}:{time.time()}\")\n return True\n except OSError:\n return False\n\n def _release_lock(self):\n \"\"\"若锁归当前进程持有,则释放锁文件。\"\"\"\n try:\n if self.lock_file.exists():\n lock_data = self.lock_file.read_text().strip()\n pid_str = lock_data.split(\":\")[0]\n if int(pid_str) == os.getpid():\n self.lock_file.unlink()\n except (ValueError, OSError):\n pass\n\n\n# -- 工具实现 --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# 全局 memory 管理器\nmemory_mgr = MemoryManager()\n\n\ndef run_save_memory(name: str, description: str, mem_type: str, content: str) -> str:\n return memory_mgr.save_memory(name, description, mem_type, content)\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"save_memory\": lambda **kw: run_save_memory(kw[\"name\"], kw[\"description\"], kw[\"type\"], kw[\"content\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"save_memory\", \"description\": \"保存可跨会话保留的持久记忆。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\n \"name\": {\"type\": \"string\", \"description\": \"短标识(如 prefer_tabs, db_schema)\"},\n \"description\": {\"type\": \"string\", \"description\": \"该记忆的单行摘要\"},\n \"type\": {\"type\": \"string\", \"enum\": [\"user\", \"feedback\", \"project\", \"reference\"],\n \"description\": \"user=偏好,feedback=纠正,project=不易从代码直接推断的项目约束/决策原因,reference=外部资源指针\"},\n \"content\": {\"type\": \"string\", \"description\": \"记忆正文(可多行)\"},\n }, \"required\": [\"name\", \"description\", \"type\", \"content\"]}},\n]\n\nMEMORY_GUIDANCE = \"\"\"\n何时应保存 memory:\n- 用户表达偏好(例如“我喜欢 tabs”“总是用 pytest”)-> type: user\n- 用户纠正你(例如“不要这样做”“上次错在……”)-> type: feedback\n- 你获得了无法仅凭当前代码快速推断的项目事实\n (例如:某规则源于合规要求、某旧模块因业务原因不能动)-> type: project\n- 你确认了外部资源入口(工单看板、监控面板、文档 URL)-> type: reference\n\n何时不应保存:\n- 能从代码直接推导出的信息(函数签名、目录结构等)\n- 临时任务状态(当前分支、临时 PR 编号、当前 TODO)\n- 秘密或凭据(API Key、密码)\n\"\"\"\n\n\ndef build_system_prompt() -> str:\n \"\"\"组装包含 memory 内容的 system prompt。\"\"\"\n parts = [f\"你是位于 {WORKDIR} 的 coding agent(编码智能体),请使用工具解决任务。\"]\n\n # 若存在 memory,则注入记忆区段\n memory_section = memory_mgr.load_memory_prompt()\n if memory_section:\n parts.append(memory_section)\n\n 
parts.append(MEMORY_GUIDANCE)\n return \"\\n\\n\".join(parts)\n\n\ndef agent_loop(messages: list):\n \"\"\"\n 带 memory 感知的智能体循环。\n\n 每轮都会重建 system prompt,\n 以便新写入的记忆在同一会话下一轮即可生效。\n \"\"\"\n while True:\n system = build_system_prompt()\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n # 会话启动时加载已有记忆\n memory_mgr.load_all()\n mem_count = len(memory_mgr.memories)\n if mem_count:\n print(f\"[已将 {mem_count} 条记忆加载到上下文]\")\n else:\n print(\"[当前无记忆。智能体可通过 save_memory 创建。]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n # /memories 命令:查看当前记忆\n if query.strip() == \"/memories\":\n if memory_mgr.memories:\n for name, mem in memory_mgr.memories.items():\n print(f\" [{mem['type']}] {name}: {mem['description']}\")\n else:\n print(\" (无记忆)\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: persistence -- remembering across the session boundary.\n\"\"\"\ns09_memory_system.py - Memory System\n\nThis teaching version focuses on one core idea:\nsome information should survive the current conversation, but not everything\nbelongs in memory.\n\nUse memory for:\n - user preferences\n - repeated user feedback\n - project facts that are NOT obvious from the current code\n - pointers to external resources\n\nDo NOT use memory for:\n - code structure that can be re-read from the repo\n - temporary task state\n - secrets\n\nStorage layout:\n .memory/\n MEMORY.md\n prefer_tabs.md\n review_style.md\n incident_board.md\n\nEach memory is a small Markdown file with frontmatter.\nThe agent can save a memory through save_memory(), and the memory index\nis rebuilt after each write.\n\nAn optional \"Dream\" pass can later consolidate, deduplicate, and prune\nstored memories. It is useful, but it is not the first thing readers need\nto understand.\n\nKey insight: \"Memory only stores cross-session information that is still\nworth recalling later and is not easy to re-derive from the current repo.\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nMEMORY_TYPES = (\"user\", \"feedback\", \"project\", \"reference\")\nMAX_INDEX_LINES = 200\n\n\nclass MemoryManager:\n \"\"\"\n Load, build, and save persistent memories across sessions.\n\n The teaching version keeps memory explicit:\n one Markdown file per memory, plus one compact index file.\n \"\"\"\n\n def __init__(self, memory_dir: Path = None):\n self.memory_dir = memory_dir or MEMORY_DIR\n self.memories = {} # name -> {description, type, content}\n\n def load_all(self):\n \"\"\"Load MEMORY.md index and all individual memory files.\"\"\"\n self.memories = {}\n if not self.memory_dir.exists():\n return\n\n # Scan all .md files except MEMORY.md\n for md_file in sorted(self.memory_dir.glob(\"*.md\")):\n if md_file.name == \"MEMORY.md\":\n continue\n parsed = self._parse_frontmatter(md_file.read_text())\n if parsed:\n name = parsed.get(\"name\", md_file.stem)\n self.memories[name] = {\n \"description\": parsed.get(\"description\", \"\"),\n \"type\": parsed.get(\"type\", \"project\"),\n \"content\": parsed.get(\"content\", \"\"),\n \"file\": md_file.name,\n }\n\n count = len(self.memories)\n if count > 0:\n print(f\"[Memory loaded: {count} memories from {self.memory_dir}]\")\n\n def load_memory_prompt(self) -> str:\n \"\"\"Build a memory section for injection into the system prompt.\"\"\"\n if not self.memories:\n return \"\"\n\n sections = []\n sections.append(\"# Memories (persistent across sessions)\")\n sections.append(\"\")\n\n # Group by type for readability\n for mem_type in MEMORY_TYPES:\n typed = {k: v for k, v in self.memories.items() if v[\"type\"] == mem_type}\n if not typed:\n continue\n sections.append(f\"## [{mem_type}]\")\n for name, mem in typed.items():\n sections.append(f\"### {name}: {mem['description']}\")\n if mem[\"content\"].strip():\n sections.append(mem[\"content\"].strip())\n sections.append(\"\")\n\n return \"\\n\".join(sections)\n\n def save_memory(self, 
name: str, description: str, mem_type: str, content: str) -> str:\n \"\"\"\n Save a memory to disk and update the index.\n\n Returns a status message.\n \"\"\"\n if mem_type not in MEMORY_TYPES:\n return f\"Error: type must be one of {MEMORY_TYPES}\"\n\n # Sanitize name for filename\n safe_name = re.sub(r\"[^a-zA-Z0-9_-]\", \"_\", name.lower())\n if not safe_name:\n return \"Error: invalid memory name\"\n\n self.memory_dir.mkdir(parents=True, exist_ok=True)\n\n # Write individual memory file with frontmatter\n frontmatter = (\n f\"---\\n\"\n f\"name: {name}\\n\"\n f\"description: {description}\\n\"\n f\"type: {mem_type}\\n\"\n f\"---\\n\"\n f\"{content}\\n\"\n )\n file_name = f\"{safe_name}.md\"\n file_path = self.memory_dir / file_name\n file_path.write_text(frontmatter)\n\n # Update in-memory store\n self.memories[name] = {\n \"description\": description,\n \"type\": mem_type,\n \"content\": content,\n \"file\": file_name,\n }\n\n # Rebuild MEMORY.md index\n self._rebuild_index()\n\n return f\"Saved memory '{name}' [{mem_type}] to {file_path.relative_to(WORKDIR)}\"\n\n def _rebuild_index(self):\n \"\"\"Rebuild MEMORY.md from current in-memory state, capped at 200 lines.\"\"\"\n lines = [\"# Memory Index\", \"\"]\n for name, mem in self.memories.items():\n lines.append(f\"- {name}: {mem['description']} [{mem['type']}]\")\n if len(lines) >= MAX_INDEX_LINES:\n lines.append(f\"... (truncated at {MAX_INDEX_LINES} lines)\")\n break\n self.memory_dir.mkdir(parents=True, exist_ok=True)\n MEMORY_INDEX.write_text(\"\\n\".join(lines) + \"\\n\")\n\n def _parse_frontmatter(self, text: str) -> dict | None:\n \"\"\"Parse --- delimited frontmatter + body content.\"\"\"\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n(.*)\", text, re.DOTALL)\n if not match:\n return None\n header, body = match.group(1), match.group(2)\n result = {\"content\": body.strip()}\n for line in header.splitlines():\n if \":\" in line:\n key, _, value = line.partition(\":\")\n result[key.strip()] = value.strip()\n return result\n\n\nclass DreamConsolidator:\n \"\"\"\n Auto-consolidation of memories between sessions (\"Dream\").\n\n This is an optional later-stage feature. Its job is to prevent the memory\n store from growing into a noisy pile by merging, deduplicating, and\n pruning entries over time.\n \"\"\"\n\n COOLDOWN_SECONDS = 86400 # 24 hours between consolidations\n SCAN_THROTTLE_SECONDS = 600 # 10 minutes between scan attempts\n MIN_SESSION_COUNT = 5 # need enough data to consolidate\n LOCK_STALE_SECONDS = 3600 # PID lock considered stale after 1 hour\n\n PHASES = [\n \"Orient: scan MEMORY.md index for structure and categories\",\n \"Gather: read individual memory files for full content\",\n \"Consolidate: merge related memories, remove stale entries\",\n \"Prune: enforce 200-line limit on MEMORY.md index\",\n ]\n\n def __init__(self, memory_dir: Path = None):\n self.memory_dir = memory_dir or MEMORY_DIR\n self.lock_file = self.memory_dir / \".dream_lock\"\n self.enabled = True\n self.mode = \"default\"\n self.last_consolidation_time = 0.0\n self.last_scan_time = 0.0\n self.session_count = 0\n\n def should_consolidate(self) -> tuple[bool, str]:\n \"\"\"\n Check 7 gates in sequence. 
All must pass.\n Returns (can_run, reason) where reason explains the first failed gate.\n \"\"\"\n import time\n\n now = time.time()\n\n # Gate 1: enabled flag\n if not self.enabled:\n return False, \"Gate 1: consolidation is disabled\"\n\n # Gate 2: memory directory exists and has memory files\n if not self.memory_dir.exists():\n return False, \"Gate 2: memory directory does not exist\"\n memory_files = list(self.memory_dir.glob(\"*.md\"))\n # Exclude MEMORY.md itself from the count\n memory_files = [f for f in memory_files if f.name != \"MEMORY.md\"]\n if not memory_files:\n return False, \"Gate 2: no memory files found\"\n\n # Gate 3: not in plan mode (only consolidate in active modes)\n if self.mode == \"plan\":\n return False, \"Gate 3: plan mode does not allow consolidation\"\n\n # Gate 4: 24-hour cooldown since last consolidation\n time_since_last = now - self.last_consolidation_time\n if time_since_last < self.COOLDOWN_SECONDS:\n remaining = int(self.COOLDOWN_SECONDS - time_since_last)\n return False, f\"Gate 4: cooldown active, {remaining}s remaining\"\n\n # Gate 5: 10-minute throttle since last scan attempt\n time_since_scan = now - self.last_scan_time\n if time_since_scan < self.SCAN_THROTTLE_SECONDS:\n remaining = int(self.SCAN_THROTTLE_SECONDS - time_since_scan)\n return False, f\"Gate 5: scan throttle active, {remaining}s remaining\"\n\n # Gate 6: need at least 5 sessions worth of data\n if self.session_count < self.MIN_SESSION_COUNT:\n return False, f\"Gate 6: only {self.session_count} sessions, need {self.MIN_SESSION_COUNT}\"\n\n # Gate 7: no active lock file (check PID staleness)\n if not self._acquire_lock():\n return False, \"Gate 7: lock held by another process\"\n\n return True, \"All 7 gates passed\"\n\n def consolidate(self) -> list[str]:\n \"\"\"\n Run the 4-phase consolidation process.\n\n The teaching version returns phase descriptions to make the flow\n visible without requiring an extra LLM pass here.\n \"\"\"\n import time\n\n can_run, reason = self.should_consolidate()\n if not can_run:\n print(f\"[Dream] Cannot consolidate: {reason}\")\n return []\n\n print(\"[Dream] Starting consolidation...\")\n self.last_scan_time = time.time()\n\n completed_phases = []\n for i, phase in enumerate(self.PHASES, 1):\n print(f\"[Dream] Phase {i}/4: {phase}\")\n completed_phases.append(phase)\n\n self.last_consolidation_time = time.time()\n self._release_lock()\n print(f\"[Dream] Consolidation complete: {len(completed_phases)} phases executed\")\n return completed_phases\n\n def _acquire_lock(self) -> bool:\n \"\"\"\n Acquire a PID-based lock file. Returns False if locked by another\n live process. 
Stale locks (older than LOCK_STALE_SECONDS) are removed.\n \"\"\"\n import time\n\n if self.lock_file.exists():\n try:\n lock_data = self.lock_file.read_text().strip()\n pid_str, timestamp_str = lock_data.split(\":\", 1)\n pid = int(pid_str)\n lock_time = float(timestamp_str)\n\n # Check if lock is stale\n if (time.time() - lock_time) > self.LOCK_STALE_SECONDS:\n print(f\"[Dream] Removing stale lock from PID {pid}\")\n self.lock_file.unlink()\n else:\n # Check if owning process is still alive\n try:\n os.kill(pid, 0)\n return False # process alive, lock is valid\n except OSError:\n print(f\"[Dream] Removing lock from dead PID {pid}\")\n self.lock_file.unlink()\n except (ValueError, OSError):\n # Corrupted lock file, remove it\n self.lock_file.unlink(missing_ok=True)\n\n # Write new lock\n try:\n self.memory_dir.mkdir(parents=True, exist_ok=True)\n self.lock_file.write_text(f\"{os.getpid()}:{time.time()}\")\n return True\n except OSError:\n return False\n\n def _release_lock(self):\n \"\"\"Release the lock file if we own it.\"\"\"\n try:\n if self.lock_file.exists():\n lock_data = self.lock_file.read_text().strip()\n pid_str = lock_data.split(\":\")[0]\n if int(pid_str) == os.getpid():\n self.lock_file.unlink()\n except (ValueError, OSError):\n pass\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Global memory manager\nmemory_mgr = MemoryManager()\n\n\ndef run_save_memory(name: str, description: str, mem_type: str, content: str) -> str:\n return memory_mgr.save_memory(name, description, mem_type, content)\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"save_memory\": lambda **kw: run_save_memory(kw[\"name\"], kw[\"description\"], kw[\"type\"], kw[\"content\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"save_memory\", \"description\": \"Save a persistent memory that survives across sessions.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\n \"name\": {\"type\": \"string\", \"description\": \"Short identifier (e.g. 
prefer_tabs, db_schema)\"},\n \"description\": {\"type\": \"string\", \"description\": \"One-line summary of what this memory captures\"},\n \"type\": {\"type\": \"string\", \"enum\": [\"user\", \"feedback\", \"project\", \"reference\"],\n \"description\": \"user=preferences, feedback=corrections, project=non-obvious project conventions or decision reasons, reference=external resource pointers\"},\n \"content\": {\"type\": \"string\", \"description\": \"Full memory content (multi-line OK)\"},\n }, \"required\": [\"name\", \"description\", \"type\", \"content\"]}},\n]\n\nMEMORY_GUIDANCE = \"\"\"\nWhen to save memories:\n- User states a preference (\"I like tabs\", \"always use pytest\") -> type: user\n- User corrects you (\"don't do X\", \"that was wrong because...\") -> type: feedback\n- You learn a project fact that is not easy to infer from current code alone\n (for example: a rule exists because of compliance, or a legacy module must\n stay untouched for business reasons) -> type: project\n- You learn where an external resource lives (ticket board, dashboard, docs URL)\n -> type: reference\n\nWhen NOT to save:\n- Anything easily derivable from code (function signatures, file structure, directory layout)\n- Temporary task state (current branch, open PR numbers, current TODOs)\n- Secrets or credentials (API keys, passwords)\n\"\"\"\n\n\ndef build_system_prompt() -> str:\n \"\"\"Assemble system prompt with memory content included.\"\"\"\n parts = [f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\"]\n\n # Inject memory content if available\n memory_section = memory_mgr.load_memory_prompt()\n if memory_section:\n parts.append(memory_section)\n\n parts.append(MEMORY_GUIDANCE)\n return \"\\n\\n\".join(parts)\n\n\ndef agent_loop(messages: list):\n \"\"\"\n Agent loop with memory-aware system prompt.\n\n The system prompt is rebuilt each call so newly saved memories\n are visible in the next LLM turn within the same session.\n \"\"\"\n while True:\n system = build_system_prompt()\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n # Load existing memories at session start\n memory_mgr.load_all()\n mem_count = len(memory_mgr.memories)\n if mem_count:\n print(f\"[{mem_count} memories loaded into context]\")\n else:\n print(\"[No existing memories. 
The agent can create them with save_memory.]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n # /memories command to list current memories\n if query.strip() == \"/memories\":\n if memory_mgr.memories:\n for name, mem in memory_mgr.memories.items():\n print(f\" [{mem['type']}] {name}: {mem['description']}\")\n else:\n print(\" (no memories)\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: persistence -- remembering across the session boundary.\n\"\"\"\ns09_memory_system.py - Memory System\n\nThis teaching version focuses on one core idea:\nsome information should survive the current conversation, but not everything\nbelongs in memory.\n\nUse memory for:\n - user preferences\n - repeated user feedback\n - project facts that are NOT obvious from the current code\n - pointers to external resources\n\nDo NOT use memory for:\n - code structure that can be re-read from the repo\n - temporary task state\n - secrets\n\nStorage layout:\n .memory/\n MEMORY.md\n prefer_tabs.md\n review_style.md\n incident_board.md\n\nEach memory is a small Markdown file with frontmatter.\nThe agent can save a memory through save_memory(), and the memory index\nis rebuilt after each write.\n\nAn optional \"Dream\" pass can later consolidate, deduplicate, and prune\nstored memories. It is useful, but it is not the first thing readers need\nto understand.\n\nKey insight: \"Memory only stores cross-session information that is still\nworth recalling later and is not easy to re-derive from the current repo.\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nMEMORY_DIR = WORKDIR / \".memory\"\nMEMORY_INDEX = MEMORY_DIR / \"MEMORY.md\"\nMEMORY_TYPES = (\"user\", \"feedback\", \"project\", \"reference\")\nMAX_INDEX_LINES = 200\n\n\nclass MemoryManager:\n \"\"\"\n Load, build, and save persistent memories across sessions.\n\n The teaching version keeps memory explicit:\n one Markdown file per memory, plus one compact index file.\n \"\"\"\n\n def __init__(self, memory_dir: Path = None):\n self.memory_dir = memory_dir or MEMORY_DIR\n self.memories = {} # name -> {description, type, content}\n\n def load_all(self):\n \"\"\"Load MEMORY.md index and all individual memory files.\"\"\"\n self.memories = {}\n if not self.memory_dir.exists():\n return\n\n # Scan all .md files except MEMORY.md\n for md_file in sorted(self.memory_dir.glob(\"*.md\")):\n if md_file.name == \"MEMORY.md\":\n continue\n parsed = self._parse_frontmatter(md_file.read_text())\n if parsed:\n name = parsed.get(\"name\", md_file.stem)\n self.memories[name] = {\n \"description\": parsed.get(\"description\", \"\"),\n \"type\": parsed.get(\"type\", \"project\"),\n \"content\": parsed.get(\"content\", \"\"),\n \"file\": md_file.name,\n }\n\n count = len(self.memories)\n if count > 0:\n print(f\"[Memory loaded: {count} memories from {self.memory_dir}]\")\n\n def load_memory_prompt(self) -> str:\n \"\"\"Build a memory section for injection into the system prompt.\"\"\"\n if not self.memories:\n return \"\"\n\n sections = []\n sections.append(\"# Memories (persistent across sessions)\")\n sections.append(\"\")\n\n # Group by type for readability\n for mem_type in MEMORY_TYPES:\n typed = {k: v for k, v in self.memories.items() if v[\"type\"] == mem_type}\n if not typed:\n continue\n sections.append(f\"## [{mem_type}]\")\n for name, mem in typed.items():\n sections.append(f\"### {name}: {mem['description']}\")\n if mem[\"content\"].strip():\n sections.append(mem[\"content\"].strip())\n sections.append(\"\")\n\n return \"\\n\".join(sections)\n\n def 
save_memory(self, name: str, description: str, mem_type: str, content: str) -> str:\n \"\"\"\n Save a memory to disk and update the index.\n\n Returns a status message.\n \"\"\"\n if mem_type not in MEMORY_TYPES:\n return f\"Error: type must be one of {MEMORY_TYPES}\"\n\n # Sanitize name for filename\n safe_name = re.sub(r\"[^a-zA-Z0-9_-]\", \"_\", name.lower())\n if not safe_name:\n return \"Error: invalid memory name\"\n\n self.memory_dir.mkdir(parents=True, exist_ok=True)\n\n # Write individual memory file with frontmatter\n frontmatter = (\n f\"---\\n\"\n f\"name: {name}\\n\"\n f\"description: {description}\\n\"\n f\"type: {mem_type}\\n\"\n f\"---\\n\"\n f\"{content}\\n\"\n )\n file_name = f\"{safe_name}.md\"\n file_path = self.memory_dir / file_name\n file_path.write_text(frontmatter)\n\n # Update in-memory store\n self.memories[name] = {\n \"description\": description,\n \"type\": mem_type,\n \"content\": content,\n \"file\": file_name,\n }\n\n # Rebuild MEMORY.md index\n self._rebuild_index()\n\n return f\"Saved memory '{name}' [{mem_type}] to {file_path.relative_to(WORKDIR)}\"\n\n def _rebuild_index(self):\n \"\"\"Rebuild MEMORY.md from current in-memory state, capped at 200 lines.\"\"\"\n lines = [\"# Memory Index\", \"\"]\n for name, mem in self.memories.items():\n lines.append(f\"- {name}: {mem['description']} [{mem['type']}]\")\n if len(lines) >= MAX_INDEX_LINES:\n lines.append(f\"... (truncated at {MAX_INDEX_LINES} lines)\")\n break\n self.memory_dir.mkdir(parents=True, exist_ok=True)\n MEMORY_INDEX.write_text(\"\\n\".join(lines) + \"\\n\")\n\n def _parse_frontmatter(self, text: str) -> dict | None:\n \"\"\"Parse --- delimited frontmatter + body content.\"\"\"\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n(.*)\", text, re.DOTALL)\n if not match:\n return None\n header, body = match.group(1), match.group(2)\n result = {\"content\": body.strip()}\n for line in header.splitlines():\n if \":\" in line:\n key, _, value = line.partition(\":\")\n result[key.strip()] = value.strip()\n return result\n\n\nclass DreamConsolidator:\n \"\"\"\n Auto-consolidation of memories between sessions (\"Dream\").\n\n This is an optional later-stage feature. Its job is to prevent the memory\n store from growing into a noisy pile by merging, deduplicating, and\n pruning entries over time.\n \"\"\"\n\n COOLDOWN_SECONDS = 86400 # 24 hours between consolidations\n SCAN_THROTTLE_SECONDS = 600 # 10 minutes between scan attempts\n MIN_SESSION_COUNT = 5 # need enough data to consolidate\n LOCK_STALE_SECONDS = 3600 # PID lock considered stale after 1 hour\n\n PHASES = [\n \"Orient: scan MEMORY.md index for structure and categories\",\n \"Gather: read individual memory files for full content\",\n \"Consolidate: merge related memories, remove stale entries\",\n \"Prune: enforce 200-line limit on MEMORY.md index\",\n ]\n\n def __init__(self, memory_dir: Path = None):\n self.memory_dir = memory_dir or MEMORY_DIR\n self.lock_file = self.memory_dir / \".dream_lock\"\n self.enabled = True\n self.mode = \"default\"\n self.last_consolidation_time = 0.0\n self.last_scan_time = 0.0\n self.session_count = 0\n\n def should_consolidate(self) -> tuple[bool, str]:\n \"\"\"\n Check 7 gates in sequence. 
All must pass.\n Returns (can_run, reason) where reason explains the first failed gate.\n \"\"\"\n import time\n\n now = time.time()\n\n # Gate 1: enabled flag\n if not self.enabled:\n return False, \"Gate 1: consolidation is disabled\"\n\n # Gate 2: memory directory exists and has memory files\n if not self.memory_dir.exists():\n return False, \"Gate 2: memory directory does not exist\"\n memory_files = list(self.memory_dir.glob(\"*.md\"))\n # Exclude MEMORY.md itself from the count\n memory_files = [f for f in memory_files if f.name != \"MEMORY.md\"]\n if not memory_files:\n return False, \"Gate 2: no memory files found\"\n\n # Gate 3: not in plan mode (only consolidate in active modes)\n if self.mode == \"plan\":\n return False, \"Gate 3: plan mode does not allow consolidation\"\n\n # Gate 4: 24-hour cooldown since last consolidation\n time_since_last = now - self.last_consolidation_time\n if time_since_last < self.COOLDOWN_SECONDS:\n remaining = int(self.COOLDOWN_SECONDS - time_since_last)\n return False, f\"Gate 4: cooldown active, {remaining}s remaining\"\n\n # Gate 5: 10-minute throttle since last scan attempt\n time_since_scan = now - self.last_scan_time\n if time_since_scan < self.SCAN_THROTTLE_SECONDS:\n remaining = int(self.SCAN_THROTTLE_SECONDS - time_since_scan)\n return False, f\"Gate 5: scan throttle active, {remaining}s remaining\"\n\n # Gate 6: need at least 5 sessions worth of data\n if self.session_count < self.MIN_SESSION_COUNT:\n return False, f\"Gate 6: only {self.session_count} sessions, need {self.MIN_SESSION_COUNT}\"\n\n # Gate 7: no active lock file (check PID staleness)\n if not self._acquire_lock():\n return False, \"Gate 7: lock held by another process\"\n\n return True, \"All 7 gates passed\"\n\n def consolidate(self) -> list[str]:\n \"\"\"\n Run the 4-phase consolidation process.\n\n The teaching version returns phase descriptions to make the flow\n visible without requiring an extra LLM pass here.\n \"\"\"\n import time\n\n can_run, reason = self.should_consolidate()\n if not can_run:\n print(f\"[Dream] Cannot consolidate: {reason}\")\n return []\n\n print(\"[Dream] Starting consolidation...\")\n self.last_scan_time = time.time()\n\n completed_phases = []\n for i, phase in enumerate(self.PHASES, 1):\n print(f\"[Dream] Phase {i}/4: {phase}\")\n completed_phases.append(phase)\n\n self.last_consolidation_time = time.time()\n self._release_lock()\n print(f\"[Dream] Consolidation complete: {len(completed_phases)} phases executed\")\n return completed_phases\n\n def _acquire_lock(self) -> bool:\n \"\"\"\n Acquire a PID-based lock file. Returns False if locked by another\n live process. 
Stale locks (older than LOCK_STALE_SECONDS) are removed.\n \"\"\"\n import time\n\n if self.lock_file.exists():\n try:\n lock_data = self.lock_file.read_text().strip()\n pid_str, timestamp_str = lock_data.split(\":\", 1)\n pid = int(pid_str)\n lock_time = float(timestamp_str)\n\n # Check if lock is stale\n if (time.time() - lock_time) > self.LOCK_STALE_SECONDS:\n print(f\"[Dream] Removing stale lock from PID {pid}\")\n self.lock_file.unlink()\n else:\n # Check if owning process is still alive\n try:\n os.kill(pid, 0)\n return False # process alive, lock is valid\n except OSError:\n print(f\"[Dream] Removing lock from dead PID {pid}\")\n self.lock_file.unlink()\n except (ValueError, OSError):\n # Corrupted lock file, remove it\n self.lock_file.unlink(missing_ok=True)\n\n # Write new lock\n try:\n self.memory_dir.mkdir(parents=True, exist_ok=True)\n self.lock_file.write_text(f\"{os.getpid()}:{time.time()}\")\n return True\n except OSError:\n return False\n\n def _release_lock(self):\n \"\"\"Release the lock file if we own it.\"\"\"\n try:\n if self.lock_file.exists():\n lock_data = self.lock_file.read_text().strip()\n pid_str = lock_data.split(\":\")[0]\n if int(pid_str) == os.getpid():\n self.lock_file.unlink()\n except (ValueError, OSError):\n pass\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# Global memory manager\nmemory_mgr = MemoryManager()\n\n\ndef run_save_memory(name: str, description: str, mem_type: str, content: str) -> str:\n return memory_mgr.save_memory(name, description, mem_type, content)\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"save_memory\": lambda **kw: run_save_memory(kw[\"name\"], kw[\"description\"], kw[\"type\"], kw[\"content\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"save_memory\", \"description\": \"Save a persistent memory that survives across sessions.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\n \"name\": {\"type\": \"string\", \"description\": \"Short identifier (e.g. 
prefer_tabs, db_schema)\"},\n \"description\": {\"type\": \"string\", \"description\": \"One-line summary of what this memory captures\"},\n \"type\": {\"type\": \"string\", \"enum\": [\"user\", \"feedback\", \"project\", \"reference\"],\n \"description\": \"user=preferences, feedback=corrections, project=non-obvious project conventions or decision reasons, reference=external resource pointers\"},\n \"content\": {\"type\": \"string\", \"description\": \"Full memory content (multi-line OK)\"},\n }, \"required\": [\"name\", \"description\", \"type\", \"content\"]}},\n]\n\nMEMORY_GUIDANCE = \"\"\"\nWhen to save memories:\n- User states a preference (\"I like tabs\", \"always use pytest\") -> type: user\n- User corrects you (\"don't do X\", \"that was wrong because...\") -> type: feedback\n- You learn a project fact that is not easy to infer from current code alone\n (for example: a rule exists because of compliance, or a legacy module must\n stay untouched for business reasons) -> type: project\n- You learn where an external resource lives (ticket board, dashboard, docs URL)\n -> type: reference\n\nWhen NOT to save:\n- Anything easily derivable from code (function signatures, file structure, directory layout)\n- Temporary task state (current branch, open PR numbers, current TODOs)\n- Secrets or credentials (API keys, passwords)\n\"\"\"\n\n\ndef build_system_prompt() -> str:\n \"\"\"Assemble system prompt with memory content included.\"\"\"\n parts = [f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\"]\n\n # Inject memory content if available\n memory_section = memory_mgr.load_memory_prompt()\n if memory_section:\n parts.append(memory_section)\n\n parts.append(MEMORY_GUIDANCE)\n return \"\\n\\n\".join(parts)\n\n\ndef agent_loop(messages: list):\n \"\"\"\n Agent loop with memory-aware system prompt.\n\n The system prompt is rebuilt each call so newly saved memories\n are visible in the next LLM turn within the same session.\n \"\"\"\n while True:\n system = build_system_prompt()\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n # Load existing memories at session start\n memory_mgr.load_all()\n mem_count = len(memory_mgr.memories)\n if mem_count:\n print(f\"[{mem_count} memories loaded into context]\")\n else:\n print(\"[No existing memories. 
The agent can create them with save_memory.]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n # /memories command to list current memories\n if query.strip() == \"/memories\":\n if memory_mgr.memories:\n for name, mem in memory_mgr.memories.items():\n print(f\" [{mem['type']}] {name}: {mem['description']}\")\n else:\n print(\" (no memories)\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
},
{
- "id": "s05",
- "filename": "s05_skill_loading.py",
- "title": "Skills",
- "subtitle": "Load on Demand",
- "loc": 187,
+ "id": "s10",
+ "filename": "s10_system_prompt.py",
+ "title": "System Prompt",
+ "subtitle": "Build Inputs as a Pipeline",
+ "loc": 305,
"tools": [
"bash",
"read_file",
"write_file",
- "edit_file",
- "load_skill"
- ],
- "newTools": [
- "load_skill"
+ "edit_file"
],
- "coreAddition": "SkillLoader + two-layer injection",
- "keyInsight": "Inject knowledge via tool_result when needed, not upfront in the system prompt",
+ "newTools": [],
+ "coreAddition": "Prompt sections + dynamic assembly",
+ "keyInsight": "The model sees a constructed input pipeline, not one giant static string.",
"classes": [
{
- "name": "SkillLoader",
- "startLine": 57,
- "endLine": 105
+ "name": "SystemPromptBuilder",
+ "startLine": 50,
+ "endLine": 224
}
],
"functions": [
+ {
+ "name": "build_system_reminder",
+ "signature": "def build_system_reminder(extra: str = None)",
+ "startLine": 225
+ },
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
- "startLine": 117
+ "startLine": 242
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
- "startLine": 123
+ "startLine": 249
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int = None)",
- "startLine": 135
+ "startLine": 262
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
- "startLine": 144
+ "startLine": 272
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
- "startLine": 153
+ "startLine": 282
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
- "startLine": 187
+ "startLine": 316
}
],
- "layer": "planning",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns05_skill_loading.py - Skills\n\nTwo-layer skill injection that avoids bloating the system prompt:\n\n Layer 1 (cheap): skill names in system prompt (~100 tokens/skill)\n Layer 2 (on demand): full skill body in tool_result\n\n skills/\n pdf/\n SKILL.md <-- frontmatter (name, description) + body\n code-review/\n SKILL.md\n\n System prompt:\n +--------------------------------------+\n | You are a coding agent. |\n | Skills available: |\n | - pdf: Process PDF files... | <-- Layer 1: metadata only\n | - code-review: Review code... |\n +--------------------------------------+\n\n When model calls load_skill(\"pdf\"):\n +--------------------------------------+\n | tool_result: |\n | |\n | Full PDF processing instructions | <-- Layer 2: full body\n | Step 1: ... |\n | Step 2: ... |\n | |\n +--------------------------------------+\n\nKey insight: \"Don't put everything in the system prompt. Load on demand.\"\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nSKILLS_DIR = WORKDIR / \"skills\"\n\n\n# -- SkillLoader: scan skills//SKILL.md with YAML frontmatter --\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills_dir = skills_dir\n self.skills = {}\n self._load_all()\n\n def _load_all(self):\n if not self.skills_dir.exists():\n return\n for f in sorted(self.skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body, \"path\": str(f)}\n\n def _parse_frontmatter(self, text: str) -> tuple:\n \"\"\"Parse YAML frontmatter between --- delimiters.\"\"\"\n match = re.match(r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL)\n if not match:\n return {}, text\n meta = {}\n for line in match.group(1).strip().splitlines():\n if \":\" in line:\n key, val = line.split(\":\", 1)\n meta[key.strip()] = val.strip()\n return meta, match.group(2).strip()\n\n def get_descriptions(self) -> str:\n \"\"\"Layer 1: short descriptions for the system prompt.\"\"\"\n if not self.skills:\n return \"(no skills available)\"\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"No description\")\n tags = skill[\"meta\"].get(\"tags\", \"\")\n line = f\" - {name}: {desc}\"\n if tags:\n line += f\" [{tags}]\"\n lines.append(line)\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n \"\"\"Layer 2: full skill body returned in tool_result.\"\"\"\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'. 
Available: {', '.join(self.skills.keys())}\"\n return f\"\\n{skill['body']}\\n \"\n\n\nSKILL_LOADER = SkillLoader(SKILLS_DIR)\n\n# Layer 1: skill metadata injected into system prompt\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse load_skill to access specialized knowledge before tackling unfamiliar topics.\n\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"load_skill\", \"description\": \"Load specialized knowledge by name.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\", \"description\": \"Skill name to load\"}}, \"required\": 
[\"name\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms05 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ "layer": "hardening",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: assembly -- the system prompt is a pipeline, not a string.\n\"\"\"\ns10_system_prompt.py - System Prompt Construction\n\nThis chapter teaches one core idea:\nthe system prompt should be assembled from clear sections, not written as one\ngiant hardcoded blob.\n\nTeaching pipeline:\n 1. core instructions\n 2. tool listing\n 3. skill metadata\n 4. memory section\n 5. CLAUDE.md chain\n 6. dynamic context\n\nThe builder keeps stable information separate from information that changes\noften. A simple DYNAMIC_BOUNDARY marker makes that split visible.\n\nPer-turn reminders are even more dynamic. They are better injected as a\nseparate user-role system reminder than mixed blindly into the stable prompt.\n\nKey insight: \"Prompt construction is a pipeline with boundaries, not one\nbig string.\"\n\"\"\"\n\nimport datetime\nimport json\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nDYNAMIC_BOUNDARY = \"=== DYNAMIC_BOUNDARY ===\"\n\n\nclass SystemPromptBuilder:\n \"\"\"\n Assemble the system prompt from independent sections.\n\n The teaching goal here is clarity:\n each section has one source and one responsibility.\n\n That makes the prompt easier to reason about, easier to test, and easier\n to evolve as the agent grows new capabilities.\n \"\"\"\n\n def __init__(self, workdir: Path = None, tools: list = None):\n self.workdir = workdir or WORKDIR\n self.tools = tools or []\n self.skills_dir = self.workdir / \"skills\"\n self.memory_dir = self.workdir / \".memory\"\n\n # -- Section 1: Core instructions --\n def _build_core(self) -> str:\n return (\n f\"You are a coding agent operating in {self.workdir}.\\n\"\n \"Use the provided tools to explore, read, write, and edit files.\\n\"\n \"Always verify before assuming. 
Prefer reading files over guessing.\"\n )\n\n # -- Section 2: Tool listings --\n def _build_tool_listing(self) -> str:\n if not self.tools:\n return \"\"\n lines = [\"# Available tools\"]\n for tool in self.tools:\n props = tool.get(\"input_schema\", {}).get(\"properties\", {})\n params = \", \".join(props.keys())\n lines.append(f\"- {tool['name']}({params}): {tool['description']}\")\n return \"\\n\".join(lines)\n\n # -- Section 3: Skill metadata (layer 1 from s05 concept) --\n def _build_skill_listing(self) -> str:\n if not self.skills_dir.exists():\n return \"\"\n skills = []\n for skill_dir in sorted(self.skills_dir.iterdir()):\n skill_md = skill_dir / \"SKILL.md\"\n if not skill_md.exists():\n continue\n text = skill_md.read_text()\n # Parse frontmatter for name + description\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\", text, re.DOTALL)\n if not match:\n continue\n meta = {}\n for line in match.group(1).splitlines():\n if \":\" in line:\n k, _, v = line.partition(\":\")\n meta[k.strip()] = v.strip()\n name = meta.get(\"name\", skill_dir.name)\n desc = meta.get(\"description\", \"\")\n skills.append(f\"- {name}: {desc}\")\n if not skills:\n return \"\"\n return \"# Available skills\\n\" + \"\\n\".join(skills)\n\n # -- Section 4: Memory content --\n def _build_memory_section(self) -> str:\n if not self.memory_dir.exists():\n return \"\"\n memories = []\n for md_file in sorted(self.memory_dir.glob(\"*.md\")):\n if md_file.name == \"MEMORY.md\":\n continue\n text = md_file.read_text()\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n(.*)\", text, re.DOTALL)\n if not match:\n continue\n header, body = match.group(1), match.group(2).strip()\n meta = {}\n for line in header.splitlines():\n if \":\" in line:\n k, _, v = line.partition(\":\")\n meta[k.strip()] = v.strip()\n name = meta.get(\"name\", md_file.stem)\n mem_type = meta.get(\"type\", \"project\")\n desc = meta.get(\"description\", \"\")\n memories.append(f\"[{mem_type}] {name}: {desc}\\n{body}\")\n if not memories:\n return \"\"\n return \"# Memories (persistent)\\n\\n\" + \"\\n\\n\".join(memories)\n\n # -- Section 5: CLAUDE.md chain --\n def _build_claude_md(self) -> str:\n \"\"\"\n Load CLAUDE.md files in priority order (all are included):\n 1. ~/.claude/CLAUDE.md (user-global instructions)\n 2. /CLAUDE.md (project instructions)\n 3. 
/CLAUDE.md (directory-specific instructions)\n \"\"\"\n sources = []\n\n # User-global\n user_claude = Path.home() / \".claude\" / \"CLAUDE.md\"\n if user_claude.exists():\n sources.append((\"user global (~/.claude/CLAUDE.md)\", user_claude.read_text()))\n\n # Project root\n project_claude = self.workdir / \"CLAUDE.md\"\n if project_claude.exists():\n sources.append((\"project root (CLAUDE.md)\", project_claude.read_text()))\n\n # Subdirectory -- in real CC, this walks from cwd up to project root\n # Teaching: check cwd if different from workdir\n cwd = Path.cwd()\n if cwd != self.workdir:\n subdir_claude = cwd / \"CLAUDE.md\"\n if subdir_claude.exists():\n sources.append((f\"subdir ({cwd.name}/CLAUDE.md)\", subdir_claude.read_text()))\n\n if not sources:\n return \"\"\n parts = [\"# CLAUDE.md instructions\"]\n for label, content in sources:\n parts.append(f\"## From {label}\")\n parts.append(content.strip())\n return \"\\n\\n\".join(parts)\n\n # -- Section 6: Dynamic context --\n def _build_dynamic_context(self) -> str:\n lines = [\n f\"Current date: {datetime.date.today().isoformat()}\",\n f\"Working directory: {self.workdir}\",\n f\"Model: {MODEL}\",\n f\"Platform: {os.uname().sysname}\",\n ]\n return \"# Dynamic context\\n\" + \"\\n\".join(lines)\n\n # -- Assemble all sections --\n def build(self) -> str:\n \"\"\"\n Assemble the full system prompt from all sections.\n\n Static sections (1-5) are separated from dynamic (6) by\n the DYNAMIC_BOUNDARY marker. In real CC, the static prefix\n is cached across turns to save prompt tokens.\n \"\"\"\n sections = []\n\n core = self._build_core()\n if core:\n sections.append(core)\n\n tools = self._build_tool_listing()\n if tools:\n sections.append(tools)\n\n skills = self._build_skill_listing()\n if skills:\n sections.append(skills)\n\n memory = self._build_memory_section()\n if memory:\n sections.append(memory)\n\n claude_md = self._build_claude_md()\n if claude_md:\n sections.append(claude_md)\n\n # Static/dynamic boundary\n sections.append(DYNAMIC_BOUNDARY)\n\n dynamic = self._build_dynamic_context()\n if dynamic:\n sections.append(dynamic)\n\n return \"\\n\\n\".join(sections)\n\n\ndef build_system_reminder(extra: str = None) -> dict:\n \"\"\"\n Build a system-reminder user message for per-turn dynamic content.\n\n The teaching version keeps reminders outside the stable system prompt so\n short-lived context does not get mixed into the long-lived instructions.\n \"\"\"\n parts = []\n if extra:\n parts.append(extra)\n if not parts:\n return None\n content = \"\\n\" + \"\\n\".join(parts) + \"\\n \"\n return {\"role\": \"user\", \"content\": content}\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n# Global prompt builder\nprompt_builder = SystemPromptBuilder(workdir=WORKDIR, tools=TOOLS)\n\n\ndef agent_loop(messages: list):\n \"\"\"\n Agent loop with assembled system prompt.\n\n The system prompt is rebuilt each iteration. 
In real CC, the static\n prefix is cached and only the dynamic suffix changes per turn.\n \"\"\"\n while True:\n system = prompt_builder.build()\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n # Show the assembled prompt at startup for educational purposes\n full_prompt = prompt_builder.build()\n section_count = full_prompt.count(\"\\n# \")\n print(f\"[System prompt assembled: {len(full_prompt)} chars, ~{section_count} sections]\")\n\n # /prompt command shows the full assembled prompt\n history = []\n while True:\n try:\n query = input(\"\\033[36ms10 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n if query.strip() == \"/prompt\":\n print(\"--- System Prompt ---\")\n print(prompt_builder.build())\n print(\"--- End ---\")\n continue\n\n if query.strip() == \"/sections\":\n prompt = prompt_builder.build()\n for line in prompt.splitlines():\n if line.startswith(\"# \") or line == DYNAMIC_BOUNDARY:\n print(f\" {line}\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): assembly(组装)——system prompt 是管线,不是单一字符串。\n\"\"\"\ns10_system_prompt.py - System Prompt(系统提示)构建\n\n本章核心观点:\nsystem prompt 应由清晰分段组装,而不是写成一整块硬编码大文本。\n\n教学版构建管线:\n 1. core instructions(核心指令)\n 2. tool listing(工具清单)\n 3. skill metadata(技能元数据)\n 4. memory section(记忆区段)\n 5. CLAUDE.md chain(CLAUDE 指令链)\n 6. dynamic context(动态上下文)\n\n构建器会把稳定信息与高频变化信息分离,\n并用 `DYNAMIC_BOUNDARY` 标记显式展示边界。\n\n逐轮 reminder(提醒)更动态,适合用单独 user-role 的 system reminder 注入,\n而不是直接混入稳定提示体。\n\n关键洞察:\n\"提示词构建是有边界的管线,不是一坨大字符串。\"\n\"\"\"\n\nimport datetime\nimport json\nimport os\nimport platform\nimport re\nimport subprocess\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\n\nDYNAMIC_BOUNDARY = \"=== DYNAMIC_BOUNDARY ===\"\n\n\nclass SystemPromptBuilder:\n \"\"\"\n 按独立区段组装 system prompt。\n\n 教学目标是清晰性:\n 每个区段只对应一个来源、一个职责。\n\n 这样更易推理、测试,也更易随能力增长演进。\n \"\"\"\n\n def __init__(self, workdir: Path = None, tools: list = None):\n self.workdir = workdir or WORKDIR\n self.tools = tools or []\n self.skills_dir = self.workdir / \"skills\"\n self.memory_dir = self.workdir / \".memory\"\n\n # -- Section 1: Core instructions(核心指令) --\n def _build_core(self) -> str:\n return (\n f\"你是运行在 {self.workdir} 的 coding agent(编码智能体)。\\n\"\n \"请使用提供的工具进行探索、读取、写入与编辑文件。\\n\"\n \"先验证再假设;优先读文件,不要凭空猜测。\"\n )\n\n # -- Section 2: Tool listings(工具清单) --\n def _build_tool_listing(self) -> str:\n if not self.tools:\n return \"\"\n lines = [\"# 可用工具\"]\n for tool in self.tools:\n props = tool.get(\"input_schema\", {}).get(\"properties\", {})\n params = \", \".join(props.keys())\n lines.append(f\"- {tool['name']}({params}): {tool['description']}\")\n return \"\\n\".join(lines)\n\n # -- Section 3: Skill metadata(技能元数据,s05 的第 1 层) --\n def _build_skill_listing(self) -> str:\n if not self.skills_dir.exists():\n return \"\"\n skills = []\n for skill_dir in sorted(self.skills_dir.iterdir()):\n skill_md = skill_dir / \"SKILL.md\"\n if not skill_md.exists():\n continue\n text = skill_md.read_text()\n # 解析 frontmatter:name + description\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\", text, re.DOTALL)\n if not match:\n continue\n meta = {}\n for line in match.group(1).splitlines():\n if \":\" in line:\n k, _, v = line.partition(\":\")\n meta[k.strip()] = v.strip()\n name = meta.get(\"name\", skill_dir.name)\n desc = meta.get(\"description\", \"\")\n skills.append(f\"- {name}: {desc}\")\n if not skills:\n return \"\"\n return \"# 可用技能\\n\" + \"\\n\".join(skills)\n\n # -- Section 4: Memory content(记忆内容) --\n def _build_memory_section(self) -> str:\n if not self.memory_dir.exists():\n return \"\"\n memories = []\n for md_file in sorted(self.memory_dir.glob(\"*.md\")):\n if md_file.name == \"MEMORY.md\":\n continue\n text = md_file.read_text()\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n(.*)\", text, re.DOTALL)\n if not match:\n continue\n header, body = match.group(1), match.group(2).strip()\n meta = {}\n for line in header.splitlines():\n if \":\" in line:\n k, _, v = line.partition(\":\")\n meta[k.strip()] = v.strip()\n name = meta.get(\"name\", md_file.stem)\n mem_type = meta.get(\"type\", \"project\")\n desc = meta.get(\"description\", \"\")\n memories.append(f\"[{mem_type}] {name}: {desc}\\n{body}\")\n if not memories:\n return \"\"\n return \"# 记忆(持久化)\\n\\n\" + 
\"\\n\\n\".join(memories)\n\n # -- Section 5: CLAUDE.md chain(链式指令) --\n def _build_claude_md(self) -> str:\n \"\"\"\n 按优先级加载 CLAUDE.md(全部纳入):\n 1. ~/.claude/CLAUDE.md(用户全局指令)\n 2. /CLAUDE.md(项目指令)\n 3. /CLAUDE.md(目录特定指令)\n \"\"\"\n sources = []\n\n # 用户全局\n user_claude = Path.home() / \".claude\" / \"CLAUDE.md\"\n if user_claude.exists():\n sources.append((\"用户全局(~/.claude/CLAUDE.md)\", user_claude.read_text()))\n\n # 项目根目录\n project_claude = self.workdir / \"CLAUDE.md\"\n if project_claude.exists():\n sources.append((\"项目根目录(project root / CLAUDE.md)\", project_claude.read_text()))\n\n # 子目录:真实 CC 会从 cwd 向上遍历到项目根;\n # 教学版简化为仅在 cwd != workdir 时检查一次。\n cwd = Path.cwd()\n if cwd != self.workdir:\n subdir_claude = cwd / \"CLAUDE.md\"\n if subdir_claude.exists():\n sources.append((f\"子目录(subdir / {cwd.name}/CLAUDE.md)\", subdir_claude.read_text()))\n\n if not sources:\n return \"\"\n parts = [\"# CLAUDE.md 指令\"]\n for label, content in sources:\n parts.append(f\"## 来源: {label}\")\n parts.append(content.strip())\n return \"\\n\\n\".join(parts)\n\n # -- Section 6: Dynamic context(动态上下文) --\n def _build_dynamic_context(self) -> str:\n lines = [\n f\"当前日期: {datetime.date.today().isoformat()}\",\n f\"工作目录: {self.workdir}\",\n f\"模型(Model): {MODEL}\",\n f\"平台(Platform): {platform.system()}\",\n ]\n return \"# 动态上下文\\n\" + \"\\n\".join(lines)\n\n # -- Assemble all sections(组装全部区段) --\n def build(self) -> str:\n \"\"\"\n 从全部区段组装完整 system prompt。\n\n 静态区段(1-5)与动态区段(6)由 `DYNAMIC_BOUNDARY` 分隔。\n 在真实 CC 中,静态前缀可跨轮缓存以节省 token。\n \"\"\"\n sections = []\n\n core = self._build_core()\n if core:\n sections.append(core)\n\n tools = self._build_tool_listing()\n if tools:\n sections.append(tools)\n\n skills = self._build_skill_listing()\n if skills:\n sections.append(skills)\n\n memory = self._build_memory_section()\n if memory:\n sections.append(memory)\n\n claude_md = self._build_claude_md()\n if claude_md:\n sections.append(claude_md)\n\n # 静态/动态边界\n sections.append(DYNAMIC_BOUNDARY)\n\n dynamic = self._build_dynamic_context()\n if dynamic:\n sections.append(dynamic)\n\n return \"\\n\\n\".join(sections)\n\n\ndef build_system_reminder(extra: str = None) -> dict:\n \"\"\"\n 为逐轮动态内容构建 system-reminder 用户消息。\n\n 教学版将 reminder 放在稳定 system prompt 之外,\n 避免短时上下文污染长期指令。\n \"\"\"\n parts = []\n if extra:\n parts.append(extra)\n if not parts:\n return None\n content = \"\\n\" + \"\\n\".join(parts) + \"\\n \"\n return {\"role\": \"user\", \"content\": content}\n\n\n# -- 工具实现 --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n# 全局 prompt 构建器\nprompt_builder = SystemPromptBuilder(workdir=WORKDIR, tools=TOOLS)\n\n\ndef agent_loop(messages: list):\n \"\"\"\n 使用组装式 system prompt 的智能体循环。\n\n 每轮都会重建 system prompt。真实 CC 中静态前缀会被缓存,\n 每轮只更新动态后缀。\n \"\"\"\n while True:\n system = prompt_builder.build()\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n # 启动时展示组装后的 prompt(教学可视化)\n full_prompt = prompt_builder.build()\n section_count = full_prompt.count(\"\\n# \")\n print(f\"[System prompt 已组装: {len(full_prompt)} 字符,约 {section_count} 个区段]\")\n\n # /prompt 命令:查看完整组装结果\n history = []\n while True:\n try:\n query = input(\"\\033[36ms10 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n if query.strip() == \"/prompt\":\n print(\"--- System Prompt(系统提示)---\")\n print(prompt_builder.build())\n print(\"--- 
结束 ---\")\n continue\n\n if query.strip() == \"/sections\":\n prompt = prompt_builder.build()\n for line in prompt.splitlines():\n if line.startswith(\"# \") or line == DYNAMIC_BOUNDARY:\n print(f\" {line}\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: assembly -- the system prompt is a pipeline, not a string.\n\"\"\"\ns10_system_prompt.py - System Prompt Construction\n\nThis chapter teaches one core idea:\nthe system prompt should be assembled from clear sections, not written as one\ngiant hardcoded blob.\n\nTeaching pipeline:\n 1. core instructions\n 2. tool listing\n 3. skill metadata\n 4. memory section\n 5. CLAUDE.md chain\n 6. dynamic context\n\nThe builder keeps stable information separate from information that changes\noften. A simple DYNAMIC_BOUNDARY marker makes that split visible.\n\nPer-turn reminders are even more dynamic. They are better injected as a\nseparate user-role system reminder than mixed blindly into the stable prompt.\n\nKey insight: \"Prompt construction is a pipeline with boundaries, not one\nbig string.\"\n\"\"\"\n\nimport datetime\nimport json\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nDYNAMIC_BOUNDARY = \"=== DYNAMIC_BOUNDARY ===\"\n\n\nclass SystemPromptBuilder:\n \"\"\"\n Assemble the system prompt from independent sections.\n\n The teaching goal here is clarity:\n each section has one source and one responsibility.\n\n That makes the prompt easier to reason about, easier to test, and easier\n to evolve as the agent grows new capabilities.\n \"\"\"\n\n def __init__(self, workdir: Path = None, tools: list = None):\n self.workdir = workdir or WORKDIR\n self.tools = tools or []\n self.skills_dir = self.workdir / \"skills\"\n self.memory_dir = self.workdir / \".memory\"\n\n # -- Section 1: Core instructions --\n def _build_core(self) -> str:\n return (\n f\"You are a coding agent operating in {self.workdir}.\\n\"\n \"Use the provided tools to explore, read, write, and edit files.\\n\"\n \"Always verify before assuming. 
Prefer reading files over guessing.\"\n )\n\n # -- Section 2: Tool listings --\n def _build_tool_listing(self) -> str:\n if not self.tools:\n return \"\"\n lines = [\"# Available tools\"]\n for tool in self.tools:\n props = tool.get(\"input_schema\", {}).get(\"properties\", {})\n params = \", \".join(props.keys())\n lines.append(f\"- {tool['name']}({params}): {tool['description']}\")\n return \"\\n\".join(lines)\n\n # -- Section 3: Skill metadata (layer 1 from s05 concept) --\n def _build_skill_listing(self) -> str:\n if not self.skills_dir.exists():\n return \"\"\n skills = []\n for skill_dir in sorted(self.skills_dir.iterdir()):\n skill_md = skill_dir / \"SKILL.md\"\n if not skill_md.exists():\n continue\n text = skill_md.read_text()\n # Parse frontmatter for name + description\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\", text, re.DOTALL)\n if not match:\n continue\n meta = {}\n for line in match.group(1).splitlines():\n if \":\" in line:\n k, _, v = line.partition(\":\")\n meta[k.strip()] = v.strip()\n name = meta.get(\"name\", skill_dir.name)\n desc = meta.get(\"description\", \"\")\n skills.append(f\"- {name}: {desc}\")\n if not skills:\n return \"\"\n return \"# Available skills\\n\" + \"\\n\".join(skills)\n\n # -- Section 4: Memory content --\n def _build_memory_section(self) -> str:\n if not self.memory_dir.exists():\n return \"\"\n memories = []\n for md_file in sorted(self.memory_dir.glob(\"*.md\")):\n if md_file.name == \"MEMORY.md\":\n continue\n text = md_file.read_text()\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n(.*)\", text, re.DOTALL)\n if not match:\n continue\n header, body = match.group(1), match.group(2).strip()\n meta = {}\n for line in header.splitlines():\n if \":\" in line:\n k, _, v = line.partition(\":\")\n meta[k.strip()] = v.strip()\n name = meta.get(\"name\", md_file.stem)\n mem_type = meta.get(\"type\", \"project\")\n desc = meta.get(\"description\", \"\")\n memories.append(f\"[{mem_type}] {name}: {desc}\\n{body}\")\n if not memories:\n return \"\"\n return \"# Memories (persistent)\\n\\n\" + \"\\n\\n\".join(memories)\n\n # -- Section 5: CLAUDE.md chain --\n def _build_claude_md(self) -> str:\n \"\"\"\n Load CLAUDE.md files in priority order (all are included):\n 1. ~/.claude/CLAUDE.md (user-global instructions)\n 2. /CLAUDE.md (project instructions)\n 3. 
/CLAUDE.md (directory-specific instructions)\n \"\"\"\n sources = []\n\n # User-global\n user_claude = Path.home() / \".claude\" / \"CLAUDE.md\"\n if user_claude.exists():\n sources.append((\"user global (~/.claude/CLAUDE.md)\", user_claude.read_text()))\n\n # Project root\n project_claude = self.workdir / \"CLAUDE.md\"\n if project_claude.exists():\n sources.append((\"project root (CLAUDE.md)\", project_claude.read_text()))\n\n # Subdirectory -- in real CC, this walks from cwd up to project root\n # Teaching: check cwd if different from workdir\n cwd = Path.cwd()\n if cwd != self.workdir:\n subdir_claude = cwd / \"CLAUDE.md\"\n if subdir_claude.exists():\n sources.append((f\"subdir ({cwd.name}/CLAUDE.md)\", subdir_claude.read_text()))\n\n if not sources:\n return \"\"\n parts = [\"# CLAUDE.md instructions\"]\n for label, content in sources:\n parts.append(f\"## From {label}\")\n parts.append(content.strip())\n return \"\\n\\n\".join(parts)\n\n # -- Section 6: Dynamic context --\n def _build_dynamic_context(self) -> str:\n lines = [\n f\"Current date: {datetime.date.today().isoformat()}\",\n f\"Working directory: {self.workdir}\",\n f\"Model: {MODEL}\",\n f\"Platform: {os.uname().sysname}\",\n ]\n return \"# Dynamic context\\n\" + \"\\n\".join(lines)\n\n # -- Assemble all sections --\n def build(self) -> str:\n \"\"\"\n Assemble the full system prompt from all sections.\n\n Static sections (1-5) are separated from dynamic (6) by\n the DYNAMIC_BOUNDARY marker. In real CC, the static prefix\n is cached across turns to save prompt tokens.\n \"\"\"\n sections = []\n\n core = self._build_core()\n if core:\n sections.append(core)\n\n tools = self._build_tool_listing()\n if tools:\n sections.append(tools)\n\n skills = self._build_skill_listing()\n if skills:\n sections.append(skills)\n\n memory = self._build_memory_section()\n if memory:\n sections.append(memory)\n\n claude_md = self._build_claude_md()\n if claude_md:\n sections.append(claude_md)\n\n # Static/dynamic boundary\n sections.append(DYNAMIC_BOUNDARY)\n\n dynamic = self._build_dynamic_context()\n if dynamic:\n sections.append(dynamic)\n\n return \"\\n\\n\".join(sections)\n\n\ndef build_system_reminder(extra: str = None) -> dict:\n \"\"\"\n Build a system-reminder user message for per-turn dynamic content.\n\n The teaching version keeps reminders outside the stable system prompt so\n short-lived context does not get mixed into the long-lived instructions.\n \"\"\"\n parts = []\n if extra:\n parts.append(extra)\n if not parts:\n return None\n content = \"\\n\" + \"\\n\".join(parts) + \"\\n \"\n return {\"role\": \"user\", \"content\": content}\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n# Global prompt builder\nprompt_builder = SystemPromptBuilder(workdir=WORKDIR, tools=TOOLS)\n\n\ndef agent_loop(messages: list):\n \"\"\"\n Agent loop with assembled system prompt.\n\n The system prompt is rebuilt each iteration. 
In real CC, the static\n prefix is cached and only the dynamic suffix changes per turn.\n \"\"\"\n while True:\n system = prompt_builder.build()\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n # Show the assembled prompt at startup for educational purposes\n full_prompt = prompt_builder.build()\n section_count = full_prompt.count(\"\\n# \")\n print(f\"[System prompt assembled: {len(full_prompt)} chars, ~{section_count} sections]\")\n\n # /prompt command shows the full assembled prompt\n history = []\n while True:\n try:\n query = input(\"\\033[36ms10 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n if query.strip() == \"/prompt\":\n print(\"--- System Prompt ---\")\n print(prompt_builder.build())\n print(\"--- End ---\")\n continue\n\n if query.strip() == \"/sections\":\n prompt = prompt_builder.build()\n for line in prompt.splitlines():\n if line.startswith(\"# \") or line == DYNAMIC_BOUNDARY:\n print(f\" {line}\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: assembly -- the system prompt is a pipeline, not a string.\n\"\"\"\ns10_system_prompt.py - System Prompt Construction\n\nThis chapter teaches one core idea:\nthe system prompt should be assembled from clear sections, not written as one\ngiant hardcoded blob.\n\nTeaching pipeline:\n 1. core instructions\n 2. tool listing\n 3. skill metadata\n 4. memory section\n 5. CLAUDE.md chain\n 6. dynamic context\n\nThe builder keeps stable information separate from information that changes\noften. A simple DYNAMIC_BOUNDARY marker makes that split visible.\n\nPer-turn reminders are even more dynamic. They are better injected as a\nseparate user-role system reminder than mixed blindly into the stable prompt.\n\nKey insight: \"Prompt construction is a pipeline with boundaries, not one\nbig string.\"\n\"\"\"\n\nimport datetime\nimport json\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nDYNAMIC_BOUNDARY = \"=== DYNAMIC_BOUNDARY ===\"\n\n\nclass SystemPromptBuilder:\n \"\"\"\n Assemble the system prompt from independent sections.\n\n The teaching goal here is clarity:\n each section has one source and one responsibility.\n\n That makes the prompt easier to reason about, easier to test, and easier\n to evolve as the agent grows new capabilities.\n \"\"\"\n\n def __init__(self, workdir: Path = None, tools: list = None):\n self.workdir = workdir or WORKDIR\n self.tools = tools or []\n self.skills_dir = self.workdir / \"skills\"\n self.memory_dir = self.workdir / \".memory\"\n\n # -- Section 1: Core instructions --\n def _build_core(self) -> str:\n return (\n f\"You are a coding agent operating in {self.workdir}.\\n\"\n \"Use the provided tools to explore, read, write, and edit files.\\n\"\n \"Always verify before assuming. 
Prefer reading files over guessing.\"\n )\n\n # -- Section 2: Tool listings --\n def _build_tool_listing(self) -> str:\n if not self.tools:\n return \"\"\n lines = [\"# Available tools\"]\n for tool in self.tools:\n props = tool.get(\"input_schema\", {}).get(\"properties\", {})\n params = \", \".join(props.keys())\n lines.append(f\"- {tool['name']}({params}): {tool['description']}\")\n return \"\\n\".join(lines)\n\n # -- Section 3: Skill metadata (layer 1 from s05 concept) --\n def _build_skill_listing(self) -> str:\n if not self.skills_dir.exists():\n return \"\"\n skills = []\n for skill_dir in sorted(self.skills_dir.iterdir()):\n skill_md = skill_dir / \"SKILL.md\"\n if not skill_md.exists():\n continue\n text = skill_md.read_text()\n # Parse frontmatter for name + description\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\", text, re.DOTALL)\n if not match:\n continue\n meta = {}\n for line in match.group(1).splitlines():\n if \":\" in line:\n k, _, v = line.partition(\":\")\n meta[k.strip()] = v.strip()\n name = meta.get(\"name\", skill_dir.name)\n desc = meta.get(\"description\", \"\")\n skills.append(f\"- {name}: {desc}\")\n if not skills:\n return \"\"\n return \"# Available skills\\n\" + \"\\n\".join(skills)\n\n # -- Section 4: Memory content --\n def _build_memory_section(self) -> str:\n if not self.memory_dir.exists():\n return \"\"\n memories = []\n for md_file in sorted(self.memory_dir.glob(\"*.md\")):\n if md_file.name == \"MEMORY.md\":\n continue\n text = md_file.read_text()\n match = re.match(r\"^---\\s*\\n(.*?)\\n---\\s*\\n(.*)\", text, re.DOTALL)\n if not match:\n continue\n header, body = match.group(1), match.group(2).strip()\n meta = {}\n for line in header.splitlines():\n if \":\" in line:\n k, _, v = line.partition(\":\")\n meta[k.strip()] = v.strip()\n name = meta.get(\"name\", md_file.stem)\n mem_type = meta.get(\"type\", \"project\")\n desc = meta.get(\"description\", \"\")\n memories.append(f\"[{mem_type}] {name}: {desc}\\n{body}\")\n if not memories:\n return \"\"\n return \"# Memories (persistent)\\n\\n\" + \"\\n\\n\".join(memories)\n\n # -- Section 5: CLAUDE.md chain --\n def _build_claude_md(self) -> str:\n \"\"\"\n Load CLAUDE.md files in priority order (all are included):\n 1. ~/.claude/CLAUDE.md (user-global instructions)\n 2. /CLAUDE.md (project instructions)\n 3. 
/CLAUDE.md (directory-specific instructions)\n \"\"\"\n sources = []\n\n # User-global\n user_claude = Path.home() / \".claude\" / \"CLAUDE.md\"\n if user_claude.exists():\n sources.append((\"user global (~/.claude/CLAUDE.md)\", user_claude.read_text()))\n\n # Project root\n project_claude = self.workdir / \"CLAUDE.md\"\n if project_claude.exists():\n sources.append((\"project root (CLAUDE.md)\", project_claude.read_text()))\n\n # Subdirectory -- in real CC, this walks from cwd up to project root\n # Teaching: check cwd if different from workdir\n cwd = Path.cwd()\n if cwd != self.workdir:\n subdir_claude = cwd / \"CLAUDE.md\"\n if subdir_claude.exists():\n sources.append((f\"subdir ({cwd.name}/CLAUDE.md)\", subdir_claude.read_text()))\n\n if not sources:\n return \"\"\n parts = [\"# CLAUDE.md instructions\"]\n for label, content in sources:\n parts.append(f\"## From {label}\")\n parts.append(content.strip())\n return \"\\n\\n\".join(parts)\n\n # -- Section 6: Dynamic context --\n def _build_dynamic_context(self) -> str:\n lines = [\n f\"Current date: {datetime.date.today().isoformat()}\",\n f\"Working directory: {self.workdir}\",\n f\"Model: {MODEL}\",\n f\"Platform: {os.uname().sysname}\",\n ]\n return \"# Dynamic context\\n\" + \"\\n\".join(lines)\n\n # -- Assemble all sections --\n def build(self) -> str:\n \"\"\"\n Assemble the full system prompt from all sections.\n\n Static sections (1-5) are separated from dynamic (6) by\n the DYNAMIC_BOUNDARY marker. In real CC, the static prefix\n is cached across turns to save prompt tokens.\n \"\"\"\n sections = []\n\n core = self._build_core()\n if core:\n sections.append(core)\n\n tools = self._build_tool_listing()\n if tools:\n sections.append(tools)\n\n skills = self._build_skill_listing()\n if skills:\n sections.append(skills)\n\n memory = self._build_memory_section()\n if memory:\n sections.append(memory)\n\n claude_md = self._build_claude_md()\n if claude_md:\n sections.append(claude_md)\n\n # Static/dynamic boundary\n sections.append(DYNAMIC_BOUNDARY)\n\n dynamic = self._build_dynamic_context()\n if dynamic:\n sections.append(dynamic)\n\n return \"\\n\\n\".join(sections)\n\n\ndef build_system_reminder(extra: str = None) -> dict:\n \"\"\"\n Build a system-reminder user message for per-turn dynamic content.\n\n The teaching version keeps reminders outside the stable system prompt so\n short-lived context does not get mixed into the long-lived instructions.\n \"\"\"\n parts = []\n if extra:\n parts.append(extra)\n if not parts:\n return None\n content = \"\\n\" + \"\\n\".join(parts) + \"\\n \"\n return {\"role\": \"user\", \"content\": content}\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n# Global prompt builder\nprompt_builder = SystemPromptBuilder(workdir=WORKDIR, tools=TOOLS)\n\n\ndef agent_loop(messages: list):\n \"\"\"\n Agent loop with assembled system prompt.\n\n The system prompt is rebuilt each iteration. 
In real CC, the static\n prefix is cached and only the dynamic suffix changes per turn.\n \"\"\"\n while True:\n system = prompt_builder.build()\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n # Show the assembled prompt at startup for educational purposes\n full_prompt = prompt_builder.build()\n section_count = full_prompt.count(\"\\n# \")\n print(f\"[System prompt assembled: {len(full_prompt)} chars, ~{section_count} sections]\")\n\n # /prompt command shows the full assembled prompt\n history = []\n while True:\n try:\n query = input(\"\\033[36ms10 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n if query.strip() == \"/prompt\":\n print(\"--- System Prompt ---\")\n print(prompt_builder.build())\n print(\"--- End ---\")\n continue\n\n if query.strip() == \"/sections\":\n prompt = prompt_builder.build()\n for line in prompt.splitlines():\n if line.startswith(\"# \") or line == DYNAMIC_BOUNDARY:\n print(f\" {line}\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
},
{
- "id": "s06",
- "filename": "s06_context_compact.py",
- "title": "Compact",
- "subtitle": "Three-Layer Compression",
- "loc": 205,
+ "id": "s11",
+ "filename": "s11_error_recovery.py",
+ "title": "Error Recovery",
+ "subtitle": "Recover, Then Continue",
+ "loc": 249,
"tools": [
"bash",
"read_file",
"write_file",
- "edit_file",
- "compact"
- ],
- "newTools": [
- "compact"
+ "edit_file"
],
- "coreAddition": "micro-compact + auto-compact + archival",
- "keyInsight": "Context will fill up; three-layer compression strategy enables infinite sessions",
+ "newTools": [],
+      "coreAddition": "Continuation messages + retry branches",
+      "keyInsight": "Most failures aren't true task failures -- they're signals to try a different path.",
"classes": [],
"functions": [
{
"name": "estimate_tokens",
"signature": "def estimate_tokens(messages: list)",
- "startLine": 61
- },
- {
- "name": "micro_compact",
- "signature": "def micro_compact(messages: list)",
- "startLine": 67
+ "startLine": 74
},
{
"name": "auto_compact",
"signature": "def auto_compact(messages: list)",
- "startLine": 97
+ "startLine": 79
+ },
+ {
+ "name": "backoff_delay",
+ "signature": "def backoff_delay(attempt: int)",
+ "startLine": 111
},
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
- "startLine": 124
+ "startLine": 119
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
- "startLine": 130
+ "startLine": 126
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int = None)",
- "startLine": 142
+ "startLine": 139
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
- "startLine": 151
+ "startLine": 149
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
- "startLine": 160
+ "startLine": 159
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
- "startLine": 194
+ "startLine": 192
}
],
- "layer": "memory",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns06_context_compact.py - Compact\n\nThree-layer compression pipeline so the agent can work forever:\n\n Every turn:\n +------------------+\n | Tool call result |\n +------------------+\n |\n v\n [Layer 1: micro_compact] (silent, every turn)\n Replace tool_result content older than last 3\n with \"[Previous: used {tool_name}]\"\n |\n v\n [Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\n continue [Layer 2: auto_compact]\n Save full transcript to .transcripts/\n Ask LLM to summarize conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact -> immediate summarization.\n Same as auto, triggered manually.\n\nKey insight: \"The agent can forget strategically and keep working forever.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\"\n\nTHRESHOLD = 50000\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nKEEP_RECENT = 3\n\n\ndef estimate_tokens(messages: list) -> int:\n \"\"\"Rough token count: ~4 chars per token.\"\"\"\n return len(str(messages)) // 4\n\n\n# -- Layer 1: micro_compact - replace old tool results with placeholders --\ndef micro_compact(messages: list) -> list:\n # Collect (msg_index, part_index, tool_result_dict) for all tool_result entries\n tool_results = []\n for msg_idx, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for part_idx, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((msg_idx, part_idx, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n # Find tool_name for each result by matching tool_use_id in prior assistant messages\n tool_name_map = {}\n for msg in messages:\n if msg[\"role\"] == \"assistant\":\n content = msg.get(\"content\", [])\n if isinstance(content, list):\n for block in content:\n if hasattr(block, \"type\") and block.type == \"tool_use\":\n tool_name_map[block.id] = block.name\n # Clear old results (keep last KEEP_RECENT)\n to_clear = tool_results[:-KEEP_RECENT]\n for _, _, result in to_clear:\n if isinstance(result.get(\"content\"), str) and len(result[\"content\"]) > 100:\n tool_id = result.get(\"tool_use_id\", \"\")\n tool_name = tool_name_map.get(tool_id, \"unknown\")\n result[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n\n\n# -- Layer 2: auto_compact - save transcript, summarize, replace messages --\ndef auto_compact(messages: list) -> list:\n # Save full transcript to disk\n TRANSCRIPT_DIR.mkdir(exist_ok=True)\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n print(f\"[transcript saved: {transcript_path}]\")\n # Ask LLM to summarize\n conversation_text = json.dumps(messages, default=str)[:80000]\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity. 
Include: \"\n \"1) What was accomplished, 2) Current state, 3) Key decisions made. \"\n \"Be concise but preserve critical details.\\n\\n\" + conversation_text}],\n max_tokens=2000,\n )\n summary = response.content[0].text\n # Replace all messages with compressed summary\n return [\n {\"role\": \"user\", \"content\": f\"[Conversation compressed. Transcript: {transcript_path}]\\n\\n{summary}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. I have the context from the summary. Continuing.\"},\n ]\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"compact\": lambda **kw: \"Manual compression requested.\",\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"compact\", \"description\": \"Trigger manual conversation compression.\",\n \"input_schema\": {\"type\": 
\"object\", \"properties\": {\"focus\": {\"type\": \"string\", \"description\": \"What to preserve in the summary\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # Layer 1: micro_compact before each LLM call\n micro_compact(messages)\n # Layer 2: auto_compact if token estimate exceeds threshold\n if estimate_tokens(messages) > THRESHOLD:\n print(\"[auto_compact triggered]\")\n messages[:] = auto_compact(messages)\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n manual_compact = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"compact\":\n manual_compact = True\n output = \"Compressing...\"\n else:\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n # Layer 3: manual compact triggered by the compact tool\n if manual_compact:\n print(\"[manual compact]\")\n messages[:] = auto_compact(messages)\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms06 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ "layer": "hardening",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: resilience -- a robust agent recovers instead of crashing.\n\"\"\"\ns11_error_recovery.py - Error Recovery\n\nTeaching demo of three recovery paths:\n\n- continue when output is truncated\n- compact when context grows too large\n- back off when transport errors are temporary\n\n LLM response\n |\n v\n [Check stop_reason]\n |\n +-- \"max_tokens\" ----> [Strategy 1: max_output_tokens recovery]\n | Inject continuation message:\n | \"Output limit hit. Continue directly.\"\n | Retry up to MAX_RECOVERY_ATTEMPTS (3).\n | Counter: max_output_recovery_count\n |\n +-- API error -------> [Check error type]\n | |\n | +-- prompt_too_long --> [Strategy 2: compact + retry]\n | | Trigger auto_compact (LLM summary).\n | | Replace history with summary.\n | | Retry the turn.\n | |\n | +-- connection/rate --> [Strategy 3: backoff retry]\n | Exponential backoff: base * 2^attempt + jitter\n | Up to 3 retries.\n |\n +-- \"end_turn\" -----> [Normal exit]\n\n Recovery priority (first match wins):\n 1. max_tokens -> inject continuation, retry\n 2. prompt_too_long -> compact, retry\n 3. connection error -> backoff, retry\n 4. all retries exhausted -> fail gracefully\n\"\"\"\n\nimport json\nimport os\nimport random\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic, APIError\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# Recovery constants\nMAX_RECOVERY_ATTEMPTS = 3\nBACKOFF_BASE_DELAY = 1.0 # seconds\nBACKOFF_MAX_DELAY = 30.0 # seconds\nTOKEN_THRESHOLD = 50000 # chars / 4 ~ tokens for compact trigger\n\nCONTINUATION_MESSAGE = (\n \"Output limit hit. Continue directly from where you stopped -- \"\n \"no recap, no repetition. Pick up mid-sentence if needed.\"\n)\n\n\ndef estimate_tokens(messages: list) -> int:\n \"\"\"Rough token estimate: ~4 chars per token.\"\"\"\n return len(json.dumps(messages, default=str)) // 4\n\n\ndef auto_compact(messages: list) -> list:\n \"\"\"\n Compress conversation history into a short continuation summary.\n \"\"\"\n conversation_text = json.dumps(messages, default=str)[:80000]\n prompt = (\n \"Summarize this conversation for continuity. Include:\\n\"\n \"1) Task overview and success criteria\\n\"\n \"2) Current state: completed work, files touched\\n\"\n \"3) Key decisions and failed approaches\\n\"\n \"4) Remaining next steps\\n\"\n \"Be concise but preserve critical details.\\n\\n\"\n + conversation_text\n )\n try:\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=4000,\n )\n summary = response.content[0].text\n except Exception as e:\n summary = f\"(compact failed: {e}). Previous context lost.\"\n\n continuation = (\n \"This session continues from a previous conversation that was compacted. 
\"\n f\"Summary of prior context:\\n\\n{summary}\\n\\n\"\n \"Continue from where we left off without re-asking the user.\"\n )\n return [{\"role\": \"user\", \"content\": continuation}]\n\n\ndef backoff_delay(attempt: int) -> float:\n \"\"\"Exponential backoff with jitter: base * 2^attempt + random(0, 1).\"\"\"\n delay = min(BACKOFF_BASE_DELAY * (2 ** attempt), BACKOFF_MAX_DELAY)\n jitter = random.uniform(0, 1)\n return delay + jitter\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\"\n\n\ndef agent_loop(messages: list):\n \"\"\"\n Error-recovering agent loop with three paths:\n\n 1. continue after max_tokens\n 2. compact after prompt-too-long\n 3. 
back off after transient transport failure\n \"\"\"\n max_output_recovery_count = 0\n\n while True:\n # -- Attempt the API call with connection retry --\n response = None\n for attempt in range(MAX_RECOVERY_ATTEMPTS + 1):\n try:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n break # success\n\n except APIError as e:\n error_body = str(e).lower()\n\n # Strategy 2: prompt_too_long -> compact and retry\n if \"overlong_prompt\" in error_body or (\"prompt\" in error_body and \"long\" in error_body):\n print(f\"[Recovery] Prompt too long. Compacting... (attempt {attempt + 1})\")\n messages[:] = auto_compact(messages)\n continue\n\n # Strategy 3: connection/rate errors -> backoff\n if attempt < MAX_RECOVERY_ATTEMPTS:\n delay = backoff_delay(attempt)\n print(f\"[Recovery] API error: {e}. \"\n f\"Retrying in {delay:.1f}s (attempt {attempt + 1}/{MAX_RECOVERY_ATTEMPTS})\")\n time.sleep(delay)\n continue\n\n # All retries exhausted\n print(f\"[Error] API call failed after {MAX_RECOVERY_ATTEMPTS} retries: {e}\")\n return\n\n except (ConnectionError, TimeoutError, OSError) as e:\n # Strategy 3: network-level errors -> backoff\n if attempt < MAX_RECOVERY_ATTEMPTS:\n delay = backoff_delay(attempt)\n print(f\"[Recovery] Connection error: {e}. \"\n f\"Retrying in {delay:.1f}s (attempt {attempt + 1}/{MAX_RECOVERY_ATTEMPTS})\")\n time.sleep(delay)\n continue\n\n print(f\"[Error] Connection failed after {MAX_RECOVERY_ATTEMPTS} retries: {e}\")\n return\n\n if response is None:\n print(\"[Error] No response received.\")\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n # -- Strategy 1: max_tokens recovery --\n if response.stop_reason == \"max_tokens\":\n max_output_recovery_count += 1\n if max_output_recovery_count <= MAX_RECOVERY_ATTEMPTS:\n print(f\"[Recovery] max_tokens hit \"\n f\"({max_output_recovery_count}/{MAX_RECOVERY_ATTEMPTS}). \"\n \"Injecting continuation...\")\n messages.append({\"role\": \"user\", \"content\": CONTINUATION_MESSAGE})\n continue # retry the loop\n else:\n print(f\"[Error] max_tokens recovery exhausted \"\n f\"({MAX_RECOVERY_ATTEMPTS} attempts). Stopping.\")\n return\n\n # Reset max_tokens counter on successful non-max_tokens response\n max_output_recovery_count = 0\n\n # -- Normal end_turn: no tool use requested --\n if response.stop_reason != \"tool_use\":\n return\n\n # -- Process tool calls --\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n # Check if we should auto-compact (proactive, not just reactive)\n if estimate_tokens(messages) > TOKEN_THRESHOLD:\n print(\"[Recovery] Token estimate exceeds threshold. 
Auto-compacting...\")\n messages[:] = auto_compact(messages)\n\n\nif __name__ == \"__main__\":\n print(\"[Error recovery enabled: max_tokens / prompt_too_long / connection backoff]\")\n history = []\n while True:\n try:\n query = input(\"\\033[36ms11 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): resilience(韧性)——稳健智能体应恢复而非崩溃。\n\"\"\"\ns11_error_recovery.py - Error Recovery(错误恢复)\n\n教学版展示三条恢复路径:\n\n- 输出被截断时继续(continue when output is truncated)\n- 上下文过大时压缩(compact when context grows too large)\n- 传输层临时错误时退避重试(back off when transport errors are temporary)\n\n LLM 响应(response)\n |\n v\n [检查 stop_reason]\n |\n +-- \"max_tokens\" ----> [策略 1:max_output_tokens 恢复]\n | 注入续写消息:\n | \"Output limit hit. Continue directly.\"(达到输出上限,请直接续写)\n | 最多重试 MAX_RECOVERY_ATTEMPTS(3)次\n | 计数器:max_output_recovery_count\n |\n +-- API error -------> [检查错误类型]\n | |\n | +-- prompt_too_long --> [策略 2:压缩后重试]\n | | 触发 auto_compact(LLM 摘要)\n | | 用摘要替换历史\n | | 重试当前轮\n | |\n | +-- connection/rate --> [策略 3:退避重试]\n | 指数退避:base * 2^attempt + jitter\n | 最多重试 3 次\n |\n +-- \"end_turn\" -----> [正常结束]\n\n 恢复优先级(first match wins,首条命中):\n 1. max_tokens -> 注入续写消息并重试\n 2. prompt_too_long -> 压缩并重试\n 3. connection error -> 退避并重试\n 4. 全部重试耗尽 -> 优雅失败\n\"\"\"\n\nimport json\nimport os\nimport random\nimport subprocess\nimport time\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import APIError, create_client\nexcept ModuleNotFoundError:\n from llm_client import APIError, create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\n\n# 恢复相关常量\nMAX_RECOVERY_ATTEMPTS = 3\nBACKOFF_BASE_DELAY = 1.0 # 秒\nBACKOFF_MAX_DELAY = 30.0 # 秒\nTOKEN_THRESHOLD = 50000 # chars(字符)/4 ≈ tokens(token),用于压缩触发\n\nCONTINUATION_MESSAGE = (\n \"输出达到上限,请从中断处直接继续。\"\n \"不要复述,不要重复,必要时可从句中继续。\"\n)\n\n\ndef estimate_tokens(messages: list) -> int:\n \"\"\"粗略估算 token:约 4 个字符 ≈ 1 token。\"\"\"\n return len(json.dumps(messages, default=str)) // 4\n\n\ndef auto_compact(messages: list) -> list:\n \"\"\"\n 将会话历史压缩为可续写的短摘要。\n \"\"\"\n conversation_text = json.dumps(messages, default=str)[:80000]\n prompt = (\n \"请为延续执行总结这段会话,包含:\\n\"\n \"1) 任务概览与成功标准\\n\"\n \"2) 当前状态:已完成工作、涉及文件\\n\"\n \"3) 关键决策与失败尝试\\n\"\n \"4) 剩余下一步\\n\"\n \"请保持简洁但保留关键细节。\\n\\n\"\n + conversation_text\n )\n try:\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=4000,\n )\n summary = response.content[0].text\n except Exception as e:\n summary = f\"(compact 失败:{e})。此前上下文已丢失。\"\n\n continuation = (\n \"当前会话承接自已 compact 的历史会话。\"\n f\"先前上下文摘要如下:\\n\\n{summary}\\n\\n\"\n \"请直接从中断点继续,不要重复向用户提问。\"\n )\n return [{\"role\": \"user\", \"content\": continuation}]\n\n\ndef backoff_delay(attempt: int) -> float:\n \"\"\"指数退避 + 抖动:base * 2^attempt + random(0, 1)。\"\"\"\n delay = min(BACKOFF_BASE_DELAY * (2 ** attempt), BACKOFF_MAX_DELAY)\n jitter = random.uniform(0, 1)\n return delay + jitter\n\n\n# -- 工具实现 --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = 
lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\nSYSTEM = f\"你是位于 {WORKDIR} 的 coding agent(编码智能体),请使用工具解决任务。\"\n\n\ndef agent_loop(messages: list):\n \"\"\"\n 带三类恢复路径的智能体循环:\n\n 1. max_tokens 后续写恢复\n 2. prompt 过长后 compact 恢复\n 3. 
瞬时传输故障后退避重试\n \"\"\"\n max_output_recovery_count = 0\n\n while True:\n # -- 尝试 API 调用(含连接重试) --\n response = None\n for attempt in range(MAX_RECOVERY_ATTEMPTS + 1):\n try:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n break # success(成功)\n\n except APIError as e:\n error_body = str(e).lower()\n\n # 策略 2:prompt_too_long -> compact 后重试\n if \"overlong_prompt\" in error_body or (\"prompt\" in error_body and \"long\" in error_body):\n print(f\"[Recovery] 提示词过长,正在压缩...(第 {attempt + 1} 次)\")\n messages[:] = auto_compact(messages)\n continue\n\n # 策略 3:连接/限流错误 -> 退避重试\n if attempt < MAX_RECOVERY_ATTEMPTS:\n delay = backoff_delay(attempt)\n print(f\"[Recovery] API 错误:{e}。\"\n f\"将在 {delay:.1f}s 后重试(第 {attempt + 1}/{MAX_RECOVERY_ATTEMPTS} 次)\")\n time.sleep(delay)\n continue\n\n # 重试耗尽\n print(f\"[Error] API 调用在重试 {MAX_RECOVERY_ATTEMPTS} 次后仍失败:{e}\")\n return\n\n except (ConnectionError, TimeoutError, OSError) as e:\n # 策略 3:网络层错误 -> 退避重试\n if attempt < MAX_RECOVERY_ATTEMPTS:\n delay = backoff_delay(attempt)\n print(f\"[Recovery] 连接错误:{e}。\"\n f\"将在 {delay:.1f}s 后重试(第 {attempt + 1}/{MAX_RECOVERY_ATTEMPTS} 次)\")\n time.sleep(delay)\n continue\n\n print(f\"[Error] 连接在重试 {MAX_RECOVERY_ATTEMPTS} 次后仍失败:{e}\")\n return\n\n if response is None:\n print(\"[Error] 未收到响应。\")\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n # -- 策略 1:max_tokens 恢复 --\n if response.stop_reason == \"max_tokens\":\n max_output_recovery_count += 1\n if max_output_recovery_count <= MAX_RECOVERY_ATTEMPTS:\n print(f\"[Recovery] 触发 max_tokens \"\n f\"({max_output_recovery_count}/{MAX_RECOVERY_ATTEMPTS}). \"\n \"注入 continuation 消息并重试...\")\n messages.append({\"role\": \"user\", \"content\": CONTINUATION_MESSAGE})\n continue # 继续循环重试\n else:\n print(f\"[Error] max_tokens recovery exhausted \"\n f\"(已尝试 {MAX_RECOVERY_ATTEMPTS} 次)。停止重试。\")\n return\n\n # 非 max_tokens 成功返回后重置计数\n max_output_recovery_count = 0\n\n # -- 正常 end_turn:未请求工具调用 --\n if response.stop_reason != \"tool_use\":\n return\n\n # -- 处理工具调用 --\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n # 主动检查是否需要 auto-compact(而非仅被动触发)\n if estimate_tokens(messages) > TOKEN_THRESHOLD:\n print(\"[Recovery] Token 估算超出阈值,正在自动压缩...\")\n messages[:] = auto_compact(messages)\n\n\nif __name__ == \"__main__\":\n print(\"[已启用错误恢复:max_tokens / prompt_too_long / connection backoff]\")\n history = []\n while True:\n try:\n query = input(\"\\033[36ms11 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: resilience -- a robust agent recovers instead of crashing.\n\"\"\"\ns11_error_recovery.py - Error Recovery\n\nTeaching demo of three recovery paths:\n\n- continue when output is truncated\n- compact when context grows too large\n- back off when transport errors are temporary\n\n LLM response\n |\n v\n [Check stop_reason]\n |\n +-- \"max_tokens\" ----> [Strategy 1: max_output_tokens recovery]\n | Inject continuation message:\n | \"Output limit hit. Continue directly.\"\n | Retry up to MAX_RECOVERY_ATTEMPTS (3).\n | Counter: max_output_recovery_count\n |\n +-- API error -------> [Check error type]\n | |\n | +-- prompt_too_long --> [Strategy 2: compact + retry]\n | | Trigger auto_compact (LLM summary).\n | | Replace history with summary.\n | | Retry the turn.\n | |\n | +-- connection/rate --> [Strategy 3: backoff retry]\n | Exponential backoff: base * 2^attempt + jitter\n | Up to 3 retries.\n |\n +-- \"end_turn\" -----> [Normal exit]\n\n Recovery priority (first match wins):\n 1. max_tokens -> inject continuation, retry\n 2. prompt_too_long -> compact, retry\n 3. connection error -> backoff, retry\n 4. all retries exhausted -> fail gracefully\n\"\"\"\n\nimport json\nimport os\nimport random\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic, APIError\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# Recovery constants\nMAX_RECOVERY_ATTEMPTS = 3\nBACKOFF_BASE_DELAY = 1.0 # seconds\nBACKOFF_MAX_DELAY = 30.0 # seconds\nTOKEN_THRESHOLD = 50000 # chars / 4 ~ tokens for compact trigger\n\nCONTINUATION_MESSAGE = (\n \"Output limit hit. Continue directly from where you stopped -- \"\n \"no recap, no repetition. Pick up mid-sentence if needed.\"\n)\n\n\ndef estimate_tokens(messages: list) -> int:\n \"\"\"Rough token estimate: ~4 chars per token.\"\"\"\n return len(json.dumps(messages, default=str)) // 4\n\n\ndef auto_compact(messages: list) -> list:\n \"\"\"\n Compress conversation history into a short continuation summary.\n \"\"\"\n conversation_text = json.dumps(messages, default=str)[:80000]\n prompt = (\n \"Summarize this conversation for continuity. Include:\\n\"\n \"1) Task overview and success criteria\\n\"\n \"2) Current state: completed work, files touched\\n\"\n \"3) Key decisions and failed approaches\\n\"\n \"4) Remaining next steps\\n\"\n \"Be concise but preserve critical details.\\n\\n\"\n + conversation_text\n )\n try:\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=4000,\n )\n summary = response.content[0].text\n except Exception as e:\n summary = f\"(compact failed: {e}). Previous context lost.\"\n\n continuation = (\n \"This session continues from a previous conversation that was compacted. 
\"\n f\"Summary of prior context:\\n\\n{summary}\\n\\n\"\n \"Continue from where we left off without re-asking the user.\"\n )\n return [{\"role\": \"user\", \"content\": continuation}]\n\n\ndef backoff_delay(attempt: int) -> float:\n \"\"\"Exponential backoff with jitter: base * 2^attempt + random(0, 1).\"\"\"\n delay = min(BACKOFF_BASE_DELAY * (2 ** attempt), BACKOFF_MAX_DELAY)\n jitter = random.uniform(0, 1)\n return delay + jitter\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\"\n\n\ndef agent_loop(messages: list):\n \"\"\"\n Error-recovering agent loop with three paths:\n\n 1. continue after max_tokens\n 2. compact after prompt-too-long\n 3. 
back off after transient transport failure\n \"\"\"\n max_output_recovery_count = 0\n\n while True:\n # -- Attempt the API call with connection retry --\n response = None\n for attempt in range(MAX_RECOVERY_ATTEMPTS + 1):\n try:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n break # success\n\n except APIError as e:\n error_body = str(e).lower()\n\n # Strategy 2: prompt_too_long -> compact and retry\n if \"overlong_prompt\" in error_body or (\"prompt\" in error_body and \"long\" in error_body):\n print(f\"[Recovery] Prompt too long. Compacting... (attempt {attempt + 1})\")\n messages[:] = auto_compact(messages)\n continue\n\n # Strategy 3: connection/rate errors -> backoff\n if attempt < MAX_RECOVERY_ATTEMPTS:\n delay = backoff_delay(attempt)\n print(f\"[Recovery] API error: {e}. \"\n f\"Retrying in {delay:.1f}s (attempt {attempt + 1}/{MAX_RECOVERY_ATTEMPTS})\")\n time.sleep(delay)\n continue\n\n # All retries exhausted\n print(f\"[Error] API call failed after {MAX_RECOVERY_ATTEMPTS} retries: {e}\")\n return\n\n except (ConnectionError, TimeoutError, OSError) as e:\n # Strategy 3: network-level errors -> backoff\n if attempt < MAX_RECOVERY_ATTEMPTS:\n delay = backoff_delay(attempt)\n print(f\"[Recovery] Connection error: {e}. \"\n f\"Retrying in {delay:.1f}s (attempt {attempt + 1}/{MAX_RECOVERY_ATTEMPTS})\")\n time.sleep(delay)\n continue\n\n print(f\"[Error] Connection failed after {MAX_RECOVERY_ATTEMPTS} retries: {e}\")\n return\n\n if response is None:\n print(\"[Error] No response received.\")\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n # -- Strategy 1: max_tokens recovery --\n if response.stop_reason == \"max_tokens\":\n max_output_recovery_count += 1\n if max_output_recovery_count <= MAX_RECOVERY_ATTEMPTS:\n print(f\"[Recovery] max_tokens hit \"\n f\"({max_output_recovery_count}/{MAX_RECOVERY_ATTEMPTS}). \"\n \"Injecting continuation...\")\n messages.append({\"role\": \"user\", \"content\": CONTINUATION_MESSAGE})\n continue # retry the loop\n else:\n print(f\"[Error] max_tokens recovery exhausted \"\n f\"({MAX_RECOVERY_ATTEMPTS} attempts). Stopping.\")\n return\n\n # Reset max_tokens counter on successful non-max_tokens response\n max_output_recovery_count = 0\n\n # -- Normal end_turn: no tool use requested --\n if response.stop_reason != \"tool_use\":\n return\n\n # -- Process tool calls --\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n # Check if we should auto-compact (proactive, not just reactive)\n if estimate_tokens(messages) > TOKEN_THRESHOLD:\n print(\"[Recovery] Token estimate exceeds threshold. 
Auto-compacting...\")\n messages[:] = auto_compact(messages)\n\n\nif __name__ == \"__main__\":\n print(\"[Error recovery enabled: max_tokens / prompt_too_long / connection backoff]\")\n history = []\n while True:\n try:\n query = input(\"\\033[36ms11 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: resilience -- a robust agent recovers instead of crashing.\n\"\"\"\ns11_error_recovery.py - Error Recovery\n\nTeaching demo of three recovery paths:\n\n- continue when output is truncated\n- compact when context grows too large\n- back off when transport errors are temporary\n\n LLM response\n |\n v\n [Check stop_reason]\n |\n +-- \"max_tokens\" ----> [Strategy 1: max_output_tokens recovery]\n | Inject continuation message:\n | \"Output limit hit. Continue directly.\"\n | Retry up to MAX_RECOVERY_ATTEMPTS (3).\n | Counter: max_output_recovery_count\n |\n +-- API error -------> [Check error type]\n | |\n | +-- prompt_too_long --> [Strategy 2: compact + retry]\n | | Trigger auto_compact (LLM summary).\n | | Replace history with summary.\n | | Retry the turn.\n | |\n | +-- connection/rate --> [Strategy 3: backoff retry]\n | Exponential backoff: base * 2^attempt + jitter\n | Up to 3 retries.\n |\n +-- \"end_turn\" -----> [Normal exit]\n\n Recovery priority (first match wins):\n 1. max_tokens -> inject continuation, retry\n 2. prompt_too_long -> compact, retry\n 3. connection error -> backoff, retry\n 4. all retries exhausted -> fail gracefully\n\"\"\"\n\nimport json\nimport os\nimport random\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic, APIError\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n# Recovery constants\nMAX_RECOVERY_ATTEMPTS = 3\nBACKOFF_BASE_DELAY = 1.0 # seconds\nBACKOFF_MAX_DELAY = 30.0 # seconds\nTOKEN_THRESHOLD = 50000 # chars / 4 ~ tokens for compact trigger\n\nCONTINUATION_MESSAGE = (\n \"Output limit hit. Continue directly from where you stopped -- \"\n \"no recap, no repetition. Pick up mid-sentence if needed.\"\n)\n\n\ndef estimate_tokens(messages: list) -> int:\n \"\"\"Rough token estimate: ~4 chars per token.\"\"\"\n return len(json.dumps(messages, default=str)) // 4\n\n\ndef auto_compact(messages: list) -> list:\n \"\"\"\n Compress conversation history into a short continuation summary.\n \"\"\"\n conversation_text = json.dumps(messages, default=str)[:80000]\n prompt = (\n \"Summarize this conversation for continuity. Include:\\n\"\n \"1) Task overview and success criteria\\n\"\n \"2) Current state: completed work, files touched\\n\"\n \"3) Key decisions and failed approaches\\n\"\n \"4) Remaining next steps\\n\"\n \"Be concise but preserve critical details.\\n\\n\"\n + conversation_text\n )\n try:\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\": prompt}],\n max_tokens=4000,\n )\n summary = response.content[0].text\n except Exception as e:\n summary = f\"(compact failed: {e}). Previous context lost.\"\n\n continuation = (\n \"This session continues from a previous conversation that was compacted. 
\"\n f\"Summary of prior context:\\n\\n{summary}\\n\\n\"\n \"Continue from where we left off without re-asking the user.\"\n )\n return [{\"role\": \"user\", \"content\": continuation}]\n\n\ndef backoff_delay(attempt: int) -> float:\n \"\"\"Exponential backoff with jitter: base * 2^attempt + random(0, 1).\"\"\"\n delay = min(BACKOFF_BASE_DELAY * (2 ** attempt), BACKOFF_MAX_DELAY)\n jitter = random.uniform(0, 1)\n return delay + jitter\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\"\n\n\ndef agent_loop(messages: list):\n \"\"\"\n Error-recovering agent loop with three paths:\n\n 1. continue after max_tokens\n 2. compact after prompt-too-long\n 3. 
back off after transient transport failure\n \"\"\"\n max_output_recovery_count = 0\n\n while True:\n # -- Attempt the API call with connection retry --\n response = None\n for attempt in range(MAX_RECOVERY_ATTEMPTS + 1):\n try:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n break # success\n\n except APIError as e:\n error_body = str(e).lower()\n\n # Strategy 2: prompt_too_long -> compact and retry\n if \"overlong_prompt\" in error_body or (\"prompt\" in error_body and \"long\" in error_body):\n print(f\"[Recovery] Prompt too long. Compacting... (attempt {attempt + 1})\")\n messages[:] = auto_compact(messages)\n continue\n\n # Strategy 3: connection/rate errors -> backoff\n if attempt < MAX_RECOVERY_ATTEMPTS:\n delay = backoff_delay(attempt)\n print(f\"[Recovery] API error: {e}. \"\n f\"Retrying in {delay:.1f}s (attempt {attempt + 1}/{MAX_RECOVERY_ATTEMPTS})\")\n time.sleep(delay)\n continue\n\n # All retries exhausted\n print(f\"[Error] API call failed after {MAX_RECOVERY_ATTEMPTS} retries: {e}\")\n return\n\n except (ConnectionError, TimeoutError, OSError) as e:\n # Strategy 3: network-level errors -> backoff\n if attempt < MAX_RECOVERY_ATTEMPTS:\n delay = backoff_delay(attempt)\n print(f\"[Recovery] Connection error: {e}. \"\n f\"Retrying in {delay:.1f}s (attempt {attempt + 1}/{MAX_RECOVERY_ATTEMPTS})\")\n time.sleep(delay)\n continue\n\n print(f\"[Error] Connection failed after {MAX_RECOVERY_ATTEMPTS} retries: {e}\")\n return\n\n if response is None:\n print(\"[Error] No response received.\")\n return\n\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n # -- Strategy 1: max_tokens recovery --\n if response.stop_reason == \"max_tokens\":\n max_output_recovery_count += 1\n if max_output_recovery_count <= MAX_RECOVERY_ATTEMPTS:\n print(f\"[Recovery] max_tokens hit \"\n f\"({max_output_recovery_count}/{MAX_RECOVERY_ATTEMPTS}). \"\n \"Injecting continuation...\")\n messages.append({\"role\": \"user\", \"content\": CONTINUATION_MESSAGE})\n continue # retry the loop\n else:\n print(f\"[Error] max_tokens recovery exhausted \"\n f\"({MAX_RECOVERY_ATTEMPTS} attempts). Stopping.\")\n return\n\n # Reset max_tokens counter on successful non-max_tokens response\n max_output_recovery_count = 0\n\n # -- Normal end_turn: no tool use requested --\n if response.stop_reason != \"tool_use\":\n return\n\n # -- Process tool calls --\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n # Check if we should auto-compact (proactive, not just reactive)\n if estimate_tokens(messages) > TOKEN_THRESHOLD:\n print(\"[Recovery] Token estimate exceeds threshold. 
Auto-compacting...\")\n messages[:] = auto_compact(messages)\n\n\nif __name__ == \"__main__\":\n print(\"[Error recovery enabled: max_tokens / prompt_too_long / connection backoff]\")\n history = []\n while True:\n try:\n query = input(\"\\033[36ms11 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
},
{
- "id": "s07",
- "filename": "s07_task_system.py",
- "title": "Tasks",
- "subtitle": "Task Graph + Dependencies",
- "loc": 207,
+ "id": "s12",
+ "filename": "s12_task_system.py",
+ "title": "Task System",
+ "subtitle": "Durable Work Graph",
+ "loc": 227,
"tools": [
"bash",
"read_file",
@@ -355,56 +840,61 @@
"task_list",
"task_get"
],
- "coreAddition": "TaskManager with file-based state + dependency graph",
- "keyInsight": "A file-based task graph with ordering, parallelism, and dependencies -- the coordination backbone for multi-agent work",
+ "coreAddition": "Task records + dependencies + unlock rules",
+ "keyInsight": "Todo lists help a session; durable task graphs coordinate work that outlives it.",
"classes": [
{
"name": "TaskManager",
- "startLine": 46,
- "endLine": 125
+ "startLine": 65,
+ "endLine": 152
}
],
"functions": [
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
- "startLine": 130
+ "startLine": 157
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
- "startLine": 136
+ "startLine": 163
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int = None)",
- "startLine": 148
+ "startLine": 175
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
- "startLine": 157
+ "startLine": 184
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
- "startLine": 166
+ "startLine": 193
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
- "startLine": 209
+ "startLine": 236
}
],
- "layer": "planning",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns07_task_system.py - Tasks\n\nTasks persist as JSON files in .tasks/ so they survive context compression.\nEach task has a dependency graph (blockedBy/blocks).\n\n .tasks/\n task_1.json {\"id\":1, \"subject\":\"...\", \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\", ...}\n task_3.json {\"id\":3, \"blockedBy\":[2], \"blocks\":[], ...}\n\n Dependency resolution:\n +----------+ +----------+ +----------+\n | task 1 | --> | task 2 | --> | task 3 |\n | complete | | blocked | | blocked |\n +----------+ +----------+ +----------+\n | ^\n +--- completing task 1 removes it from task 2's blockedBy\n\nKey insight: \"State that survives compression -- because it's outside the conversation.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTASKS_DIR = WORKDIR / \".tasks\"\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use task tools to plan and track work.\"\n\n\n# -- TaskManager: CRUD with dependency graph, persisted as JSON files --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = [int(f.stem.split(\"_\")[1]) for f in self.dir.glob(\"task_*.json\")]\n return max(ids) if ids else 0\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id, \"subject\": subject, \"description\": description,\n \"status\": \"pending\", \"blockedBy\": [], \"blocks\": [], \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def update(self, task_id: int, status: str = None,\n add_blocked_by: list = None, add_blocks: list = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n # When a task is completed, remove it from all other tasks' blockedBy\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocked_by:\n task[\"blockedBy\"] = list(set(task[\"blockedBy\"] + add_blocked_by))\n if add_blocks:\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\n # Bidirectional: also update the blocked tasks' blockedBy lists\n for blocked_id in add_blocks:\n try:\n blocked = self._load(blocked_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n self._save(blocked)\n except ValueError:\n pass\n self._save(task)\n return json.dumps(task, indent=2)\n\n def _clear_dependency(self, completed_id: int):\n \"\"\"Remove completed_id from all other tasks' blockedBy lists.\"\"\"\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if 
completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n blocked = f\" (blocked by: {t['blockedBy']})\" if t.get(\"blockedBy\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{blocked}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(TASKS_DIR)\n\n\n# -- Base tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"addBlockedBy\"), kw.get(\"addBlocks\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": 
\"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"task_create\", \"description\": \"Create a new task.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"subject\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\"}}, \"required\": [\"subject\"]}},\n {\"name\": \"task_update\", \"description\": \"Update a task's status or dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}, \"addBlockedBy\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}, \"addBlocks\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_list\", \"description\": \"List all tasks with status summary.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"task_get\", \"description\": \"Get full details of a task by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms07 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ "layer": "runtime",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: persistent tasks -- goals that outlive any single conversation.\n\"\"\"\ns12_task_system.py - Tasks\n\nTasks persist as JSON files in .tasks/ so they survive context compression.\nEach task carries a small dependency graph:\n\n- blockedBy: what must finish first\n- blocks: what this task unlocks later\n\n .tasks/\n task_1.json {\"id\":1, \"subject\":\"...\", \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\", ...}\n task_3.json {\"id\":3, \"blockedBy\":[2], \"blocks\":[], ...}\n\n Dependency resolution:\n +----------+ +----------+ +----------+\n | task 1 | --> | task 2 | --> | task 3 |\n | complete | | blocked | | blocked |\n +----------+ +----------+ +----------+\n | ^\n +--- completing task 1 removes it from task 2's blockedBy\n\nKey idea: task state survives compression because it lives on disk, not only\ninside the conversation.\nThese are durable work-graph tasks, not transient runtime execution slots.\n\nRead this file in this order:\n1. TaskManager: what a TaskRecord looks like on disk.\n2. TOOL_HANDLERS / TOOLS: how task operations enter the same loop as normal tools.\n3. agent_loop: how persistent work state is exposed back to the model.\n\nMost common confusion:\n- a task record is a durable work item\n- it is not a thread, background slot, or worker process\n\nTeaching boundary:\nthis chapter teaches the durable work graph first.\nRuntime execution slots and schedulers arrive later.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTASKS_DIR = WORKDIR / \".tasks\"\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
Use task tools to plan and track work.\"\n\n\n# -- TaskManager: CRUD for a persistent task graph --\nclass TaskManager:\n \"\"\"Persistent TaskRecord store.\n\n Think \"work graph on disk\", not \"currently running worker\".\n \"\"\"\n\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = [int(f.stem.split(\"_\")[1]) for f in self.dir.glob(\"task_*.json\")]\n return max(ids) if ids else 0\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id, \"subject\": subject, \"description\": description,\n \"status\": \"pending\", \"blockedBy\": [], \"blocks\": [], \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def update(self, task_id: int, status: str = None, owner: str = None,\n add_blocked_by: list = None, add_blocks: list = None) -> str:\n task = self._load(task_id)\n if owner is not None:\n task[\"owner\"] = owner\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\", \"deleted\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n # When a task is completed, remove it from all other tasks' blockedBy\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocked_by:\n task[\"blockedBy\"] = list(set(task[\"blockedBy\"] + add_blocked_by))\n if add_blocks:\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\n # Bidirectional: also update the blocked tasks' blockedBy lists\n for blocked_id in add_blocks:\n try:\n blocked = self._load(blocked_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n self._save(blocked)\n except ValueError:\n pass\n self._save(task)\n return json.dumps(task, indent=2)\n\n def _clear_dependency(self, completed_id: int):\n \"\"\"Remove completed_id from all other tasks' blockedBy lists.\"\"\"\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\", \"deleted\": \"[-]\"}.get(t[\"status\"], \"[?]\")\n blocked = f\" (blocked by: {t['blockedBy']})\" if t.get(\"blockedBy\") else \"\"\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{blocked}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(TASKS_DIR)\n\n\n# -- Base tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command 
for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\"), kw.get(\"addBlockedBy\"), kw.get(\"addBlocks\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"task_create\", \"description\": \"Create a new task.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"subject\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\"}}, \"required\": [\"subject\"]}},\n {\"name\": \"task_update\", \"description\": \"Update a task's status, owner, or dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\", \"deleted\"]}, \"owner\": {\"type\": \"string\", \"description\": \"Set when a teammate claims the task\"}, \"addBlockedBy\": {\"type\": \"array\", \"items\": {\"type\": 
\"integer\"}}, \"addBlocks\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_list\", \"description\": \"List all tasks with status summary.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"task_get\", \"description\": \"Get full details of a task by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms12 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): persistent tasks(持久任务)——让目标跨越单次会话存在。\n\"\"\"\ns12_task_system.py - Tasks(任务系统)\n\n任务以 JSON 文件形式持久化在 `.tasks/` 中,因此可跨越上下文压缩。\n每个任务都携带轻量依赖图:\n\n- blockedBy:what must finish first(先决依赖)\n- blocks:what this task unlocks later(后续解锁项)\n\n .tasks/\n task_1.json {\"id\":1, \"subject\":\"...\", \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\", ...}\n task_3.json {\"id\":3, \"blockedBy\":[2], \"blocks\":[], ...}\n\n 依赖解析(Dependency resolution):\n +----------+ +----------+ +----------+\n | task 1 | --> | task 2 | --> | task 3 |\n | 已完成 | | 被阻塞 | | 被阻塞 |\n +----------+ +----------+ +----------+\n | ^\n +--- 完成 task 1 后,会从 task 2 的 blockedBy 中移除它\n\n核心观点:任务状态能跨越压缩,因为它存储在磁盘而不只在会话消息里。\n这里是 durable work-graph(持久工作图)任务,不是临时执行槽位。\n\n建议阅读顺序:\n1. TaskManager:磁盘上的 TaskRecord 长什么样;\n2. TOOL_HANDLERS / TOOLS:任务操作如何进入统一工具循环;\n3. agent_loop:持久工作状态如何回传给模型。\n\n最常见混淆点:\n- task record 是持久工作项;\n- 它不是线程、后台槽位或 worker 进程。\n\n教学边界:\n本章先讲持久工作图;\n运行时执行槽位与调度器在后续章节展开。\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\nTASKS_DIR = WORKDIR / \".tasks\"\n\nSYSTEM = f\"你是位于 {WORKDIR} 的 coding agent(编码智能体),请使用任务工具进行规划与跟踪。\"\n\n\n# -- TaskManager:持久任务图的 CRUD --\nclass TaskManager:\n \"\"\"持久化 TaskRecord(任务记录)存储。\n\n 将它理解为“磁盘上的工作图”,而不是“正在运行的 worker”。\n \"\"\"\n\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = [int(f.stem.split(\"_\")[1]) for f in self.dir.glob(\"task_*.json\")]\n return max(ids) if ids else 0\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id, \"subject\": subject, \"description\": description,\n \"status\": \"pending\", \"blockedBy\": [], \"blocks\": [], \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def update(self, task_id: int, status: str = None, owner: str = None,\n add_blocked_by: list = None, add_blocks: list = None) -> str:\n task = self._load(task_id)\n if owner is not None:\n task[\"owner\"] = owner\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\", \"deleted\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n # 任务完成后,从其他任务的 blockedBy 中移除它\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocked_by:\n task[\"blockedBy\"] = list(set(task[\"blockedBy\"] + add_blocked_by))\n if add_blocks:\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\n # 双向维护:同步更新被阻塞任务的 blockedBy\n for blocked_id in add_blocks:\n try:\n blocked = self._load(blocked_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n self._save(blocked)\n except ValueError:\n pass\n self._save(task)\n 
return json.dumps(task, indent=2)\n\n def _clear_dependency(self, completed_id: int):\n \"\"\"把 completed_id 从其他任务的 blockedBy 中清除。\"\"\"\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\", \"deleted\": \"[-]\"}.get(t[\"status\"], \"[?]\")\n blocked = f\" (blocked by: {t['blockedBy']})\" if t.get(\"blockedBy\") else \"\"\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{blocked}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(TASKS_DIR)\n\n\n# -- 基础工具实现 --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\"), kw.get(\"addBlockedBy\"), kw.get(\"addBlocks\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"task_create\", \"description\": \"创建新任务。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"subject\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\"}}, \"required\": [\"subject\"]}},\n {\"name\": \"task_update\", \"description\": \"更新任务状态、owner 或依赖关系。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\", \"deleted\"]}, \"owner\": {\"type\": \"string\", \"description\": \"队友认领任务时填写\"}, \"addBlockedBy\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}, \"addBlocks\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_list\", \"description\": \"按状态汇总列出全部任务。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"task_get\", \"description\": \"按 ID 获取任务完整信息。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n 
messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms12 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: persistent tasks -- goals that outlive any single conversation.\n\"\"\"\ns12_task_system.py - Tasks\n\nTasks persist as JSON files in .tasks/ so they survive context compression.\nEach task carries a small dependency graph:\n\n- blockedBy: what must finish first\n- blocks: what this task unlocks later\n\n .tasks/\n task_1.json {\"id\":1, \"subject\":\"...\", \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\", ...}\n task_3.json {\"id\":3, \"blockedBy\":[2], \"blocks\":[], ...}\n\n Dependency resolution:\n +----------+ +----------+ +----------+\n | task 1 | --> | task 2 | --> | task 3 |\n | complete | | blocked | | blocked |\n +----------+ +----------+ +----------+\n | ^\n +--- completing task 1 removes it from task 2's blockedBy\n\nKey idea: task state survives compression because it lives on disk, not only\ninside the conversation.\nThese are durable work-graph tasks, not transient runtime execution slots.\n\nRead this file in this order:\n1. TaskManager: what a TaskRecord looks like on disk.\n2. TOOL_HANDLERS / TOOLS: how task operations enter the same loop as normal tools.\n3. agent_loop: how persistent work state is exposed back to the model.\n\nMost common confusion:\n- a task record is a durable work item\n- it is not a thread, background slot, or worker process\n\nTeaching boundary:\nthis chapter teaches the durable work graph first.\nRuntime execution slots and schedulers arrive later.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTASKS_DIR = WORKDIR / \".tasks\"\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
Use task tools to plan and track work.\"\n\n\n# -- TaskManager: CRUD for a persistent task graph --\nclass TaskManager:\n \"\"\"Persistent TaskRecord store.\n\n Think \"work graph on disk\", not \"currently running worker\".\n \"\"\"\n\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = [int(f.stem.split(\"_\")[1]) for f in self.dir.glob(\"task_*.json\")]\n return max(ids) if ids else 0\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id, \"subject\": subject, \"description\": description,\n \"status\": \"pending\", \"blockedBy\": [], \"blocks\": [], \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def update(self, task_id: int, status: str = None, owner: str = None,\n add_blocked_by: list = None, add_blocks: list = None) -> str:\n task = self._load(task_id)\n if owner is not None:\n task[\"owner\"] = owner\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\", \"deleted\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n # When a task is completed, remove it from all other tasks' blockedBy\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocked_by:\n task[\"blockedBy\"] = list(set(task[\"blockedBy\"] + add_blocked_by))\n if add_blocks:\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\n # Bidirectional: also update the blocked tasks' blockedBy lists\n for blocked_id in add_blocks:\n try:\n blocked = self._load(blocked_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n self._save(blocked)\n except ValueError:\n pass\n self._save(task)\n return json.dumps(task, indent=2)\n\n def _clear_dependency(self, completed_id: int):\n \"\"\"Remove completed_id from all other tasks' blockedBy lists.\"\"\"\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\", \"deleted\": \"[-]\"}.get(t[\"status\"], \"[?]\")\n blocked = f\" (blocked by: {t['blockedBy']})\" if t.get(\"blockedBy\") else \"\"\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{blocked}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(TASKS_DIR)\n\n\n# -- Base tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command 
for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\"), kw.get(\"addBlockedBy\"), kw.get(\"addBlocks\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"task_create\", \"description\": \"Create a new task.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"subject\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\"}}, \"required\": [\"subject\"]}},\n {\"name\": \"task_update\", \"description\": \"Update a task's status, owner, or dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\", \"deleted\"]}, \"owner\": {\"type\": \"string\", \"description\": \"Set when a teammate claims the task\"}, \"addBlockedBy\": {\"type\": \"array\", \"items\": {\"type\": 
\"integer\"}}, \"addBlocks\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_list\", \"description\": \"List all tasks with status summary.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"task_get\", \"description\": \"Get full details of a task by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms12 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: persistent tasks -- goals that outlive any single conversation.\n\"\"\"\ns12_task_system.py - Tasks\n\nTasks persist as JSON files in .tasks/ so they survive context compression.\nEach task carries a small dependency graph:\n\n- blockedBy: what must finish first\n- blocks: what this task unlocks later\n\n .tasks/\n task_1.json {\"id\":1, \"subject\":\"...\", \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\", ...}\n task_3.json {\"id\":3, \"blockedBy\":[2], \"blocks\":[], ...}\n\n Dependency resolution:\n +----------+ +----------+ +----------+\n | task 1 | --> | task 2 | --> | task 3 |\n | complete | | blocked | | blocked |\n +----------+ +----------+ +----------+\n | ^\n +--- completing task 1 removes it from task 2's blockedBy\n\nKey idea: task state survives compression because it lives on disk, not only\ninside the conversation.\nThese are durable work-graph tasks, not transient runtime execution slots.\n\nRead this file in this order:\n1. TaskManager: what a TaskRecord looks like on disk.\n2. TOOL_HANDLERS / TOOLS: how task operations enter the same loop as normal tools.\n3. agent_loop: how persistent work state is exposed back to the model.\n\nMost common confusion:\n- a task record is a durable work item\n- it is not a thread, background slot, or worker process\n\nTeaching boundary:\nthis chapter teaches the durable work graph first.\nRuntime execution slots and schedulers arrive later.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTASKS_DIR = WORKDIR / \".tasks\"\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
Use task tools to plan and track work.\"\n\n\n# -- TaskManager: CRUD for a persistent task graph --\nclass TaskManager:\n \"\"\"Persistent TaskRecord store.\n\n Think \"work graph on disk\", not \"currently running worker\".\n \"\"\"\n\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = [int(f.stem.split(\"_\")[1]) for f in self.dir.glob(\"task_*.json\")]\n return max(ids) if ids else 0\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id, \"subject\": subject, \"description\": description,\n \"status\": \"pending\", \"blockedBy\": [], \"blocks\": [], \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def update(self, task_id: int, status: str = None, owner: str = None,\n add_blocked_by: list = None, add_blocks: list = None) -> str:\n task = self._load(task_id)\n if owner is not None:\n task[\"owner\"] = owner\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\", \"deleted\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n # When a task is completed, remove it from all other tasks' blockedBy\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocked_by:\n task[\"blockedBy\"] = list(set(task[\"blockedBy\"] + add_blocked_by))\n if add_blocks:\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\n # Bidirectional: also update the blocked tasks' blockedBy lists\n for blocked_id in add_blocks:\n try:\n blocked = self._load(blocked_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n self._save(blocked)\n except ValueError:\n pass\n self._save(task)\n return json.dumps(task, indent=2)\n\n def _clear_dependency(self, completed_id: int):\n \"\"\"Remove completed_id from all other tasks' blockedBy lists.\"\"\"\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\", \"deleted\": \"[-]\"}.get(t[\"status\"], \"[?]\")\n blocked = f\" (blocked by: {t['blockedBy']})\" if t.get(\"blockedBy\") else \"\"\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{blocked}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(TASKS_DIR)\n\n\n# -- Base tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command 
for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\"), kw.get(\"addBlockedBy\"), kw.get(\"addBlocks\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"task_create\", \"description\": \"Create a new task.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"subject\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\"}}, \"required\": [\"subject\"]}},\n {\"name\": \"task_update\", \"description\": \"Update a task's status, owner, or dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\", \"deleted\"]}, \"owner\": {\"type\": \"string\", \"description\": \"Set when a teammate claims the task\"}, \"addBlockedBy\": {\"type\": \"array\", \"items\": {\"type\": 
\"integer\"}}, \"addBlocks\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_list\", \"description\": \"List all tasks with status summary.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"task_get\", \"description\": \"Get full details of a task by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms12 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
},
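Note on the s12 entry above: its handlers call into a TASKS object (create / update / list_all / get) that is defined earlier in the embedded source and does not appear in this hunk. The sketch below is only an illustration of an in-memory store that would satisfy those calls and the task_update schema (status, owner, addBlockedBy, addBlocks); the class and field names are assumptions, not the repository's implementation.

import json

# Sketch only: a hypothetical store compatible with the s12 task_* tool schemas.
class TaskStore:
    def __init__(self):
        self.tasks = {}    # task_id -> record dict
        self._next_id = 1

    def create(self, subject: str, description: str = "") -> str:
        tid = self._next_id
        self._next_id += 1
        self.tasks[tid] = {
            "subject": subject, "description": description,
            "status": "pending", "owner": None,
            "blockedBy": [], "blocks": [],
        }
        return f"Created task {tid}: {subject}"

    def update(self, task_id: int, status=None, owner=None,
               addBlockedBy=None, addBlocks=None) -> str:
        t = self.tasks.get(task_id)
        if not t:
            return f"Error: Unknown task {task_id}"
        if status:
            t["status"] = status
        if owner:
            t["owner"] = owner
        t["blockedBy"].extend(addBlockedBy or [])   # dependency edges, matching addBlockedBy
        t["blocks"].extend(addBlocks or [])          # reverse edges, matching addBlocks
        return f"Updated task {task_id}"

    def list_all(self) -> str:
        return "\n".join(
            f"{tid}: [{t['status']}] {t['subject']}" for tid, t in self.tasks.items()
        ) or "No tasks."

    def get(self, task_id: int) -> str:
        t = self.tasks.get(task_id)
        return json.dumps({"id": task_id, **t}, indent=2) if t else f"Error: Unknown task {task_id}"

Under this assumption, s12's TOOL_HANDLERS would simply bind TASKS = TaskStore() and dispatch to it exactly as the lambdas above do.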
{
- "id": "s08",
- "filename": "s08_background_tasks.py",
+ "id": "s13",
+ "filename": "s13_background_tasks.py",
"title": "Background Tasks",
- "subtitle": "Background Threads + Notifications",
- "loc": 198,
+ "subtitle": "Separate Goal from Running Work",
+ "loc": 287,
"tools": [
"bash",
"read_file",
@@ -417,56 +907,150 @@
"background_run",
"check_background"
],
- "coreAddition": "BackgroundManager + notification queue",
- "keyInsight": "Run slow operations in the background; the agent keeps thinking ahead",
+ "coreAddition": "RuntimeTaskState + async execution slots",
+ "keyInsight": "Background execution is a runtime lane, not a second main loop.",
"classes": [
+ {
+ "name": "NotificationQueue",
+ "startLine": 56,
+ "endLine": 87
+ },
{
"name": "BackgroundManager",
- "startLine": 49,
- "endLine": 109
+ "startLine": 88,
+ "endLine": 211
}
],
"functions": [
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
- "startLine": 114
+ "startLine": 216
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
- "startLine": 120
+ "startLine": 222
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int = None)",
- "startLine": 132
+ "startLine": 234
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
- "startLine": 141
+ "startLine": 243
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
- "startLine": 150
+ "startLine": 252
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
- "startLine": 187
+ "startLine": 289
}
],
- "layer": "concurrency",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns08_background_tasks.py - Background Tasks\n\nRun commands in background threads. A notification queue is drained\nbefore each LLM call to deliver results.\n\n Main thread Background thread\n +-----------------+ +-----------------+\n | agent loop | | task executes |\n | ... | | ... |\n | [LLM call] <---+------- | enqueue(result) |\n | ^drain queue | +-----------------+\n +-----------------+\n\n Timeline:\n Agent ----[spawn A]----[spawn B]----[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- notification queue --> [results injected]\n\nKey insight: \"Fire and forget -- the agent doesn't block while the command runs.\"\n\"\"\"\n\nimport os\nimport subprocess\nimport threading\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use background_run for long-running commands.\"\n\n\n# -- BackgroundManager: threaded execution + notification queue --\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {} # task_id -> {status, result, command}\n self._notification_queue = [] # completed task results\n self._lock = threading.Lock()\n\n def run(self, command: str) -> str:\n \"\"\"Start a background thread, return task_id immediately.\"\"\"\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"result\": None, \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True\n )\n thread.start()\n return f\"Background task {task_id} started: {command[:80]}\"\n\n def _execute(self, task_id: str, command: str):\n \"\"\"Thread target: run subprocess, capture output, push to queue.\"\"\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300\n )\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n except Exception as e:\n output = f\"Error: {e}\"\n status = \"error\"\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = output or \"(no output)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"command\": command[:80],\n \"result\": (output or \"(no output)\")[:500],\n })\n\n def check(self, task_id: str = None) -> str:\n \"\"\"Check status of one task or list all.\"\"\"\n if task_id:\n t = self.tasks.get(task_id)\n if not t:\n return f\"Error: Unknown task {task_id}\"\n return f\"[{t['status']}] {t['command'][:60]}\\n{t.get('result') or '(running)'}\"\n lines = []\n for tid, t in self.tasks.items():\n lines.append(f\"{tid}: [{t['status']}] {t['command'][:60]}\")\n return \"\\n\".join(lines) if lines else \"No background tasks.\"\n\n def drain_notifications(self) -> list:\n \"\"\"Return and clear all pending completion notifications.\"\"\"\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n\n\nBG = BackgroundManager()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: 
{p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"background_run\": lambda **kw: BG.run(kw[\"command\"]),\n \"check_background\": lambda **kw: BG.check(kw.get(\"task_id\")),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command (blocking).\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"background_run\", \"description\": \"Run command in background thread. Returns task_id immediately.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"check_background\", \"description\": \"Check background task status. 
Omit task_id to list all.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"string\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # Drain background notifications and inject as system message before LLM call\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: {n['result']}\" for n in notifs\n )\n messages.append({\"role\": \"user\", \"content\": f\"\\n{notif_text}\\n \"})\n messages.append({\"role\": \"assistant\", \"content\": \"Noted background results.\"})\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms08 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ "layer": "runtime",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: background execution -- the model thinks while the harness waits.\n\"\"\"\ns13_background_tasks.py - Background Tasks\n\nRun slow commands in background threads. Before each LLM call, the loop\ndrains a notification queue and hands finished results back to the model.\n\n Main thread Background thread\n +-----------------+ +-----------------+\n | agent loop | | task executes |\n | ... | | ... |\n | [LLM call] <---+------- | enqueue(result) |\n | ^drain queue | +-----------------+\n +-----------------+\n\n Timeline:\n Agent ----[spawn A]----[spawn B]----[other work]----\n | |\n v v\n [A runs] [B runs]\n | |\n +-- notification queue --> [results injected]\n\nBackground tasks here are runtime execution slots, not the durable task-board\nrecords introduced in s12.\n\"\"\"\n\nimport os\nimport json\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nRUNTIME_DIR = WORKDIR / \".runtime-tasks\"\nRUNTIME_DIR.mkdir(exist_ok=True)\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use background_run for long-running commands.\"\n\nSTALL_THRESHOLD_S = 45 # seconds before a task is considered stalled\n\n\nclass NotificationQueue:\n \"\"\"\n Priority-based notification queue with same-key folding.\n\n Folding means a newer message can replace an older message with the\n same key, so the context is not flooded with stale updates.\n \"\"\"\n\n PRIORITIES = {\"immediate\": 0, \"high\": 1, \"medium\": 2, \"low\": 3}\n\n def __init__(self):\n self._queue = [] # list of (priority, key, message)\n self._lock = threading.Lock()\n\n def push(self, message: str, priority: str = \"medium\", key: str = None):\n \"\"\"Add a message to the queue, folding if key matches an existing entry.\"\"\"\n with self._lock:\n if key:\n # Fold: replace existing message with same key\n self._queue = [(p, k, m) for p, k, m in self._queue if k != key]\n self._queue.append((self.PRIORITIES.get(priority, 2), key, message))\n self._queue.sort(key=lambda x: x[0])\n\n def drain(self) -> list[str]:\n \"\"\"Return all pending messages in priority order and clear the queue.\"\"\"\n with self._lock:\n messages = [m for _, _, m in self._queue]\n self._queue.clear()\n return messages\n\n\n# -- BackgroundManager: threaded execution + notification queue --\nclass BackgroundManager:\n def __init__(self):\n self.dir = RUNTIME_DIR\n self.tasks = {} # task_id -> {status, result, command, started_at}\n self._notification_queue = [] # completed task results\n self._lock = threading.Lock()\n\n def _record_path(self, task_id: str) -> Path:\n return self.dir / f\"{task_id}.json\"\n\n def _output_path(self, task_id: str) -> Path:\n return self.dir / f\"{task_id}.log\"\n\n def _persist_task(self, task_id: str):\n record = dict(self.tasks[task_id])\n self._record_path(task_id).write_text(\n json.dumps(record, indent=2, ensure_ascii=False)\n )\n\n def _preview(self, output: str, limit: int = 500) -> str:\n compact = \" \".join((output or \"(no output)\").split())\n return compact[:limit]\n\n def run(self, command: str) -> str:\n \"\"\"Start a background thread, return task_id immediately.\"\"\"\n task_id = str(uuid.uuid4())[:8]\n output_file = self._output_path(task_id)\n 
self.tasks[task_id] = {\n \"id\": task_id,\n \"status\": \"running\",\n \"result\": None,\n \"command\": command,\n \"started_at\": time.time(),\n \"finished_at\": None,\n \"result_preview\": \"\",\n \"output_file\": str(output_file.relative_to(WORKDIR)),\n }\n self._persist_task(task_id)\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True\n )\n thread.start()\n return (\n f\"Background task {task_id} started: {command[:80]} \"\n f\"(output_file={output_file.relative_to(WORKDIR)})\"\n )\n\n def _execute(self, task_id: str, command: str):\n \"\"\"Thread target: run subprocess, capture output, push to queue.\"\"\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300\n )\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n except Exception as e:\n output = f\"Error: {e}\"\n status = \"error\"\n final_output = output or \"(no output)\"\n preview = self._preview(final_output)\n output_path = self._output_path(task_id)\n output_path.write_text(final_output)\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = final_output\n self.tasks[task_id][\"finished_at\"] = time.time()\n self.tasks[task_id][\"result_preview\"] = preview\n self._persist_task(task_id)\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"command\": command[:80],\n \"preview\": preview,\n \"output_file\": str(output_path.relative_to(WORKDIR)),\n })\n\n def check(self, task_id: str = None) -> str:\n \"\"\"Check status of one task or list all.\"\"\"\n if task_id:\n t = self.tasks.get(task_id)\n if not t:\n return f\"Error: Unknown task {task_id}\"\n visible = {\n \"id\": t[\"id\"],\n \"status\": t[\"status\"],\n \"command\": t[\"command\"],\n \"result_preview\": t.get(\"result_preview\", \"\"),\n \"output_file\": t.get(\"output_file\", \"\"),\n }\n return json.dumps(visible, indent=2, ensure_ascii=False)\n lines = []\n for tid, t in self.tasks.items():\n lines.append(\n f\"{tid}: [{t['status']}] {t['command'][:60]} \"\n f\"-> {t.get('result_preview') or '(running)'}\"\n )\n return \"\\n\".join(lines) if lines else \"No background tasks.\"\n\n def drain_notifications(self) -> list:\n \"\"\"Return and clear all pending completion notifications.\"\"\"\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n\n def detect_stalled(self) -> list[str]:\n \"\"\"\n Return task IDs that have been running longer than STALL_THRESHOLD_S.\n \"\"\"\n now = time.time()\n stalled = []\n for task_id, info in self.tasks.items():\n if info[\"status\"] != \"running\":\n continue\n elapsed = now - info.get(\"started_at\", now)\n if elapsed > STALL_THRESHOLD_S:\n stalled.append(task_id)\n return stalled\n\n\nBG = BackgroundManager()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out 
else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"background_run\": lambda **kw: BG.run(kw[\"command\"]),\n \"check_background\": lambda **kw: BG.check(kw.get(\"task_id\")),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command (blocking).\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"background_run\", \"description\": \"Run command in background thread. Returns task_id immediately.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"check_background\", \"description\": \"Check background task status. 
Omit task_id to list all.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"string\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # Drain background notifications and inject as a synthetic user/assistant\n # transcript pair before the next model call (teaching demo behavior).\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: {n['preview']} \"\n f\"(output_file={n['output_file']})\"\n for n in notifs\n )\n messages.append({\"role\": \"user\", \"content\": f\"\\n{notif_text}\\n \"})\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms13 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): background execution(后台执行)——模型思考时,框架在等待。\n\"\"\"\ns13_background_tasks.py - Background Tasks(后台任务)\n\n耗时命令在后台线程执行。每次 LLM 调用前,\n循环都会清空通知队列并把完成结果回传给模型。\n\n 主线程(Main thread) 后台线程(Background thread)\n +-----------------+ +-----------------+\n | agent loop(主循环) | | task executes(后台执行) |\n | ... | | ... |\n | [LLM call] <---+------- | enqueue(result) |\n | ^drain queue(清空队列)| +-----------------+\n +-----------------+\n\n 时间线(Timeline):\n Agent ----[spawn A]----[spawn B]----[other work(其他工作)]----\n | |\n v v\n [A runs(A 执行)] [B runs(B 执行)]\n | |\n +-- notification queue(通知队列)--> [results injected(结果注入)]\n\n本章的后台任务是 runtime execution slots(运行时执行槽位),\n不是 s12 引入的持久任务板记录。\n\"\"\"\n\nimport os\nimport json\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\n\nWORKDIR = Path.cwd()\nRUNTIME_DIR = WORKDIR / \".runtime-tasks\"\nRUNTIME_DIR.mkdir(exist_ok=True)\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"你是位于 {WORKDIR} 的 coding agent(编码智能体),耗时命令请使用 background_run。\"\n\nSTALL_THRESHOLD_S = 45 # 超过该秒数仍未结束则判定为 stalled(卡住)\n\n\nclass NotificationQueue:\n \"\"\"\n 带优先级与同键折叠(same-key folding)的通知队列。\n\n 折叠意味着:同 key 的新消息会替换旧消息,\n 防止过时更新淹没上下文。\n \"\"\"\n\n PRIORITIES = {\"immediate\": 0, \"high\": 1, \"medium\": 2, \"low\": 3}\n\n def __init__(self):\n self._queue = [] # 队列项结构:(priority, key, message)\n self._lock = threading.Lock()\n\n def push(self, message: str, priority: str = \"medium\", key: str = None):\n \"\"\"写入消息;若 key 已存在则触发折叠替换。\"\"\"\n with self._lock:\n if key:\n # 折叠:替换同 key 的旧消息\n self._queue = [(p, k, m) for p, k, m in self._queue if k != key]\n self._queue.append((self.PRIORITIES.get(priority, 2), key, message))\n self._queue.sort(key=lambda x: x[0])\n\n def drain(self) -> list[str]:\n \"\"\"按优先级返回全部待处理消息,并清空队列。\"\"\"\n with self._lock:\n messages = [m for _, _, m in self._queue]\n self._queue.clear()\n return messages\n\n\n# -- BackgroundManager:线程执行 + 通知队列 --\nclass BackgroundManager:\n def __init__(self):\n self.dir = RUNTIME_DIR\n self.tasks = {} # task_id(任务 ID)-> {status, result, command, started_at}\n self._notification_queue = [] # 已完成任务的结果通知\n self._lock = threading.Lock()\n\n def _record_path(self, task_id: str) -> Path:\n return self.dir / f\"{task_id}.json\"\n\n def _output_path(self, task_id: str) -> Path:\n return self.dir / f\"{task_id}.log\"\n\n def _persist_task(self, task_id: str):\n record = dict(self.tasks[task_id])\n self._record_path(task_id).write_text(\n json.dumps(record, indent=2, ensure_ascii=False)\n )\n\n def _preview(self, output: str, limit: int = 500) -> str:\n compact = \" \".join((output or \"(no output)\").split())\n return compact[:limit]\n\n def run(self, command: str) -> str:\n \"\"\"启动后台线程并立即返回 task_id。\"\"\"\n task_id = str(uuid.uuid4())[:8]\n output_file = self._output_path(task_id)\n self.tasks[task_id] = {\n \"id\": task_id,\n \"status\": \"running\",\n \"result\": None,\n \"command\": command,\n \"started_at\": time.time(),\n \"finished_at\": None,\n \"result_preview\": \"\",\n \"output_file\": str(output_file.relative_to(WORKDIR)),\n }\n self._persist_task(task_id)\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True\n )\n thread.start()\n return (\n f\"Background task {task_id} started: {command[:80]} \"\n 
f\"(output_file={output_file.relative_to(WORKDIR)})\"\n )\n\n def _execute(self, task_id: str, command: str):\n \"\"\"线程目标:执行子进程、捕获输出、推送通知。\"\"\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300\n )\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n except Exception as e:\n output = f\"Error: {e}\"\n status = \"error\"\n final_output = output or \"(no output)\"\n preview = self._preview(final_output)\n output_path = self._output_path(task_id)\n output_path.write_text(final_output)\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = final_output\n self.tasks[task_id][\"finished_at\"] = time.time()\n self.tasks[task_id][\"result_preview\"] = preview\n self._persist_task(task_id)\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"command\": command[:80],\n \"preview\": preview,\n \"output_file\": str(output_path.relative_to(WORKDIR)),\n })\n\n def check(self, task_id: str = None) -> str:\n \"\"\"查询单个任务状态,或列出全部任务。\"\"\"\n if task_id:\n t = self.tasks.get(task_id)\n if not t:\n return f\"Error: Unknown task {task_id}\"\n visible = {\n \"id\": t[\"id\"],\n \"status\": t[\"status\"],\n \"command\": t[\"command\"],\n \"result_preview\": t.get(\"result_preview\", \"\"),\n \"output_file\": t.get(\"output_file\", \"\"),\n }\n return json.dumps(visible, indent=2, ensure_ascii=False)\n lines = []\n for tid, t in self.tasks.items():\n lines.append(\n f\"{tid}: [{t['status']}] {t['command'][:60]} \"\n f\"-> {t.get('result_preview') or '(running)'}\"\n )\n return \"\\n\".join(lines) if lines else \"No background tasks.\"\n\n def drain_notifications(self) -> list:\n \"\"\"读取并清空所有待处理完成通知。\"\"\"\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n\n def detect_stalled(self) -> list[str]:\n \"\"\"\n 返回运行时间超过 STALL_THRESHOLD_S 的任务 ID 列表。\n \"\"\"\n now = time.time()\n stalled = []\n for task_id, info in self.tasks.items():\n if info[\"status\"] != \"running\":\n continue\n elapsed = now - info.get(\"started_at\", now)\n if elapsed > STALL_THRESHOLD_S:\n stalled.append(task_id)\n return stalled\n\n\nBG = BackgroundManager()\n\n\n# -- 工具实现 --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"background_run\": lambda **kw: BG.run(kw[\"command\"]),\n \"check_background\": lambda **kw: BG.check(kw.get(\"task_id\")),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令(阻塞执行)。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"background_run\", \"description\": \"在后台线程执行命令,并立即返回 task_id。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"check_background\", \"description\": \"检查后台任务状态;省略 task_id 则列出全部。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"string\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # 清空后台通知,并在下一次模型调用前注入合成消息(教学演示行为)。\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: {n['preview']} \"\n f\"(output_file={n['output_file']})\"\n for n in notifs\n )\n messages.append({\"role\": \"user\", \"content\": f\"\\n{notif_text}\\n \"})\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", 
\"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms13 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: background execution -- the model thinks while the harness waits.\n\"\"\"\ns13_background_tasks.py - Background Tasks\n\nRun slow commands in background threads. Before each LLM call, the loop\ndrains a notification queue and hands finished results back to the model.\n\n Main thread Background thread\n +-----------------+ +-----------------+\n | agent loop | | task executes |\n | ... | | ... |\n | [LLM call] <---+------- | enqueue(result) |\n | ^drain queue | +-----------------+\n +-----------------+\n\n Timeline:\n Agent ----[spawn A]----[spawn B]----[other work]----\n | |\n v v\n [A runs] [B runs]\n | |\n +-- notification queue --> [results injected]\n\nBackground tasks here are runtime execution slots, not the durable task-board\nrecords introduced in s12.\n\"\"\"\n\nimport os\nimport json\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nRUNTIME_DIR = WORKDIR / \".runtime-tasks\"\nRUNTIME_DIR.mkdir(exist_ok=True)\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use background_run for long-running commands.\"\n\nSTALL_THRESHOLD_S = 45 # seconds before a task is considered stalled\n\n\nclass NotificationQueue:\n \"\"\"\n Priority-based notification queue with same-key folding.\n\n Folding means a newer message can replace an older message with the\n same key, so the context is not flooded with stale updates.\n \"\"\"\n\n PRIORITIES = {\"immediate\": 0, \"high\": 1, \"medium\": 2, \"low\": 3}\n\n def __init__(self):\n self._queue = [] # list of (priority, key, message)\n self._lock = threading.Lock()\n\n def push(self, message: str, priority: str = \"medium\", key: str = None):\n \"\"\"Add a message to the queue, folding if key matches an existing entry.\"\"\"\n with self._lock:\n if key:\n # Fold: replace existing message with same key\n self._queue = [(p, k, m) for p, k, m in self._queue if k != key]\n self._queue.append((self.PRIORITIES.get(priority, 2), key, message))\n self._queue.sort(key=lambda x: x[0])\n\n def drain(self) -> list[str]:\n \"\"\"Return all pending messages in priority order and clear the queue.\"\"\"\n with self._lock:\n messages = [m for _, _, m in self._queue]\n self._queue.clear()\n return messages\n\n\n# -- BackgroundManager: threaded execution + notification queue --\nclass BackgroundManager:\n def __init__(self):\n self.dir = RUNTIME_DIR\n self.tasks = {} # task_id -> {status, result, command, started_at}\n self._notification_queue = [] # completed task results\n self._lock = threading.Lock()\n\n def _record_path(self, task_id: str) -> Path:\n return self.dir / f\"{task_id}.json\"\n\n def _output_path(self, task_id: str) -> Path:\n return self.dir / f\"{task_id}.log\"\n\n def _persist_task(self, task_id: str):\n record = dict(self.tasks[task_id])\n self._record_path(task_id).write_text(\n json.dumps(record, indent=2, ensure_ascii=False)\n )\n\n def _preview(self, output: str, limit: int = 500) -> str:\n compact = \" \".join((output or \"(no output)\").split())\n return compact[:limit]\n\n def run(self, command: str) -> str:\n \"\"\"Start a background thread, return task_id immediately.\"\"\"\n task_id = str(uuid.uuid4())[:8]\n output_file = self._output_path(task_id)\n 
self.tasks[task_id] = {\n \"id\": task_id,\n \"status\": \"running\",\n \"result\": None,\n \"command\": command,\n \"started_at\": time.time(),\n \"finished_at\": None,\n \"result_preview\": \"\",\n \"output_file\": str(output_file.relative_to(WORKDIR)),\n }\n self._persist_task(task_id)\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True\n )\n thread.start()\n return (\n f\"Background task {task_id} started: {command[:80]} \"\n f\"(output_file={output_file.relative_to(WORKDIR)})\"\n )\n\n def _execute(self, task_id: str, command: str):\n \"\"\"Thread target: run subprocess, capture output, push to queue.\"\"\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300\n )\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n except Exception as e:\n output = f\"Error: {e}\"\n status = \"error\"\n final_output = output or \"(no output)\"\n preview = self._preview(final_output)\n output_path = self._output_path(task_id)\n output_path.write_text(final_output)\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = final_output\n self.tasks[task_id][\"finished_at\"] = time.time()\n self.tasks[task_id][\"result_preview\"] = preview\n self._persist_task(task_id)\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"command\": command[:80],\n \"preview\": preview,\n \"output_file\": str(output_path.relative_to(WORKDIR)),\n })\n\n def check(self, task_id: str = None) -> str:\n \"\"\"Check status of one task or list all.\"\"\"\n if task_id:\n t = self.tasks.get(task_id)\n if not t:\n return f\"Error: Unknown task {task_id}\"\n visible = {\n \"id\": t[\"id\"],\n \"status\": t[\"status\"],\n \"command\": t[\"command\"],\n \"result_preview\": t.get(\"result_preview\", \"\"),\n \"output_file\": t.get(\"output_file\", \"\"),\n }\n return json.dumps(visible, indent=2, ensure_ascii=False)\n lines = []\n for tid, t in self.tasks.items():\n lines.append(\n f\"{tid}: [{t['status']}] {t['command'][:60]} \"\n f\"-> {t.get('result_preview') or '(running)'}\"\n )\n return \"\\n\".join(lines) if lines else \"No background tasks.\"\n\n def drain_notifications(self) -> list:\n \"\"\"Return and clear all pending completion notifications.\"\"\"\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n\n def detect_stalled(self) -> list[str]:\n \"\"\"\n Return task IDs that have been running longer than STALL_THRESHOLD_S.\n \"\"\"\n now = time.time()\n stalled = []\n for task_id, info in self.tasks.items():\n if info[\"status\"] != \"running\":\n continue\n elapsed = now - info.get(\"started_at\", now)\n if elapsed > STALL_THRESHOLD_S:\n stalled.append(task_id)\n return stalled\n\n\nBG = BackgroundManager()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out 
else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"background_run\": lambda **kw: BG.run(kw[\"command\"]),\n \"check_background\": lambda **kw: BG.check(kw.get(\"task_id\")),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command (blocking).\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"background_run\", \"description\": \"Run command in background thread. Returns task_id immediately.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"check_background\", \"description\": \"Check background task status. 
Omit task_id to list all.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"string\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # Drain background notifications and inject as a synthetic user/assistant\n # transcript pair before the next model call (teaching demo behavior).\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: {n['preview']} \"\n f\"(output_file={n['output_file']})\"\n for n in notifs\n )\n messages.append({\"role\": \"user\", \"content\": f\"\\n{notif_text}\\n \"})\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms13 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: background execution -- the model thinks while the harness waits.\n\"\"\"\ns13_background_tasks.py - Background Tasks\n\nRun slow commands in background threads. Before each LLM call, the loop\ndrains a notification queue and hands finished results back to the model.\n\n Main thread Background thread\n +-----------------+ +-----------------+\n | agent loop | | task executes |\n | ... | | ... |\n | [LLM call] <---+------- | enqueue(result) |\n | ^drain queue | +-----------------+\n +-----------------+\n\n Timeline:\n Agent ----[spawn A]----[spawn B]----[other work]----\n | |\n v v\n [A runs] [B runs]\n | |\n +-- notification queue --> [results injected]\n\nBackground tasks here are runtime execution slots, not the durable task-board\nrecords introduced in s12.\n\"\"\"\n\nimport os\nimport json\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nRUNTIME_DIR = WORKDIR / \".runtime-tasks\"\nRUNTIME_DIR.mkdir(exist_ok=True)\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use background_run for long-running commands.\"\n\nSTALL_THRESHOLD_S = 45 # seconds before a task is considered stalled\n\n\nclass NotificationQueue:\n \"\"\"\n Priority-based notification queue with same-key folding.\n\n Folding means a newer message can replace an older message with the\n same key, so the context is not flooded with stale updates.\n \"\"\"\n\n PRIORITIES = {\"immediate\": 0, \"high\": 1, \"medium\": 2, \"low\": 3}\n\n def __init__(self):\n self._queue = [] # list of (priority, key, message)\n self._lock = threading.Lock()\n\n def push(self, message: str, priority: str = \"medium\", key: str = None):\n \"\"\"Add a message to the queue, folding if key matches an existing entry.\"\"\"\n with self._lock:\n if key:\n # Fold: replace existing message with same key\n self._queue = [(p, k, m) for p, k, m in self._queue if k != key]\n self._queue.append((self.PRIORITIES.get(priority, 2), key, message))\n self._queue.sort(key=lambda x: x[0])\n\n def drain(self) -> list[str]:\n \"\"\"Return all pending messages in priority order and clear the queue.\"\"\"\n with self._lock:\n messages = [m for _, _, m in self._queue]\n self._queue.clear()\n return messages\n\n\n# -- BackgroundManager: threaded execution + notification queue --\nclass BackgroundManager:\n def __init__(self):\n self.dir = RUNTIME_DIR\n self.tasks = {} # task_id -> {status, result, command, started_at}\n self._notification_queue = [] # completed task results\n self._lock = threading.Lock()\n\n def _record_path(self, task_id: str) -> Path:\n return self.dir / f\"{task_id}.json\"\n\n def _output_path(self, task_id: str) -> Path:\n return self.dir / f\"{task_id}.log\"\n\n def _persist_task(self, task_id: str):\n record = dict(self.tasks[task_id])\n self._record_path(task_id).write_text(\n json.dumps(record, indent=2, ensure_ascii=False)\n )\n\n def _preview(self, output: str, limit: int = 500) -> str:\n compact = \" \".join((output or \"(no output)\").split())\n return compact[:limit]\n\n def run(self, command: str) -> str:\n \"\"\"Start a background thread, return task_id immediately.\"\"\"\n task_id = str(uuid.uuid4())[:8]\n output_file = 
self._output_path(task_id)\n self.tasks[task_id] = {\n \"id\": task_id,\n \"status\": \"running\",\n \"result\": None,\n \"command\": command,\n \"started_at\": time.time(),\n \"finished_at\": None,\n \"result_preview\": \"\",\n \"output_file\": str(output_file.relative_to(WORKDIR)),\n }\n self._persist_task(task_id)\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True\n )\n thread.start()\n return (\n f\"Background task {task_id} started: {command[:80]} \"\n f\"(output_file={output_file.relative_to(WORKDIR)})\"\n )\n\n def _execute(self, task_id: str, command: str):\n \"\"\"Thread target: run subprocess, capture output, push to queue.\"\"\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300\n )\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n except Exception as e:\n output = f\"Error: {e}\"\n status = \"error\"\n final_output = output or \"(no output)\"\n preview = self._preview(final_output)\n output_path = self._output_path(task_id)\n output_path.write_text(final_output)\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = final_output\n self.tasks[task_id][\"finished_at\"] = time.time()\n self.tasks[task_id][\"result_preview\"] = preview\n self._persist_task(task_id)\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"command\": command[:80],\n \"preview\": preview,\n \"output_file\": str(output_path.relative_to(WORKDIR)),\n })\n\n def check(self, task_id: str = None) -> str:\n \"\"\"Check status of one task or list all.\"\"\"\n if task_id:\n t = self.tasks.get(task_id)\n if not t:\n return f\"Error: Unknown task {task_id}\"\n visible = {\n \"id\": t[\"id\"],\n \"status\": t[\"status\"],\n \"command\": t[\"command\"],\n \"result_preview\": t.get(\"result_preview\", \"\"),\n \"output_file\": t.get(\"output_file\", \"\"),\n }\n return json.dumps(visible, indent=2, ensure_ascii=False)\n lines = []\n for tid, t in self.tasks.items():\n lines.append(\n f\"{tid}: [{t['status']}] {t['command'][:60]} \"\n f\"-> {t.get('result_preview') or '(running)'}\"\n )\n return \"\\n\".join(lines) if lines else \"No background tasks.\"\n\n def drain_notifications(self) -> list:\n \"\"\"Return and clear all pending completion notifications.\"\"\"\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n\n def detect_stalled(self) -> list[str]:\n \"\"\"\n Return task IDs that have been running longer than STALL_THRESHOLD_S.\n \"\"\"\n now = time.time()\n stalled = []\n for task_id, info in self.tasks.items():\n if info[\"status\"] != \"running\":\n continue\n elapsed = now - info.get(\"started_at\", now)\n if elapsed > STALL_THRESHOLD_S:\n stalled.append(task_id)\n return stalled\n\n\nBG = BackgroundManager()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n 
return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"background_run\": lambda **kw: BG.run(kw[\"command\"]),\n \"check_background\": lambda **kw: BG.check(kw.get(\"task_id\")),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command (blocking).\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"background_run\", \"description\": \"Run command in background thread. Returns task_id immediately.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"check_background\", \"description\": \"Check background task status. 
Omit task_id to list all.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"string\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # Drain background notifications and inject as a synthetic user/assistant\n # transcript pair before the next model call (teaching demo behavior).\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: {n['preview']} \"\n f\"(output_file={n['output_file']})\"\n for n in notifs\n )\n messages.append({\"role\": \"user\", \"content\": f\"\\n{notif_text}\\n \"})\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms13 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
},
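Besides the BackgroundManager notifications that agent_loop drains, the s13 entry above introduces a NotificationQueue with priority ordering and same-key folding. The snippet below is a simplified, self-contained restatement of that folding behavior for illustration only; names are assumptions and only the semantics mirror the class in the source above.

import threading

# Illustration of same-key folding: a newer message replaces an older one with the same key.
class FoldingQueue:
    PRIORITIES = {"immediate": 0, "high": 1, "medium": 2, "low": 3}

    def __init__(self):
        self._items = []              # (priority, key, message)
        self._lock = threading.Lock()

    def push(self, message, priority="medium", key=None):
        with self._lock:
            if key is not None:
                # Fold: drop any older entry carrying the same key.
                self._items = [it for it in self._items if it[1] != key]
            self._items.append((self.PRIORITIES.get(priority, 2), key, message))
            self._items.sort(key=lambda it: it[0])  # stable sort keeps arrival order within a priority

    def drain(self):
        with self._lock:
            msgs = [m for _, _, m in self._items]
            self._items.clear()
            return msgs

q = FoldingQueue()
q.push("build 37% complete", priority="low", key="build")
q.push("unit tests started", priority="medium", key="tests")
q.push("build 82% complete", priority="low", key="build")   # replaces the stale 37% update
print(q.drain())   # ['unit tests started', 'build 82% complete']

The payoff is context hygiene: when the loop finally injects notifications before an LLM call, the model sees one current line per key instead of every intermediate progress update.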
{
- "id": "s09",
- "filename": "s09_agent_teams.py",
+ "id": "s14",
+ "filename": "s14_cron_scheduler.py",
+ "title": "Cron Scheduler",
+ "subtitle": "Let Time Trigger Work",
+ "loc": 452,
+ "tools": [
+ "bash",
+ "read_file",
+ "write_file",
+ "edit_file",
+ "cron_create",
+ "cron_delete",
+ "cron_list"
+ ],
+ "newTools": [
+ "cron_create",
+ "cron_delete",
+ "cron_list"
+ ],
+ "coreAddition": "Scheduled triggers over runtime tasks",
+ "keyInsight": "Scheduling is not a separate system -- it just feeds the same agent loop from a timer.",
+ "classes": [
+ {
+ "name": "CronLock",
+ "startLine": 87,
+ "endLine": 126
+ },
+ {
+ "name": "CronScheduler",
+ "startLine": 182,
+ "endLine": 393
+ }
+ ],
+ "functions": [
+ {
+ "name": "cron_matches",
+ "signature": "def cron_matches(expr: str, dt: datetime)",
+ "startLine": 127
+ },
+ {
+ "name": "_field_matches",
+ "signature": "def _field_matches(field: str, value: int, lo: int, hi: int)",
+ "startLine": 152
+ },
+ {
+ "name": "safe_path",
+ "signature": "def safe_path(p: str)",
+ "startLine": 398
+ },
+ {
+ "name": "run_bash",
+ "signature": "def run_bash(command: str)",
+ "startLine": 405
+ },
+ {
+ "name": "run_read",
+ "signature": "def run_read(path: str, limit: int = None)",
+ "startLine": 418
+ },
+ {
+ "name": "run_write",
+ "signature": "def run_write(path: str, content: str)",
+ "startLine": 428
+ },
+ {
+ "name": "run_edit",
+ "signature": "def run_edit(path: str, old_text: str, new_text: str)",
+ "startLine": 438
+ },
+ {
+ "name": "agent_loop",
+ "signature": "def agent_loop(messages: list)",
+ "startLine": 488
+ }
+ ],
+ "layer": "runtime",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: time -- the agent schedules its own future work.\n\"\"\"\ns14_cron_scheduler.py - Cron / Scheduled Tasks\n\nThe agent can schedule prompts for future execution using standard cron\nexpressions. When a schedule matches the current time, it pushes a\nnotification back into the main conversation loop.\n\n Cron expression: 5 fields\n +-------+-------+-------+-------+-------+\n | min | hour | dom | month | dow |\n | 0-59 | 0-23 | 1-31 | 1-12 | 0-6 |\n +-------+-------+-------+-------+-------+\n Examples:\n \"*/5 * * * *\" -> every 5 minutes\n \"0 9 * * 1\" -> Monday 9:00 AM\n \"30 14 * * *\" -> daily 2:30 PM\n\n Two persistence modes:\n +--------------------+-------------------------------+\n | session-only | In-memory list, lost on exit |\n | durable | .claude/scheduled_tasks.json |\n +--------------------+-------------------------------+\n\n Two trigger modes:\n +--------------------+-------------------------------+\n | recurring | Repeats until deleted or |\n | | 7-day auto-expiry |\n | one-shot | Fires once, then auto-deleted |\n +--------------------+-------------------------------+\n\n Jitter: recurring tasks can avoid exact minute boundaries.\n\n Architecture:\n +-------------------------------+\n | Background thread |\n | (checks every 1 second) |\n | |\n | for each task: |\n | if cron_matches(now): |\n | enqueue notification |\n +-------------------------------+\n |\n v\n [notification_queue]\n |\n (drained at top of agent_loop)\n |\n v\n [injected as user messages before LLM call]\n\nKey idea: scheduling remembers future work, then hands it back to the\nsame main loop when the time arrives.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom datetime import datetime, timedelta\nfrom pathlib import Path\nfrom queue import Queue, Empty\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSCHEDULED_TASKS_FILE = WORKDIR / \".claude\" / \"scheduled_tasks.json\"\nCRON_LOCK_FILE = WORKDIR / \".claude\" / \"cron.lock\"\nAUTO_EXPIRY_DAYS = 7\nJITTER_MINUTES = [0, 30] # avoid these exact minutes for recurring tasks\nJITTER_OFFSET_MAX = 4 # offset range in minutes\n# Teaching version: use a simple 1-4 minute offset when needed.\n\n\nclass CronLock:\n \"\"\"\n PID-file-based lock to prevent multiple sessions from firing the same cron job.\n \"\"\"\n\n def __init__(self, lock_path: Path = None):\n self._lock_path = lock_path or CRON_LOCK_FILE\n\n def acquire(self) -> bool:\n \"\"\"\n Try to acquire the cron lock. 
Returns True on success.\n\n If a lock file exists, check whether the PID inside is still alive.\n If the process is dead the lock is stale and we can take over.\n \"\"\"\n if self._lock_path.exists():\n try:\n stored_pid = int(self._lock_path.read_text().strip())\n # PID liveness probe: send signal 0 (no-op) to check existence\n os.kill(stored_pid, 0)\n # Process is alive -- lock is held by another session\n return False\n except (ValueError, ProcessLookupError, PermissionError, OSError):\n # Stale lock (process dead or PID unparseable) -- remove it\n pass\n self._lock_path.parent.mkdir(parents=True, exist_ok=True)\n self._lock_path.write_text(str(os.getpid()))\n return True\n\n def release(self):\n \"\"\"Remove the lock file if it belongs to this process.\"\"\"\n try:\n if self._lock_path.exists():\n stored_pid = int(self._lock_path.read_text().strip())\n if stored_pid == os.getpid():\n self._lock_path.unlink()\n except (ValueError, OSError):\n pass\n\n\ndef cron_matches(expr: str, dt: datetime) -> bool:\n \"\"\"\n Check if a 5-field cron expression matches a given datetime.\n\n Fields: minute hour day-of-month month day-of-week\n Supports: * (any), */N (every N), N (exact), N-M (range), N,M (list)\n\n No external dependencies -- simple manual matching.\n \"\"\"\n fields = expr.strip().split()\n if len(fields) != 5:\n return False\n\n values = [dt.minute, dt.hour, dt.day, dt.month, dt.weekday()]\n # Python weekday: 0=Monday; cron: 0=Sunday. Convert.\n cron_dow = (dt.weekday() + 1) % 7\n values[4] = cron_dow\n ranges = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n\n for field, value, (lo, hi) in zip(fields, values, ranges):\n if not _field_matches(field, value, lo, hi):\n return False\n return True\n\n\ndef _field_matches(field: str, value: int, lo: int, hi: int) -> bool:\n \"\"\"Match a single cron field against a value.\"\"\"\n if field == \"*\":\n return True\n\n for part in field.split(\",\"):\n # Handle step: */N or N-M/S\n step = 1\n if \"/\" in part:\n part, step_str = part.split(\"/\", 1)\n step = int(step_str)\n\n if part == \"*\":\n # */N -- check if value is on the step grid\n if (value - lo) % step == 0:\n return True\n elif \"-\" in part:\n # Range: N-M\n start, end = part.split(\"-\", 1)\n start, end = int(start), int(end)\n if start <= value <= end and (value - start) % step == 0:\n return True\n else:\n # Exact value\n if int(part) == value:\n return True\n\n return False\n\n\nclass CronScheduler:\n \"\"\"\n Manage scheduled tasks with background checking.\n\n Teaching version keeps only the core pieces: schedule records, a\n minute checker, optional persistence, and a notification queue.\n \"\"\"\n\n def __init__(self):\n self.tasks = [] # list of task dicts\n self.queue = Queue() # notification queue\n self._stop_event = threading.Event()\n self._thread = None\n self._last_check_minute = -1 # avoid double-firing within same minute\n\n def start(self):\n \"\"\"Load durable tasks and start the background check thread.\"\"\"\n self._load_durable()\n self._thread = threading.Thread(target=self._check_loop, daemon=True)\n self._thread.start()\n count = len(self.tasks)\n if count:\n print(f\"[Cron] Loaded {count} scheduled tasks\")\n\n def stop(self):\n \"\"\"Stop the background thread.\"\"\"\n self._stop_event.set()\n if self._thread:\n self._thread.join(timeout=2)\n\n def create(self, cron_expr: str, prompt: str,\n recurring: bool = True, durable: bool = False) -> str:\n \"\"\"Create a new scheduled task. 
Returns the task ID.\"\"\"\n task_id = str(uuid.uuid4())[:8]\n now = time.time()\n\n task = {\n \"id\": task_id,\n \"cron\": cron_expr,\n \"prompt\": prompt,\n \"recurring\": recurring,\n \"durable\": durable,\n \"createdAt\": now,\n }\n\n # Jitter for recurring tasks: if the cron fires on :00 or :30,\n # note it so we can offset the check slightly\n if recurring:\n task[\"jitter_offset\"] = self._compute_jitter(cron_expr)\n\n self.tasks.append(task)\n if durable:\n self._save_durable()\n\n mode = \"recurring\" if recurring else \"one-shot\"\n store = \"durable\" if durable else \"session-only\"\n return f\"Created task {task_id} ({mode}, {store}): cron={cron_expr}\"\n\n def delete(self, task_id: str) -> str:\n \"\"\"Delete a scheduled task by ID.\"\"\"\n before = len(self.tasks)\n self.tasks = [t for t in self.tasks if t[\"id\"] != task_id]\n if len(self.tasks) < before:\n self._save_durable()\n return f\"Deleted task {task_id}\"\n return f\"Task {task_id} not found\"\n\n def list_tasks(self) -> str:\n \"\"\"List all scheduled tasks.\"\"\"\n if not self.tasks:\n return \"No scheduled tasks.\"\n lines = []\n for t in self.tasks:\n mode = \"recurring\" if t[\"recurring\"] else \"one-shot\"\n store = \"durable\" if t[\"durable\"] else \"session\"\n age_hours = (time.time() - t[\"createdAt\"]) / 3600\n lines.append(\n f\" {t['id']} {t['cron']} [{mode}/{store}] \"\n f\"({age_hours:.1f}h old): {t['prompt'][:60]}\"\n )\n return \"\\n\".join(lines)\n\n def drain_notifications(self) -> list[str]:\n \"\"\"Drain all pending notifications from the queue.\"\"\"\n notifications = []\n while True:\n try:\n notifications.append(self.queue.get_nowait())\n except Empty:\n break\n return notifications\n\n def _compute_jitter(self, cron_expr: str) -> int:\n \"\"\"If cron targets :00 or :30, return a small offset (1-4 minutes).\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) < 1:\n return 0\n minute_field = fields[0]\n try:\n minute_val = int(minute_field)\n if minute_val in JITTER_MINUTES:\n # Deterministic jitter based on the expression hash\n return (hash(cron_expr) % JITTER_OFFSET_MAX) + 1\n except ValueError:\n pass\n return 0\n\n def _check_loop(self):\n \"\"\"Background thread: check every second if any task is due.\"\"\"\n while not self._stop_event.is_set():\n now = datetime.now()\n current_minute = now.hour * 60 + now.minute\n\n # Only check once per minute to avoid double-firing\n if current_minute != self._last_check_minute:\n self._last_check_minute = current_minute\n self._check_tasks(now)\n\n self._stop_event.wait(timeout=1)\n\n def _check_tasks(self, now: datetime):\n \"\"\"Check all tasks against current time, fire matches.\"\"\"\n expired = []\n fired_oneshots = []\n\n for task in self.tasks:\n # Auto-expiry: recurring tasks older than 7 days\n age_days = (time.time() - task[\"createdAt\"]) / 86400\n if task[\"recurring\"] and age_days > AUTO_EXPIRY_DAYS:\n expired.append(task[\"id\"])\n continue\n\n # Apply jitter offset for the match check\n check_time = now\n jitter = task.get(\"jitter_offset\", 0)\n if jitter:\n check_time = now - timedelta(minutes=jitter)\n\n if cron_matches(task[\"cron\"], check_time):\n notification = (\n f\"[Scheduled task {task['id']}]: {task['prompt']}\"\n )\n self.queue.put(notification)\n task[\"last_fired\"] = time.time()\n print(f\"[Cron] Fired: {task['id']}\")\n\n if not task[\"recurring\"]:\n fired_oneshots.append(task[\"id\"])\n\n # Clean up expired and one-shot tasks\n if expired or fired_oneshots:\n remove_ids = set(expired) | 
set(fired_oneshots)\n self.tasks = [t for t in self.tasks if t[\"id\"] not in remove_ids]\n for tid in expired:\n print(f\"[Cron] Auto-expired: {tid} (older than {AUTO_EXPIRY_DAYS} days)\")\n for tid in fired_oneshots:\n print(f\"[Cron] One-shot completed and removed: {tid}\")\n self._save_durable()\n\n def _load_durable(self):\n \"\"\"Load durable tasks from .claude/scheduled_tasks.json.\"\"\"\n if not SCHEDULED_TASKS_FILE.exists():\n return\n try:\n data = json.loads(SCHEDULED_TASKS_FILE.read_text())\n # Only load durable tasks\n self.tasks = [t for t in data if t.get(\"durable\")]\n except Exception as e:\n print(f\"[Cron] Error loading tasks: {e}\")\n\n def detect_missed_tasks(self) -> list[dict]:\n \"\"\"\n On startup, check each durable task's last_fired time.\n\n If a task should have fired while the session was closed (i.e.\n the gap between last_fired and now contains at least one cron match),\n flag it as missed. The caller can then let the user decide whether\n to run or discard each missed task.\n\n \"\"\"\n now = datetime.now()\n missed = []\n for task in self.tasks:\n last_fired = task.get(\"last_fired\")\n if last_fired is None:\n continue\n last_dt = datetime.fromtimestamp(last_fired)\n # Walk forward minute-by-minute from last_fired to now (cap at 24h)\n check = last_dt + timedelta(minutes=1)\n cap = min(now, last_dt + timedelta(hours=24))\n while check <= cap:\n if cron_matches(task[\"cron\"], check):\n missed.append({\n \"id\": task[\"id\"],\n \"cron\": task[\"cron\"],\n \"prompt\": task[\"prompt\"],\n \"missed_at\": check.isoformat(),\n })\n break # one miss is enough to flag it\n check += timedelta(minutes=1)\n return missed\n\n def _save_durable(self):\n \"\"\"Save durable tasks to disk.\"\"\"\n durable = [t for t in self.tasks if t.get(\"durable\")]\n SCHEDULED_TASKS_FILE.parent.mkdir(parents=True, exist_ok=True)\n SCHEDULED_TASKS_FILE.write_text(\n json.dumps(durable, indent=2) + \"\\n\"\n )\n\n\n# Global scheduler\nscheduler = CronScheduler()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"cron_create\": lambda **kw: scheduler.create(\n kw[\"cron\"], kw[\"prompt\"], kw.get(\"recurring\", True), kw.get(\"durable\", False)),\n \"cron_delete\": lambda **kw: scheduler.delete(kw[\"id\"]),\n \"cron_list\": lambda **kw: scheduler.list_tasks(),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"cron_create\", \"description\": \"Schedule a recurring or one-shot task with a cron expression.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\n \"cron\": {\"type\": \"string\", \"description\": \"5-field cron expression: 'min hour dom month dow'\"},\n \"prompt\": {\"type\": \"string\", \"description\": \"The prompt to inject when the task fires\"},\n \"recurring\": {\"type\": \"boolean\", \"description\": \"true=repeat, false=fire once then delete. Default true.\"},\n \"durable\": {\"type\": \"boolean\", \"description\": \"true=persist to disk, false=session-only. Default false.\"},\n }, \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"cron_delete\", \"description\": \"Delete a scheduled task by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\n \"id\": {\"type\": \"string\", \"description\": \"Task ID to delete\"},\n }, \"required\": [\"id\"]}},\n {\"name\": \"cron_list\", \"description\": \"List all scheduled tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\\n\\nYou can schedule future work with cron_create. 
Tasks fire automatically and their prompts are injected into the conversation.\"\n\n\ndef agent_loop(messages: list):\n \"\"\"\n Cron-aware agent loop.\n\n Before each LLM call, drain the notification queue and inject any\n fired task prompts as user messages. This is how the agent \"wakes up\"\n to handle scheduled work.\n \"\"\"\n while True:\n # Drain scheduled task notifications\n notifications = scheduler.drain_notifications()\n for note in notifications:\n print(f\"[Cron notification] {note[:100]}\")\n messages.append({\"role\": \"user\", \"content\": note})\n\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n scheduler.start()\n print(\"[Cron scheduler running. Background checks every second.]\")\n print(\"[Commands: /cron to list tasks, /test to fire a test notification]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms14 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n scheduler.stop()\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n scheduler.stop()\n break\n\n if query.strip() == \"/cron\":\n print(scheduler.list_tasks())\n continue\n\n if query.strip() == \"/test\":\n # Manually enqueue a test notification for demonstration\n scheduler.queue.put(\"[Scheduled task test-0000]: This is a test notification.\")\n print(\"[Test notification enqueued. It will be injected on your next message.]\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): time(时间)——智能体可安排未来工作。\n\"\"\"\ns14_cron_scheduler.py - Cron / Scheduled Tasks(定时任务)\n\n智能体可通过标准 cron 表达式安排未来执行的提示。\n当计划命中当前时间,会把通知回注到主对话循环。\n\n Cron expression(cron 表达式): 5 fields(5 个字段)\n +-------+-------+-------+-------+-------+\n | min(分) | hour(时) | dom(日) | month(月) | dow(周) |\n | 0-59 | 0-23 | 1-31 | 1-12 | 0-6 |\n +-------+-------+-------+-------+-------+\n 示例(Examples):\n \"*/5 * * * *\" -> 每 5 分钟触发\n \"0 9 * * 1\" -> 每周一 09:00 触发\n \"30 14 * * *\" -> 每天 14:30 触发\n\n 两种持久化模式(Two persistence modes):\n +--------------------+-------------------------------+\n | session-only | 仅内存列表,退出即丢失 |\n | durable | 持久化到 .claude/scheduled_tasks.json |\n +--------------------+-------------------------------+\n\n 两种触发模式(Two trigger modes):\n +--------------------+-------------------------------+\n | recurring | 重复触发,直到删除或 7 天自动过期 |\n | one-shot | 仅触发一次,随后自动删除 |\n +--------------------+-------------------------------+\n\n Jitter(抖动)说明:recurring 任务可避开整分钟边界,减少同点拥堵。\n\n 架构(Architecture):\n +-------------------------------+\n | 后台线程(Background thread) |\n | (每 1 秒检查一次) |\n | |\n | 对每个任务执行: |\n | if cron_matches(now): |\n | enqueue notification(入队通知) |\n +-------------------------------+\n |\n v\n [notification_queue]\n |\n (在 agent_loop 顶部 drain)\n |\n v\n [在 LLM 调用前注入为 user 消息]\n\n核心观点:调度系统负责记住未来工作,并在到点后把它交回同一主循环。\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom datetime import datetime, timedelta\nfrom pathlib import Path\nfrom queue import Queue, Empty\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\n\nSCHEDULED_TASKS_FILE = WORKDIR / \".claude\" / \"scheduled_tasks.json\"\nCRON_LOCK_FILE = WORKDIR / \".claude\" / \"cron.lock\"\nAUTO_EXPIRY_DAYS = 7\nJITTER_MINUTES = [0, 30] # recurring 任务尽量避开这两个整点分钟位\nJITTER_OFFSET_MAX = 4 # 偏移范围(分钟)\n# 教学版:在需要时使用 1-4 分钟的简单偏移。\n\n\nclass CronLock:\n \"\"\"\n 基于 PID 文件的锁,防止多会话重复触发同一 cron 任务。\n \"\"\"\n\n def __init__(self, lock_path: Path = None):\n self._lock_path = lock_path or CRON_LOCK_FILE\n\n def acquire(self) -> bool:\n \"\"\"\n 尝试获取 cron 锁。成功返回 True。\n\n 若锁文件存在,先检查其中 PID 是否仍存活;\n 若进程已死,则视为陈旧锁并接管。\n \"\"\"\n if self._lock_path.exists():\n try:\n stored_pid = int(self._lock_path.read_text().strip())\n # PID 存活探测:发送 signal 0(不实际杀进程)\n os.kill(stored_pid, 0)\n # 进程活跃:锁由其他会话持有\n return False\n except (ValueError, ProcessLookupError, PermissionError, OSError):\n # 陈旧锁(进程死亡或 PID 无法解析)-> 删除\n pass\n self._lock_path.parent.mkdir(parents=True, exist_ok=True)\n self._lock_path.write_text(str(os.getpid()))\n return True\n\n def release(self):\n \"\"\"若锁归当前进程持有,则删除锁文件。\"\"\"\n try:\n if self._lock_path.exists():\n stored_pid = int(self._lock_path.read_text().strip())\n if stored_pid == os.getpid():\n self._lock_path.unlink()\n except (ValueError, OSError):\n pass\n\n\ndef cron_matches(expr: str, dt: datetime) -> bool:\n \"\"\"\n 判断 5 字段 cron 表达式是否匹配给定时间。\n\n 字段顺序:minute hour day-of-month month day-of-week\n 支持语法:*(任意)、*/N(每 N)、N(精确)、N-M(范围)、N,M(列表)\n\n 无外部依赖,采用手工匹配逻辑。\n \"\"\"\n fields = expr.strip().split()\n if len(fields) != 5:\n return False\n\n values = [dt.minute, dt.hour, dt.day, dt.month, dt.weekday()]\n # Python weekday: 0=Monday;cron: 0=Sunday。需转换。\n cron_dow = (dt.weekday() + 1) % 7\n values[4] = cron_dow\n ranges = [(0, 59), (0, 23), (1, 
31), (1, 12), (0, 6)]\n\n for field, value, (lo, hi) in zip(fields, values, ranges):\n if not _field_matches(field, value, lo, hi):\n return False\n return True\n\n\ndef _field_matches(field: str, value: int, lo: int, hi: int) -> bool:\n \"\"\"匹配单个 cron 字段。\"\"\"\n if field == \"*\":\n return True\n\n for part in field.split(\",\"):\n # 处理步长:*/N 或 N-M/S\n step = 1\n if \"/\" in part:\n part, step_str = part.split(\"/\", 1)\n step = int(step_str)\n\n if part == \"*\":\n # */N:检查 value 是否落在步长网格上\n if (value - lo) % step == 0:\n return True\n elif \"-\" in part:\n # 范围:N-M\n start, end = part.split(\"-\", 1)\n start, end = int(start), int(end)\n if start <= value <= end and (value - start) % step == 0:\n return True\n else:\n # 精确值\n if int(part) == value:\n return True\n\n return False\n\n\nclass CronScheduler:\n \"\"\"\n 管理定时任务与后台检查线程。\n\n 教学版仅保留核心组件:计划记录、分钟级检查、可选持久化、通知队列。\n \"\"\"\n\n def __init__(self):\n self.tasks = [] # 任务字典列表\n self.queue = Queue() # 通知队列\n self._stop_event = threading.Event()\n self._thread = None\n self._last_check_minute = -1 # 避免同一分钟内重复触发\n\n def start(self):\n \"\"\"加载持久任务并启动后台检查线程。\"\"\"\n self._load_durable()\n self._thread = threading.Thread(target=self._check_loop, daemon=True)\n self._thread.start()\n count = len(self.tasks)\n if count:\n print(f\"[Cron] Loaded {count} scheduled tasks\")\n\n def stop(self):\n \"\"\"停止后台线程。\"\"\"\n self._stop_event.set()\n if self._thread:\n self._thread.join(timeout=2)\n\n def create(self, cron_expr: str, prompt: str,\n recurring: bool = True, durable: bool = False) -> str:\n \"\"\"创建定时任务并返回 task ID。\"\"\"\n task_id = str(uuid.uuid4())[:8]\n now = time.time()\n\n task = {\n \"id\": task_id,\n \"cron\": cron_expr,\n \"prompt\": prompt,\n \"recurring\": recurring,\n \"durable\": durable,\n \"createdAt\": now,\n }\n\n # recurring 任务启用 jitter:在 :00 / :30 触发点做轻微偏移\n if recurring:\n task[\"jitter_offset\"] = self._compute_jitter(cron_expr)\n\n self.tasks.append(task)\n if durable:\n self._save_durable()\n\n mode = \"recurring\" if recurring else \"one-shot\"\n store = \"durable\" if durable else \"session-only\"\n return f\"Created task {task_id} ({mode}, {store}): cron={cron_expr}\"\n\n def delete(self, task_id: str) -> str:\n \"\"\"按 ID 删除定时任务。\"\"\"\n before = len(self.tasks)\n self.tasks = [t for t in self.tasks if t[\"id\"] != task_id]\n if len(self.tasks) < before:\n self._save_durable()\n return f\"Deleted task {task_id}\"\n return f\"Task {task_id} not found\"\n\n def list_tasks(self) -> str:\n \"\"\"列出全部定时任务。\"\"\"\n if not self.tasks:\n return \"No scheduled tasks.\"\n lines = []\n for t in self.tasks:\n mode = \"recurring\" if t[\"recurring\"] else \"one-shot\"\n store = \"durable\" if t[\"durable\"] else \"session\"\n age_hours = (time.time() - t[\"createdAt\"]) / 3600\n lines.append(\n f\" {t['id']} {t['cron']} [{mode}/{store}] \"\n f\"({age_hours:.1f}h old): {t['prompt'][:60]}\"\n )\n return \"\\n\".join(lines)\n\n def drain_notifications(self) -> list[str]:\n \"\"\"从通知队列中取出并清空当前所有待投递通知。\"\"\"\n notifications = []\n while True:\n try:\n notifications.append(self.queue.get_nowait())\n except Empty:\n break\n return notifications\n\n def _compute_jitter(self, cron_expr: str) -> int:\n \"\"\"若 cron 命中 :00 或 :30,则返回 1-4 分钟的小偏移量。\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) < 1:\n return 0\n minute_field = fields[0]\n try:\n minute_val = int(minute_field)\n if minute_val in JITTER_MINUTES:\n # 基于表达式哈希的确定性 jitter\n return (hash(cron_expr) % JITTER_OFFSET_MAX) + 1\n except ValueError:\n pass\n return 0\n\n 
def _check_loop(self):\n \"\"\"后台线程:每秒检查一次是否有任务到期。\"\"\"\n while not self._stop_event.is_set():\n now = datetime.now()\n current_minute = now.hour * 60 + now.minute\n\n # 每分钟仅检查一次,避免重复触发\n if current_minute != self._last_check_minute:\n self._last_check_minute = current_minute\n self._check_tasks(now)\n\n self._stop_event.wait(timeout=1)\n\n def _check_tasks(self, now: datetime):\n \"\"\"用当前时间匹配全部任务,并触发命中的任务。\"\"\"\n expired = []\n fired_oneshots = []\n\n for task in self.tasks:\n # 自动过期:recurring 任务超过 7 天即过期\n age_days = (time.time() - task[\"createdAt\"]) / 86400\n if task[\"recurring\"] and age_days > AUTO_EXPIRY_DAYS:\n expired.append(task[\"id\"])\n continue\n\n # 匹配检查时应用 jitter 偏移\n check_time = now\n jitter = task.get(\"jitter_offset\", 0)\n if jitter:\n check_time = now - timedelta(minutes=jitter)\n\n if cron_matches(task[\"cron\"], check_time):\n notification = (\n f\"[Scheduled task {task['id']}]: {task['prompt']}\"\n )\n self.queue.put(notification)\n task[\"last_fired\"] = time.time()\n print(f\"[Cron] Fired: {task['id']}\")\n\n if not task[\"recurring\"]:\n fired_oneshots.append(task[\"id\"])\n\n # 清理过期任务与 one-shot 任务\n if expired or fired_oneshots:\n remove_ids = set(expired) | set(fired_oneshots)\n self.tasks = [t for t in self.tasks if t[\"id\"] not in remove_ids]\n for tid in expired:\n print(f\"[Cron] 已自动过期:{tid}(超过 {AUTO_EXPIRY_DAYS} 天)\")\n for tid in fired_oneshots:\n print(f\"[Cron] one-shot 任务已完成并移除:{tid}\")\n self._save_durable()\n\n def _load_durable(self):\n \"\"\"从 `.claude/scheduled_tasks.json` 加载持久化任务。\"\"\"\n if not SCHEDULED_TASKS_FILE.exists():\n return\n try:\n data = json.loads(SCHEDULED_TASKS_FILE.read_text())\n # 仅加载 durable(持久化)任务\n self.tasks = [t for t in data if t.get(\"durable\")]\n except Exception as e:\n print(f\"[Cron] 加载任务失败:{e}\")\n\n def detect_missed_tasks(self) -> list[dict]:\n \"\"\"\n 启动时检查每个持久任务的 `last_fired` 时间。\n\n 若任务在会话关闭期间本应触发(即 last_fired 到 now 区间内\n 至少存在一次 cron 命中),则将其标记为漏触发。调用方可再让\n 用户决定是执行还是丢弃这些漏触发任务。\n\n \"\"\"\n now = datetime.now()\n missed = []\n for task in self.tasks:\n last_fired = task.get(\"last_fired\")\n if last_fired is None:\n continue\n last_dt = datetime.fromtimestamp(last_fired)\n # 从 last_fired 到 now 逐分钟推进检查(最多追溯 24 小时)\n check = last_dt + timedelta(minutes=1)\n cap = min(now, last_dt + timedelta(hours=24))\n while check <= cap:\n if cron_matches(task[\"cron\"], check):\n missed.append({\n \"id\": task[\"id\"],\n \"cron\": task[\"cron\"],\n \"prompt\": task[\"prompt\"],\n \"missed_at\": check.isoformat(),\n })\n break # 命中一次漏触发即可标记\n check += timedelta(minutes=1)\n return missed\n\n def _save_durable(self):\n \"\"\"将持久任务写回磁盘。\"\"\"\n durable = [t for t in self.tasks if t.get(\"durable\")]\n SCHEDULED_TASKS_FILE.parent.mkdir(parents=True, exist_ok=True)\n SCHEDULED_TASKS_FILE.write_text(\n json.dumps(durable, indent=2) + \"\\n\"\n )\n\n\n# 全局调度器\nscheduler = CronScheduler()\n\n\n# -- 工具实现 --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout 
(120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"cron_create\": lambda **kw: scheduler.create(\n kw[\"cron\"], kw[\"prompt\"], kw.get(\"recurring\", True), kw.get(\"durable\", False)),\n \"cron_delete\": lambda **kw: scheduler.delete(kw[\"id\"]),\n \"cron_list\": lambda **kw: scheduler.list_tasks(),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"cron_create\", \"description\": \"使用 cron 表达式创建 recurring 或 one-shot 定时任务。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\n \"cron\": {\"type\": \"string\", \"description\": \"5 字段 cron 表达式:'min hour dom month dow'\"},\n \"prompt\": {\"type\": \"string\", \"description\": \"任务触发时注入会话的提示内容\"},\n \"recurring\": {\"type\": \"boolean\", \"description\": \"true=重复触发,false=触发一次后删除;默认 true\"},\n \"durable\": {\"type\": \"boolean\", \"description\": \"true=落盘持久化,false=仅当前会话;默认 false\"},\n }, \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"cron_delete\", \"description\": \"按 ID 删除定时任务。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\n \"id\": {\"type\": \"string\", \"description\": \"要删除的任务 ID\"},\n }, \"required\": [\"id\"]}},\n {\"name\": \"cron_list\", \"description\": \"列出全部定时任务。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n]\n\nSYSTEM = (\n f\"你是位于 {WORKDIR} 的 coding agent(编码智能体),请使用工具解决任务。\\n\\n\"\n \"你可以通过 cron_create 调度未来工作。任务触发后会自动把提示注入当前会话。\"\n)\n\n\ndef agent_loop(messages: list):\n \"\"\"\n 带 cron 调度感知的智能体主循环。\n\n 每次调用 LLM 前,先清空通知队列,并将已触发任务的提示词\n 注入为 user 
消息。这样智能体就能“唤醒”并处理计划任务。\n \"\"\"\n while True:\n # 清空并处理定时任务通知\n notifications = scheduler.drain_notifications()\n for note in notifications:\n print(f\"[Cron notification] {note[:100]}\")\n messages.append({\"role\": \"user\", \"content\": note})\n\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n scheduler.start()\n print(\"[Cron 调度器已启动,后台每秒检查一次。]\")\n print(\"[命令:/cron 查看任务,/test 触发一条测试通知]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms14 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n scheduler.stop()\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n scheduler.stop()\n break\n\n if query.strip() == \"/cron\":\n print(scheduler.list_tasks())\n continue\n\n if query.strip() == \"/test\":\n # 演示用途:手动插入一条测试通知\n scheduler.queue.put(\"[计划任务 test-0000]:这是一条测试通知。\")\n print(\"[测试通知已入队,将在你下一条消息前注入。]\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: time -- the agent schedules its own future work.\n\"\"\"\ns14_cron_scheduler.py - Cron / Scheduled Tasks\n\nThe agent can schedule prompts for future execution using standard cron\nexpressions. When a schedule matches the current time, it pushes a\nnotification back into the main conversation loop.\n\n Cron expression: 5 fields\n +-------+-------+-------+-------+-------+\n | min | hour | dom | month | dow |\n | 0-59 | 0-23 | 1-31 | 1-12 | 0-6 |\n +-------+-------+-------+-------+-------+\n Examples:\n \"*/5 * * * *\" -> every 5 minutes\n \"0 9 * * 1\" -> Monday 9:00 AM\n \"30 14 * * *\" -> daily 2:30 PM\n\n Two persistence modes:\n +--------------------+-------------------------------+\n | session-only | In-memory list, lost on exit |\n | durable | .claude/scheduled_tasks.json |\n +--------------------+-------------------------------+\n\n Two trigger modes:\n +--------------------+-------------------------------+\n | recurring | Repeats until deleted or |\n | | 7-day auto-expiry |\n | one-shot | Fires once, then auto-deleted |\n +--------------------+-------------------------------+\n\n Jitter: recurring tasks can avoid exact minute boundaries.\n\n Architecture:\n +-------------------------------+\n | Background thread |\n | (checks every 1 second) |\n | |\n | for each task: |\n | if cron_matches(now): |\n | enqueue notification |\n +-------------------------------+\n |\n v\n [notification_queue]\n |\n (drained at top of agent_loop)\n |\n v\n [injected as user messages before LLM call]\n\nKey idea: scheduling remembers future work, then hands it back to the\nsame main loop when the time arrives.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom datetime import datetime, timedelta\nfrom pathlib import Path\nfrom queue import Queue, Empty\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSCHEDULED_TASKS_FILE = WORKDIR / \".claude\" / \"scheduled_tasks.json\"\nCRON_LOCK_FILE = WORKDIR / \".claude\" / \"cron.lock\"\nAUTO_EXPIRY_DAYS = 7\nJITTER_MINUTES = [0, 30] # avoid these exact minutes for recurring tasks\nJITTER_OFFSET_MAX = 4 # offset range in minutes\n# Teaching version: use a simple 1-4 minute offset when needed.\n\n\nclass CronLock:\n \"\"\"\n PID-file-based lock to prevent multiple sessions from firing the same cron job.\n \"\"\"\n\n def __init__(self, lock_path: Path = None):\n self._lock_path = lock_path or CRON_LOCK_FILE\n\n def acquire(self) -> bool:\n \"\"\"\n Try to acquire the cron lock. 
Returns True on success.\n\n If a lock file exists, check whether the PID inside is still alive.\n If the process is dead the lock is stale and we can take over.\n \"\"\"\n if self._lock_path.exists():\n try:\n stored_pid = int(self._lock_path.read_text().strip())\n # PID liveness probe: send signal 0 (no-op) to check existence\n os.kill(stored_pid, 0)\n # Process is alive -- lock is held by another session\n return False\n except (ValueError, ProcessLookupError, PermissionError, OSError):\n # Stale lock (process dead or PID unparseable) -- remove it\n pass\n self._lock_path.parent.mkdir(parents=True, exist_ok=True)\n self._lock_path.write_text(str(os.getpid()))\n return True\n\n def release(self):\n \"\"\"Remove the lock file if it belongs to this process.\"\"\"\n try:\n if self._lock_path.exists():\n stored_pid = int(self._lock_path.read_text().strip())\n if stored_pid == os.getpid():\n self._lock_path.unlink()\n except (ValueError, OSError):\n pass\n\n\ndef cron_matches(expr: str, dt: datetime) -> bool:\n \"\"\"\n Check if a 5-field cron expression matches a given datetime.\n\n Fields: minute hour day-of-month month day-of-week\n Supports: * (any), */N (every N), N (exact), N-M (range), N,M (list)\n\n No external dependencies -- simple manual matching.\n \"\"\"\n fields = expr.strip().split()\n if len(fields) != 5:\n return False\n\n values = [dt.minute, dt.hour, dt.day, dt.month, dt.weekday()]\n # Python weekday: 0=Monday; cron: 0=Sunday. Convert.\n cron_dow = (dt.weekday() + 1) % 7\n values[4] = cron_dow\n ranges = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n\n for field, value, (lo, hi) in zip(fields, values, ranges):\n if not _field_matches(field, value, lo, hi):\n return False\n return True\n\n\ndef _field_matches(field: str, value: int, lo: int, hi: int) -> bool:\n \"\"\"Match a single cron field against a value.\"\"\"\n if field == \"*\":\n return True\n\n for part in field.split(\",\"):\n # Handle step: */N or N-M/S\n step = 1\n if \"/\" in part:\n part, step_str = part.split(\"/\", 1)\n step = int(step_str)\n\n if part == \"*\":\n # */N -- check if value is on the step grid\n if (value - lo) % step == 0:\n return True\n elif \"-\" in part:\n # Range: N-M\n start, end = part.split(\"-\", 1)\n start, end = int(start), int(end)\n if start <= value <= end and (value - start) % step == 0:\n return True\n else:\n # Exact value\n if int(part) == value:\n return True\n\n return False\n\n\nclass CronScheduler:\n \"\"\"\n Manage scheduled tasks with background checking.\n\n Teaching version keeps only the core pieces: schedule records, a\n minute checker, optional persistence, and a notification queue.\n \"\"\"\n\n def __init__(self):\n self.tasks = [] # list of task dicts\n self.queue = Queue() # notification queue\n self._stop_event = threading.Event()\n self._thread = None\n self._last_check_minute = -1 # avoid double-firing within same minute\n\n def start(self):\n \"\"\"Load durable tasks and start the background check thread.\"\"\"\n self._load_durable()\n self._thread = threading.Thread(target=self._check_loop, daemon=True)\n self._thread.start()\n count = len(self.tasks)\n if count:\n print(f\"[Cron] Loaded {count} scheduled tasks\")\n\n def stop(self):\n \"\"\"Stop the background thread.\"\"\"\n self._stop_event.set()\n if self._thread:\n self._thread.join(timeout=2)\n\n def create(self, cron_expr: str, prompt: str,\n recurring: bool = True, durable: bool = False) -> str:\n \"\"\"Create a new scheduled task. 
Returns the task ID.\"\"\"\n task_id = str(uuid.uuid4())[:8]\n now = time.time()\n\n task = {\n \"id\": task_id,\n \"cron\": cron_expr,\n \"prompt\": prompt,\n \"recurring\": recurring,\n \"durable\": durable,\n \"createdAt\": now,\n }\n\n # Jitter for recurring tasks: if the cron fires on :00 or :30,\n # note it so we can offset the check slightly\n if recurring:\n task[\"jitter_offset\"] = self._compute_jitter(cron_expr)\n\n self.tasks.append(task)\n if durable:\n self._save_durable()\n\n mode = \"recurring\" if recurring else \"one-shot\"\n store = \"durable\" if durable else \"session-only\"\n return f\"Created task {task_id} ({mode}, {store}): cron={cron_expr}\"\n\n def delete(self, task_id: str) -> str:\n \"\"\"Delete a scheduled task by ID.\"\"\"\n before = len(self.tasks)\n self.tasks = [t for t in self.tasks if t[\"id\"] != task_id]\n if len(self.tasks) < before:\n self._save_durable()\n return f\"Deleted task {task_id}\"\n return f\"Task {task_id} not found\"\n\n def list_tasks(self) -> str:\n \"\"\"List all scheduled tasks.\"\"\"\n if not self.tasks:\n return \"No scheduled tasks.\"\n lines = []\n for t in self.tasks:\n mode = \"recurring\" if t[\"recurring\"] else \"one-shot\"\n store = \"durable\" if t[\"durable\"] else \"session\"\n age_hours = (time.time() - t[\"createdAt\"]) / 3600\n lines.append(\n f\" {t['id']} {t['cron']} [{mode}/{store}] \"\n f\"({age_hours:.1f}h old): {t['prompt'][:60]}\"\n )\n return \"\\n\".join(lines)\n\n def drain_notifications(self) -> list[str]:\n \"\"\"Drain all pending notifications from the queue.\"\"\"\n notifications = []\n while True:\n try:\n notifications.append(self.queue.get_nowait())\n except Empty:\n break\n return notifications\n\n def _compute_jitter(self, cron_expr: str) -> int:\n \"\"\"If cron targets :00 or :30, return a small offset (1-4 minutes).\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) < 1:\n return 0\n minute_field = fields[0]\n try:\n minute_val = int(minute_field)\n if minute_val in JITTER_MINUTES:\n # Deterministic jitter based on the expression hash\n return (hash(cron_expr) % JITTER_OFFSET_MAX) + 1\n except ValueError:\n pass\n return 0\n\n def _check_loop(self):\n \"\"\"Background thread: check every second if any task is due.\"\"\"\n while not self._stop_event.is_set():\n now = datetime.now()\n current_minute = now.hour * 60 + now.minute\n\n # Only check once per minute to avoid double-firing\n if current_minute != self._last_check_minute:\n self._last_check_minute = current_minute\n self._check_tasks(now)\n\n self._stop_event.wait(timeout=1)\n\n def _check_tasks(self, now: datetime):\n \"\"\"Check all tasks against current time, fire matches.\"\"\"\n expired = []\n fired_oneshots = []\n\n for task in self.tasks:\n # Auto-expiry: recurring tasks older than 7 days\n age_days = (time.time() - task[\"createdAt\"]) / 86400\n if task[\"recurring\"] and age_days > AUTO_EXPIRY_DAYS:\n expired.append(task[\"id\"])\n continue\n\n # Apply jitter offset for the match check\n check_time = now\n jitter = task.get(\"jitter_offset\", 0)\n if jitter:\n check_time = now - timedelta(minutes=jitter)\n\n if cron_matches(task[\"cron\"], check_time):\n notification = (\n f\"[Scheduled task {task['id']}]: {task['prompt']}\"\n )\n self.queue.put(notification)\n task[\"last_fired\"] = time.time()\n print(f\"[Cron] Fired: {task['id']}\")\n\n if not task[\"recurring\"]:\n fired_oneshots.append(task[\"id\"])\n\n # Clean up expired and one-shot tasks\n if expired or fired_oneshots:\n remove_ids = set(expired) | 
set(fired_oneshots)\n self.tasks = [t for t in self.tasks if t[\"id\"] not in remove_ids]\n for tid in expired:\n print(f\"[Cron] Auto-expired: {tid} (older than {AUTO_EXPIRY_DAYS} days)\")\n for tid in fired_oneshots:\n print(f\"[Cron] One-shot completed and removed: {tid}\")\n self._save_durable()\n\n def _load_durable(self):\n \"\"\"Load durable tasks from .claude/scheduled_tasks.json.\"\"\"\n if not SCHEDULED_TASKS_FILE.exists():\n return\n try:\n data = json.loads(SCHEDULED_TASKS_FILE.read_text())\n # Only load durable tasks\n self.tasks = [t for t in data if t.get(\"durable\")]\n except Exception as e:\n print(f\"[Cron] Error loading tasks: {e}\")\n\n def detect_missed_tasks(self) -> list[dict]:\n \"\"\"\n On startup, check each durable task's last_fired time.\n\n If a task should have fired while the session was closed (i.e.\n the gap between last_fired and now contains at least one cron match),\n flag it as missed. The caller can then let the user decide whether\n to run or discard each missed task.\n\n \"\"\"\n now = datetime.now()\n missed = []\n for task in self.tasks:\n last_fired = task.get(\"last_fired\")\n if last_fired is None:\n continue\n last_dt = datetime.fromtimestamp(last_fired)\n # Walk forward minute-by-minute from last_fired to now (cap at 24h)\n check = last_dt + timedelta(minutes=1)\n cap = min(now, last_dt + timedelta(hours=24))\n while check <= cap:\n if cron_matches(task[\"cron\"], check):\n missed.append({\n \"id\": task[\"id\"],\n \"cron\": task[\"cron\"],\n \"prompt\": task[\"prompt\"],\n \"missed_at\": check.isoformat(),\n })\n break # one miss is enough to flag it\n check += timedelta(minutes=1)\n return missed\n\n def _save_durable(self):\n \"\"\"Save durable tasks to disk.\"\"\"\n durable = [t for t in self.tasks if t.get(\"durable\")]\n SCHEDULED_TASKS_FILE.parent.mkdir(parents=True, exist_ok=True)\n SCHEDULED_TASKS_FILE.write_text(\n json.dumps(durable, indent=2) + \"\\n\"\n )\n\n\n# Global scheduler\nscheduler = CronScheduler()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"cron_create\": lambda **kw: scheduler.create(\n kw[\"cron\"], kw[\"prompt\"], kw.get(\"recurring\", True), kw.get(\"durable\", False)),\n \"cron_delete\": lambda **kw: scheduler.delete(kw[\"id\"]),\n \"cron_list\": lambda **kw: scheduler.list_tasks(),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"cron_create\", \"description\": \"Schedule a recurring or one-shot task with a cron expression.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\n \"cron\": {\"type\": \"string\", \"description\": \"5-field cron expression: 'min hour dom month dow'\"},\n \"prompt\": {\"type\": \"string\", \"description\": \"The prompt to inject when the task fires\"},\n \"recurring\": {\"type\": \"boolean\", \"description\": \"true=repeat, false=fire once then delete. Default true.\"},\n \"durable\": {\"type\": \"boolean\", \"description\": \"true=persist to disk, false=session-only. Default false.\"},\n }, \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"cron_delete\", \"description\": \"Delete a scheduled task by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\n \"id\": {\"type\": \"string\", \"description\": \"Task ID to delete\"},\n }, \"required\": [\"id\"]}},\n {\"name\": \"cron_list\", \"description\": \"List all scheduled tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\\n\\nYou can schedule future work with cron_create. 
Tasks fire automatically and their prompts are injected into the conversation.\"\n\n\ndef agent_loop(messages: list):\n \"\"\"\n Cron-aware agent loop.\n\n Before each LLM call, drain the notification queue and inject any\n fired task prompts as user messages. This is how the agent \"wakes up\"\n to handle scheduled work.\n \"\"\"\n while True:\n # Drain scheduled task notifications\n notifications = scheduler.drain_notifications()\n for note in notifications:\n print(f\"[Cron notification] {note[:100]}\")\n messages.append({\"role\": \"user\", \"content\": note})\n\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n scheduler.start()\n print(\"[Cron scheduler running. Background checks every second.]\")\n print(\"[Commands: /cron to list tasks, /test to fire a test notification]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms14 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n scheduler.stop()\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n scheduler.stop()\n break\n\n if query.strip() == \"/cron\":\n print(scheduler.list_tasks())\n continue\n\n if query.strip() == \"/test\":\n # Manually enqueue a test notification for demonstration\n scheduler.queue.put(\"[Scheduled task test-0000]: This is a test notification.\")\n print(\"[Test notification enqueued. It will be injected on your next message.]\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: time -- the agent schedules its own future work.\n\"\"\"\ns14_cron_scheduler.py - Cron / Scheduled Tasks\n\nThe agent can schedule prompts for future execution using standard cron\nexpressions. When a schedule matches the current time, it pushes a\nnotification back into the main conversation loop.\n\n Cron expression: 5 fields\n +-------+-------+-------+-------+-------+\n | min | hour | dom | month | dow |\n | 0-59 | 0-23 | 1-31 | 1-12 | 0-6 |\n +-------+-------+-------+-------+-------+\n Examples:\n \"*/5 * * * *\" -> every 5 minutes\n \"0 9 * * 1\" -> Monday 9:00 AM\n \"30 14 * * *\" -> daily 2:30 PM\n\n Two persistence modes:\n +--------------------+-------------------------------+\n | session-only | In-memory list, lost on exit |\n | durable | .claude/scheduled_tasks.json |\n +--------------------+-------------------------------+\n\n Two trigger modes:\n +--------------------+-------------------------------+\n | recurring | Repeats until deleted or |\n | | 7-day auto-expiry |\n | one-shot | Fires once, then auto-deleted |\n +--------------------+-------------------------------+\n\n Jitter: recurring tasks can avoid exact minute boundaries.\n\n Architecture:\n +-------------------------------+\n | Background thread |\n | (checks every 1 second) |\n | |\n | for each task: |\n | if cron_matches(now): |\n | enqueue notification |\n +-------------------------------+\n |\n v\n [notification_queue]\n |\n (drained at top of agent_loop)\n |\n v\n [injected as user messages before LLM call]\n\nKey idea: scheduling remembers future work, then hands it back to the\nsame main loop when the time arrives.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom datetime import datetime, timedelta\nfrom pathlib import Path\nfrom queue import Queue, Empty\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSCHEDULED_TASKS_FILE = WORKDIR / \".claude\" / \"scheduled_tasks.json\"\nCRON_LOCK_FILE = WORKDIR / \".claude\" / \"cron.lock\"\nAUTO_EXPIRY_DAYS = 7\nJITTER_MINUTES = [0, 30] # avoid these exact minutes for recurring tasks\nJITTER_OFFSET_MAX = 4 # offset range in minutes\n# Teaching version: use a simple 1-4 minute offset when needed.\n\n\nclass CronLock:\n \"\"\"\n PID-file-based lock to prevent multiple sessions from firing the same cron job.\n \"\"\"\n\n def __init__(self, lock_path: Path = None):\n self._lock_path = lock_path or CRON_LOCK_FILE\n\n def acquire(self) -> bool:\n \"\"\"\n Try to acquire the cron lock. 
Returns True on success.\n\n If a lock file exists, check whether the PID inside is still alive.\n If the process is dead the lock is stale and we can take over.\n \"\"\"\n if self._lock_path.exists():\n try:\n stored_pid = int(self._lock_path.read_text().strip())\n # PID liveness probe: send signal 0 (no-op) to check existence\n os.kill(stored_pid, 0)\n # Process is alive -- lock is held by another session\n return False\n except (ValueError, ProcessLookupError, PermissionError, OSError):\n # Stale lock (process dead or PID unparseable) -- remove it\n pass\n self._lock_path.parent.mkdir(parents=True, exist_ok=True)\n self._lock_path.write_text(str(os.getpid()))\n return True\n\n def release(self):\n \"\"\"Remove the lock file if it belongs to this process.\"\"\"\n try:\n if self._lock_path.exists():\n stored_pid = int(self._lock_path.read_text().strip())\n if stored_pid == os.getpid():\n self._lock_path.unlink()\n except (ValueError, OSError):\n pass\n\n\ndef cron_matches(expr: str, dt: datetime) -> bool:\n \"\"\"\n Check if a 5-field cron expression matches a given datetime.\n\n Fields: minute hour day-of-month month day-of-week\n Supports: * (any), */N (every N), N (exact), N-M (range), N,M (list)\n\n No external dependencies -- simple manual matching.\n \"\"\"\n fields = expr.strip().split()\n if len(fields) != 5:\n return False\n\n values = [dt.minute, dt.hour, dt.day, dt.month, dt.weekday()]\n # Python weekday: 0=Monday; cron: 0=Sunday. Convert.\n cron_dow = (dt.weekday() + 1) % 7\n values[4] = cron_dow\n ranges = [(0, 59), (0, 23), (1, 31), (1, 12), (0, 6)]\n\n for field, value, (lo, hi) in zip(fields, values, ranges):\n if not _field_matches(field, value, lo, hi):\n return False\n return True\n\n\ndef _field_matches(field: str, value: int, lo: int, hi: int) -> bool:\n \"\"\"Match a single cron field against a value.\"\"\"\n if field == \"*\":\n return True\n\n for part in field.split(\",\"):\n # Handle step: */N or N-M/S\n step = 1\n if \"/\" in part:\n part, step_str = part.split(\"/\", 1)\n step = int(step_str)\n\n if part == \"*\":\n # */N -- check if value is on the step grid\n if (value - lo) % step == 0:\n return True\n elif \"-\" in part:\n # Range: N-M\n start, end = part.split(\"-\", 1)\n start, end = int(start), int(end)\n if start <= value <= end and (value - start) % step == 0:\n return True\n else:\n # Exact value\n if int(part) == value:\n return True\n\n return False\n\n\nclass CronScheduler:\n \"\"\"\n Manage scheduled tasks with background checking.\n\n Teaching version keeps only the core pieces: schedule records, a\n minute checker, optional persistence, and a notification queue.\n \"\"\"\n\n def __init__(self):\n self.tasks = [] # list of task dicts\n self.queue = Queue() # notification queue\n self._stop_event = threading.Event()\n self._thread = None\n self._last_check_minute = -1 # avoid double-firing within same minute\n\n def start(self):\n \"\"\"Load durable tasks and start the background check thread.\"\"\"\n self._load_durable()\n self._thread = threading.Thread(target=self._check_loop, daemon=True)\n self._thread.start()\n count = len(self.tasks)\n if count:\n print(f\"[Cron] Loaded {count} scheduled tasks\")\n\n def stop(self):\n \"\"\"Stop the background thread.\"\"\"\n self._stop_event.set()\n if self._thread:\n self._thread.join(timeout=2)\n\n def create(self, cron_expr: str, prompt: str,\n recurring: bool = True, durable: bool = False) -> str:\n \"\"\"Create a new scheduled task. 
Returns the task ID.\"\"\"\n task_id = str(uuid.uuid4())[:8]\n now = time.time()\n\n task = {\n \"id\": task_id,\n \"cron\": cron_expr,\n \"prompt\": prompt,\n \"recurring\": recurring,\n \"durable\": durable,\n \"createdAt\": now,\n }\n\n # Jitter for recurring tasks: if the cron fires on :00 or :30,\n # note it so we can offset the check slightly\n if recurring:\n task[\"jitter_offset\"] = self._compute_jitter(cron_expr)\n\n self.tasks.append(task)\n if durable:\n self._save_durable()\n\n mode = \"recurring\" if recurring else \"one-shot\"\n store = \"durable\" if durable else \"session-only\"\n return f\"Created task {task_id} ({mode}, {store}): cron={cron_expr}\"\n\n def delete(self, task_id: str) -> str:\n \"\"\"Delete a scheduled task by ID.\"\"\"\n before = len(self.tasks)\n self.tasks = [t for t in self.tasks if t[\"id\"] != task_id]\n if len(self.tasks) < before:\n self._save_durable()\n return f\"Deleted task {task_id}\"\n return f\"Task {task_id} not found\"\n\n def list_tasks(self) -> str:\n \"\"\"List all scheduled tasks.\"\"\"\n if not self.tasks:\n return \"No scheduled tasks.\"\n lines = []\n for t in self.tasks:\n mode = \"recurring\" if t[\"recurring\"] else \"one-shot\"\n store = \"durable\" if t[\"durable\"] else \"session\"\n age_hours = (time.time() - t[\"createdAt\"]) / 3600\n lines.append(\n f\" {t['id']} {t['cron']} [{mode}/{store}] \"\n f\"({age_hours:.1f}h old): {t['prompt'][:60]}\"\n )\n return \"\\n\".join(lines)\n\n def drain_notifications(self) -> list[str]:\n \"\"\"Drain all pending notifications from the queue.\"\"\"\n notifications = []\n while True:\n try:\n notifications.append(self.queue.get_nowait())\n except Empty:\n break\n return notifications\n\n def _compute_jitter(self, cron_expr: str) -> int:\n \"\"\"If cron targets :00 or :30, return a small offset (1-4 minutes).\"\"\"\n fields = cron_expr.strip().split()\n if len(fields) < 1:\n return 0\n minute_field = fields[0]\n try:\n minute_val = int(minute_field)\n if minute_val in JITTER_MINUTES:\n # Deterministic jitter based on the expression hash\n return (hash(cron_expr) % JITTER_OFFSET_MAX) + 1\n except ValueError:\n pass\n return 0\n\n def _check_loop(self):\n \"\"\"Background thread: check every second if any task is due.\"\"\"\n while not self._stop_event.is_set():\n now = datetime.now()\n current_minute = now.hour * 60 + now.minute\n\n # Only check once per minute to avoid double-firing\n if current_minute != self._last_check_minute:\n self._last_check_minute = current_minute\n self._check_tasks(now)\n\n self._stop_event.wait(timeout=1)\n\n def _check_tasks(self, now: datetime):\n \"\"\"Check all tasks against current time, fire matches.\"\"\"\n expired = []\n fired_oneshots = []\n\n for task in self.tasks:\n # Auto-expiry: recurring tasks older than 7 days\n age_days = (time.time() - task[\"createdAt\"]) / 86400\n if task[\"recurring\"] and age_days > AUTO_EXPIRY_DAYS:\n expired.append(task[\"id\"])\n continue\n\n # Apply jitter offset for the match check\n check_time = now\n jitter = task.get(\"jitter_offset\", 0)\n if jitter:\n check_time = now - timedelta(minutes=jitter)\n\n if cron_matches(task[\"cron\"], check_time):\n notification = (\n f\"[Scheduled task {task['id']}]: {task['prompt']}\"\n )\n self.queue.put(notification)\n task[\"last_fired\"] = time.time()\n print(f\"[Cron] Fired: {task['id']}\")\n\n if not task[\"recurring\"]:\n fired_oneshots.append(task[\"id\"])\n\n # Clean up expired and one-shot tasks\n if expired or fired_oneshots:\n remove_ids = set(expired) | 
set(fired_oneshots)\n self.tasks = [t for t in self.tasks if t[\"id\"] not in remove_ids]\n for tid in expired:\n print(f\"[Cron] Auto-expired: {tid} (older than {AUTO_EXPIRY_DAYS} days)\")\n for tid in fired_oneshots:\n print(f\"[Cron] One-shot completed and removed: {tid}\")\n self._save_durable()\n\n def _load_durable(self):\n \"\"\"Load durable tasks from .claude/scheduled_tasks.json.\"\"\"\n if not SCHEDULED_TASKS_FILE.exists():\n return\n try:\n data = json.loads(SCHEDULED_TASKS_FILE.read_text())\n # Only load durable tasks\n self.tasks = [t for t in data if t.get(\"durable\")]\n except Exception as e:\n print(f\"[Cron] Error loading tasks: {e}\")\n\n def detect_missed_tasks(self) -> list[dict]:\n \"\"\"\n On startup, check each durable task's last_fired time.\n\n If a task should have fired while the session was closed (i.e.\n the gap between last_fired and now contains at least one cron match),\n flag it as missed. The caller can then let the user decide whether\n to run or discard each missed task.\n\n \"\"\"\n now = datetime.now()\n missed = []\n for task in self.tasks:\n last_fired = task.get(\"last_fired\")\n if last_fired is None:\n continue\n last_dt = datetime.fromtimestamp(last_fired)\n # Walk forward minute-by-minute from last_fired to now (cap at 24h)\n check = last_dt + timedelta(minutes=1)\n cap = min(now, last_dt + timedelta(hours=24))\n while check <= cap:\n if cron_matches(task[\"cron\"], check):\n missed.append({\n \"id\": task[\"id\"],\n \"cron\": task[\"cron\"],\n \"prompt\": task[\"prompt\"],\n \"missed_at\": check.isoformat(),\n })\n break # one miss is enough to flag it\n check += timedelta(minutes=1)\n return missed\n\n def _save_durable(self):\n \"\"\"Save durable tasks to disk.\"\"\"\n durable = [t for t in self.tasks if t.get(\"durable\")]\n SCHEDULED_TASKS_FILE.parent.mkdir(parents=True, exist_ok=True)\n SCHEDULED_TASKS_FILE.write_text(\n json.dumps(durable, indent=2) + \"\\n\"\n )\n\n\n# Global scheduler\nscheduler = CronScheduler()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"cron_create\": lambda **kw: scheduler.create(\n kw[\"cron\"], kw[\"prompt\"], kw.get(\"recurring\", True), kw.get(\"durable\", False)),\n \"cron_delete\": lambda **kw: scheduler.delete(kw[\"id\"]),\n \"cron_list\": lambda **kw: scheduler.list_tasks(),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"cron_create\", \"description\": \"Schedule a recurring or one-shot task with a cron expression.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\n \"cron\": {\"type\": \"string\", \"description\": \"5-field cron expression: 'min hour dom month dow'\"},\n \"prompt\": {\"type\": \"string\", \"description\": \"The prompt to inject when the task fires\"},\n \"recurring\": {\"type\": \"boolean\", \"description\": \"true=repeat, false=fire once then delete. Default true.\"},\n \"durable\": {\"type\": \"boolean\", \"description\": \"true=persist to disk, false=session-only. Default false.\"},\n }, \"required\": [\"cron\", \"prompt\"]}},\n {\"name\": \"cron_delete\", \"description\": \"Delete a scheduled task by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\n \"id\": {\"type\": \"string\", \"description\": \"Task ID to delete\"},\n }, \"required\": [\"id\"]}},\n {\"name\": \"cron_list\", \"description\": \"List all scheduled tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\\n\\nYou can schedule future work with cron_create. 
Tasks fire automatically and their prompts are injected into the conversation.\"\n\n\ndef agent_loop(messages: list):\n \"\"\"\n Cron-aware agent loop.\n\n Before each LLM call, drain the notification queue and inject any\n fired task prompts as user messages. This is how the agent \"wakes up\"\n to handle scheduled work.\n \"\"\"\n while True:\n # Drain scheduled task notifications\n notifications = scheduler.drain_notifications()\n for note in notifications:\n print(f\"[Cron notification] {note[:100]}\")\n messages.append({\"role\": \"user\", \"content\": note})\n\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**(block.input or {})) if handler else f\"Unknown: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n scheduler.start()\n print(\"[Cron scheduler running. Background checks every second.]\")\n print(\"[Commands: /cron to list tasks, /test to fire a test notification]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms14 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n scheduler.stop()\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n scheduler.stop()\n break\n\n if query.strip() == \"/cron\":\n print(scheduler.list_tasks())\n continue\n\n if query.strip() == \"/test\":\n # Manually enqueue a test notification for demonstration\n scheduler.queue.put(\"[Scheduled task test-0000]: This is a test notification.\")\n print(\"[Test notification enqueued. It will be injected on your next message.]\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
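Editor's note (illustrative sketch, not part of the patch): the 5-field cron matcher that s14_cron_scheduler.py documents above can be exercised on its own. The helper names `matches` and `_field_ok` below are hypothetical stand-ins for the file's `cron_matches`/`_field_matches`; only the Python standard library is assumed.

    from datetime import datetime

    def _field_ok(field: str, value: int, lo: int) -> bool:
        # "*" matches anything; comma-separated parts may be N, N-M, or carry a "/step".
        if field == "*":
            return True
        for part in field.split(","):
            step = 1
            if "/" in part:
                part, step_s = part.split("/", 1)
                step = int(step_s)
            if part == "*":
                if (value - lo) % step == 0:
                    return True
            elif "-" in part:
                start, end = map(int, part.split("-", 1))
                if start <= value <= end and (value - start) % step == 0:
                    return True
            elif int(part) == value:
                return True
        return False

    def matches(expr: str, dt: datetime) -> bool:
        # Fields: minute hour day-of-month month day-of-week (cron dow: 0 = Sunday).
        fields = expr.split()
        if len(fields) != 5:
            return False
        dow = (dt.weekday() + 1) % 7          # Python Monday=0 -> cron Sunday=0
        values = [dt.minute, dt.hour, dt.day, dt.month, dow]
        lows = [0, 0, 1, 1, 0]
        return all(_field_ok(f, v, lo) for f, v, lo in zip(fields, values, lows))

    # "*/5 * * * *" fires on minutes 0, 5, 10, ...; "0 9 * * 1" only on Mondays at 09:00.
    assert matches("*/5 * * * *", datetime(2024, 1, 1, 12, 35))
    assert matches("0 9 * * 1", datetime(2024, 1, 1, 9, 0))     # 2024-01-01 is a Monday
    assert not matches("0 9 * * 1", datetime(2024, 1, 2, 9, 0))

The day-of-week conversion is the one subtle step: Python's weekday() counts Monday as 0 while cron counts Sunday as 0, hence the (weekday + 1) % 7 shift used both here and in the file above.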
+ {
+ "id": "s15",
+ "filename": "s15_agent_teams.py",
"title": "Agent Teams",
- "subtitle": "Teammates + Mailboxes",
- "loc": 348,
+ "subtitle": "Persistent Specialists",
+ "loc": 350,
"tools": [
"alice",
"bash",
@@ -487,61 +1071,66 @@
"list_teammates",
"broadcast"
],
- "coreAddition": "TeammateManager + file-based mailbox",
- "keyInsight": "When one agent can't finish, delegate to persistent teammates via async mailboxes",
+ "coreAddition": "Team roster + teammate lifecycle",
+ "keyInsight": "Teammates persist beyond one prompt, have identity, and coordinate through durable channels.",
"classes": [
{
"name": "MessageBus",
- "startLine": 77,
- "endLine": 118
+ "startLine": 84,
+ "endLine": 125
},
{
"name": "TeammateManager",
- "startLine": 123,
- "endLine": 249
+ "startLine": 130,
+ "endLine": 258
}
],
"functions": [
{
"name": "_safe_path",
"signature": "def _safe_path(p: str)",
- "startLine": 254
+ "startLine": 263
},
{
"name": "_run_bash",
"signature": "def _run_bash(command: str)",
- "startLine": 261
+ "startLine": 270
},
{
"name": "_run_read",
"signature": "def _run_read(path: str, limit: int = None)",
- "startLine": 276
+ "startLine": 285
},
{
"name": "_run_write",
"signature": "def _run_write(path: str, content: str)",
- "startLine": 286
+ "startLine": 295
},
{
"name": "_run_edit",
"signature": "def _run_edit(path: str, old_text: str, new_text: str)",
- "startLine": 296
+ "startLine": 305
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
- "startLine": 344
+ "startLine": 353
}
],
- "layer": "collaboration",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns09_agent_teams.py - Agent Teams\n\nPersistent named agents with file-based JSONL inboxes. Each teammate runs\nits own agent loop in a separate thread. Communication via append-only inboxes.\n\n Subagent (s04): spawn -> execute -> return summary -> destroyed\n Teammate (s09): spawn -> work -> idle -> work -> ... -> shutdown\n\n .team/config.json .team/inbox/\n +----------------------------+ +------------------+\n | {\"team_name\": \"default\", | | alice.jsonl |\n | \"members\": [ | | bob.jsonl |\n | {\"name\":\"alice\", | | lead.jsonl |\n | \"role\":\"coder\", | +------------------+\n | \"status\":\"idle\"} |\n | ]} | send_message(\"alice\", \"fix bug\"):\n +----------------------------+ open(\"alice.jsonl\", \"a\").write(msg)\n\n read_inbox(\"alice\"):\n spawn_teammate(\"alice\",\"coder\",...) msgs = [json.loads(l) for l in ...]\n | open(\"alice.jsonl\", \"w\").close()\n v return msgs # drain\n Thread: alice Thread: bob\n +------------------+ +------------------+\n | agent_loop | | agent_loop |\n | status: working | | status: idle |\n | ... runs tools | | ... waits ... |\n | status -> idle | | |\n +------------------+ +------------------+\n\n 5 message types (all declared, not all handled here):\n +-------------------------+-----------------------------------+\n | message | Normal text message |\n | broadcast | Sent to all teammates |\n | shutdown_request | Request graceful shutdown (s10) |\n | shutdown_response | Approve/reject shutdown (s10) |\n | plan_approval_response | Approve/reject plan (s10) |\n +-------------------------+-----------------------------------+\n\nKey insight: \"Teammates that can talk to each other.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Spawn teammates and communicate via inboxes.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager: persistent named agents with config.json --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\n f\"Use send_message to communicate. 
Complete your task.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member and member[\"status\"] != \"shutdown\":\n member[\"status\"] = \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def 
member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead tool dispatch (9 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": 
\"spawn_teammate\", \"description\": \"Spawn a persistent teammate that runs in its own thread.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates with name, role, status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ "layer": "platform",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: team mailboxes -- multiple models, coordinated through files.\n\"\"\"\ns15_agent_teams.py - Agent Teams\n\nPersistent named agents with file-based JSONL inboxes. Each teammate runs\nits own agent loop in a separate thread. Communication happens through\nappend-only inbox files.\n\n Subagent (s04): spawn -> execute -> return summary -> destroyed\n Teammate (s15): spawn -> work -> idle -> work -> ... -> shutdown\n\n .team/config.json .team/inbox/\n +----------------------------+ +------------------+\n | {\"team_name\": \"default\", | | alice.jsonl |\n | \"members\": [ | | bob.jsonl |\n | {\"name\":\"alice\", | | lead.jsonl |\n | \"role\":\"coder\", | +------------------+\n | \"status\":\"idle\"} |\n | ]} | send_message(\"alice\", \"fix bug\"):\n +----------------------------+ open(\"alice.jsonl\", \"a\").write(msg)\n\n read_inbox(\"alice\"):\n spawn_teammate(\"alice\",\"coder\",...) msgs = [json.loads(l) for l in ...]\n | open(\"alice.jsonl\", \"w\").close()\n v return msgs # drain\n Thread: alice Thread: bob\n +------------------+ +------------------+\n | agent_loop | | agent_loop |\n | status: working | | status: idle |\n | ... runs tools | | ... waits ... |\n | status -> idle | | |\n +------------------+ +------------------+\n\nKey idea: teammates have names, inboxes, and independent loops.\n\nRead this file in this order:\n1. MessageBus: how messages are queued and drained.\n2. TeammateManager: what persistent teammate state looks like.\n3. _teammate_loop / TOOL_HANDLERS: how each named teammate keeps re-entering the same tool loop.\n\nMost common confusion:\n- a teammate is not a one-shot subagent\n- an inbox message is not yet a full protocol request\n\nTeaching boundary:\nthis file teaches persistent named workers plus mailboxes.\nApproval protocols and autonomous policies are added in later chapters.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Spawn teammates and communicate via inboxes.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval\",\n \"plan_approval_response\",\n}\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager: persistent named agents with config.json --\nclass TeammateManager:\n \"\"\"Persistent teammate registry plus worker-loop launcher.\"\"\"\n\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\n f\"Use send_message to communicate. 
Complete your task.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member and member[\"status\"] != \"shutdown\":\n member[\"status\"] = \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def 
member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead tool dispatch (9 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": 
\"spawn_teammate\", \"description\": \"Spawn a persistent teammate that runs in its own thread.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates with name, role, status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms15 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): team mailboxes(团队邮箱)——多个模型通过文件协同。\n\"\"\"\ns15_agent_teams.py - Agent Teams(智能体团队)\n\n具名持久智能体 + 文件化 JSONL 收件箱。\n每个队友在独立线程运行自己的 agent loop,\n通信通过 append-only 收件箱文件完成。\n\n 子智能体(s04)生命周期:创建 -> 执行 -> 返回摘要 -> 销毁\n 队友(s15)生命周期:创建 -> 工作 -> 空闲 -> 再工作 -> ... -> 关停\n\n 配置文件 `.team/config.json` 保存队友身份与状态:\n {\"team_name\":\"default\",\"members\":[{\"name\":\"alice\",\"role\":\"coder\",\"status\":\"idle\"}]}\n 收件箱目录 `.team/inbox/` 为每位队友维护独立 JSONL 文件(如 `alice.jsonl`)。\n\n 发送消息示例:\n send_message(\"alice\", \"fix bug\") -> 以追加方式写入 `alice.jsonl`\n 读取收件箱示例:\n read_inbox(\"alice\") -> 读取全部消息后清空文件(drain)\n\n 每个队友线程都运行自己的 agent_loop:\n status=working(工作中) -> status=idle(空闲) -> 等待下一条消息/任务\n\n核心观点:队友具备“名字、收件箱、独立循环”三要素。\n\n建议阅读顺序:\n1. MessageBus:消息如何入队并被 drain(取空)。\n2. TeammateManager:持久队友状态长什么样。\n3. _teammate_loop / TOOL_HANDLERS:具名队友如何反复进入同一工具循环。\n\n最常见混淆点:\n- teammate(队友)不是 one-shot subagent(一次性子智能体)\n- inbox message(收件箱消息)还不是完整 protocol request(协议请求)\n\n教学边界:\n本文件先讲“具名持久 worker + mailbox(邮箱)”。\n审批协议与自治策略放在后续章节。\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"你是位于 {WORKDIR} 的 team lead(团队负责人),请创建队友并通过收件箱通信。\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval\",\n \"plan_approval_response\",\n}\n\n\n# -- MessageBus:每位队友一个 JSONL 收件箱 --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager:基于 config.json 的具名持久智能体 --\nclass TeammateManager:\n \"\"\"持久化队友注册表与 worker(执行者)循环启动器。\"\"\"\n\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"你是 '{name}',角色为 {role},工作目录位于 {WORKDIR}。\"\n f\"请使用 send_message 与队友沟通,并完成分配任务。\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member and member[\"status\"] != \"shutdown\":\n member[\"status\"] = \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # 这些基础工具与 s02 保持一致\n if 
tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # 这些基础工具与 s02 保持一致\n return [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"向队友发送消息。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"读取并清空自己的收件箱。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- 基础工具实现(与 s02 保持一致) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead(主控)工具分发(9 个工具) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n}\n\n# 这些基础工具与 s02 保持一致\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"创建在独立线程运行的持久队友。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"列出全部队友(name、role、status)。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"向队友收件箱发送消息。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"读取并清空 leader 收件箱。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"向全部队友广播消息。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = 
BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms15 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: team mailboxes -- multiple models, coordinated through files.\n\"\"\"\ns15_agent_teams.py - Agent Teams\n\nPersistent named agents with file-based JSONL inboxes. Each teammate runs\nits own agent loop in a separate thread. Communication happens through\nappend-only inbox files.\n\n Subagent (s04): spawn -> execute -> return summary -> destroyed\n Teammate (s15): spawn -> work -> idle -> work -> ... -> shutdown\n\n .team/config.json .team/inbox/\n +----------------------------+ +------------------+\n | {\"team_name\": \"default\", | | alice.jsonl |\n | \"members\": [ | | bob.jsonl |\n | {\"name\":\"alice\", | | lead.jsonl |\n | \"role\":\"coder\", | +------------------+\n | \"status\":\"idle\"} |\n | ]} | send_message(\"alice\", \"fix bug\"):\n +----------------------------+ open(\"alice.jsonl\", \"a\").write(msg)\n\n read_inbox(\"alice\"):\n spawn_teammate(\"alice\",\"coder\",...) msgs = [json.loads(l) for l in ...]\n | open(\"alice.jsonl\", \"w\").close()\n v return msgs # drain\n Thread: alice Thread: bob\n +------------------+ +------------------+\n | agent_loop | | agent_loop |\n | status: working | | status: idle |\n | ... runs tools | | ... waits ... |\n | status -> idle | | |\n +------------------+ +------------------+\n\nKey idea: teammates have names, inboxes, and independent loops.\n\nRead this file in this order:\n1. MessageBus: how messages are queued and drained.\n2. TeammateManager: what persistent teammate state looks like.\n3. _teammate_loop / TOOL_HANDLERS: how each named teammate keeps re-entering the same tool loop.\n\nMost common confusion:\n- a teammate is not a one-shot subagent\n- an inbox message is not yet a full protocol request\n\nTeaching boundary:\nthis file teaches persistent named workers plus mailboxes.\nApproval protocols and autonomous policies are added in later chapters.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Spawn teammates and communicate via inboxes.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval\",\n \"plan_approval_response\",\n}\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager: persistent named agents with config.json --\nclass TeammateManager:\n \"\"\"Persistent teammate registry plus worker-loop launcher.\"\"\"\n\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\n f\"Use send_message to communicate. 
Complete your task.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member and member[\"status\"] != \"shutdown\":\n member[\"status\"] = \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def 
member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead tool dispatch (9 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": 
\"spawn_teammate\", \"description\": \"Spawn a persistent teammate that runs in its own thread.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates with name, role, status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms15 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: team mailboxes -- multiple models, coordinated through files.\n\"\"\"\ns15_agent_teams.py - Agent Teams\n\nPersistent named agents with file-based JSONL inboxes. Each teammate runs\nits own agent loop in a separate thread. Communication happens through\nappend-only inbox files.\n\n Subagent (s04): spawn -> execute -> return summary -> destroyed\n Teammate (s15): spawn -> work -> idle -> work -> ... -> shutdown\n\n .team/config.json .team/inbox/\n +----------------------------+ +------------------+\n | {\"team_name\": \"default\", | | alice.jsonl |\n | \"members\": [ | | bob.jsonl |\n | {\"name\":\"alice\", | | lead.jsonl |\n | \"role\":\"coder\", | +------------------+\n | \"status\":\"idle\"} |\n | ]} | send_message(\"alice\", \"fix bug\"):\n +----------------------------+ open(\"alice.jsonl\", \"a\").write(msg)\n\n read_inbox(\"alice\"):\n spawn_teammate(\"alice\",\"coder\",...) msgs = [json.loads(l) for l in ...]\n | open(\"alice.jsonl\", \"w\").close()\n v return msgs # drain\n Thread: alice Thread: bob\n +------------------+ +------------------+\n | agent_loop | | agent_loop |\n | status: working | | status: idle |\n | ... runs tools | | ... waits ... |\n | status -> idle | | |\n +------------------+ +------------------+\n\nKey idea: teammates have names, inboxes, and independent loops.\n\nRead this file in this order:\n1. MessageBus: how messages are queued and drained.\n2. TeammateManager: what persistent teammate state looks like.\n3. _teammate_loop / TOOL_HANDLERS: how each named teammate keeps re-entering the same tool loop.\n\nMost common confusion:\n- a teammate is not a one-shot subagent\n- an inbox message is not yet a full protocol request\n\nTeaching boundary:\nthis file teaches persistent named workers plus mailboxes.\nApproval protocols and autonomous policies are added in later chapters.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Spawn teammates and communicate via inboxes.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval\",\n \"plan_approval_response\",\n}\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager: persistent named agents with config.json --\nclass TeammateManager:\n \"\"\"Persistent teammate registry plus worker-loop launcher.\"\"\"\n\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\n f\"Use send_message to communicate. 
Complete your task.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member and member[\"status\"] != \"shutdown\":\n member[\"status\"] = \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def 
member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead tool dispatch (9 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": 
\"spawn_teammate\", \"description\": \"Spawn a persistent teammate that runs in its own thread.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates with name, role, status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms15 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
},
{
- "id": "s10",
- "filename": "s10_team_protocols.py",
+ "id": "s16",
+ "filename": "s16_team_protocols.py",
"title": "Team Protocols",
- "subtitle": "Shared Communication Rules",
- "loc": 419,
+ "subtitle": "Shared Request-Response Rules",
+ "loc": 482,
"tools": [
"bash",
"read_file",
@@ -561,76 +1150,86 @@
"plan_approval",
"shutdown_request"
],
- "coreAddition": "request_id correlation for two protocols",
- "keyInsight": "One request-response pattern drives all team negotiation",
+ "coreAddition": "Protocol envelopes + request correlation",
+ "keyInsight": "A protocol request is a structured message with an ID; the response must reference the same ID.",
"classes": [
{
"name": "MessageBus",
- "startLine": 87,
- "endLine": 128
+ "startLine": 98,
+ "endLine": 139
+ },
+ {
+ "name": "RequestStore",
+ "startLine": 143,
+ "endLine": 181
},
{
"name": "TeammateManager",
- "startLine": 133,
- "endLine": 290
+ "startLine": 186,
+ "endLine": 357
}
],
"functions": [
{
"name": "_safe_path",
"signature": "def _safe_path(p: str)",
- "startLine": 295
+ "startLine": 362
},
{
"name": "_run_bash",
"signature": "def _run_bash(command: str)",
- "startLine": 302
+ "startLine": 369
},
{
"name": "_run_read",
"signature": "def _run_read(path: str, limit: int = None)",
- "startLine": 317
+ "startLine": 384
},
{
"name": "_run_write",
"signature": "def _run_write(path: str, content: str)",
- "startLine": 327
+ "startLine": 394
},
{
"name": "_run_edit",
"signature": "def _run_edit(path: str, old_text: str, new_text: str)",
- "startLine": 337
+ "startLine": 404
},
{
"name": "handle_shutdown_request",
"signature": "def handle_shutdown_request(teammate: str)",
- "startLine": 350
+ "startLine": 417
},
{
"name": "handle_plan_review",
"signature": "def handle_plan_review(request_id: str, approve: bool, feedback: str = \"\")",
- "startLine": 361
+ "startLine": 435
},
{
"name": "_check_shutdown_status",
"signature": "def _check_shutdown_status(request_id: str)",
- "startLine": 375
+ "startLine": 453
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
- "startLine": 425
+ "startLine": 502
}
],
- "layer": "collaboration",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns10_team_protocols.py - Team Protocols\n\nShutdown protocol and plan approval protocol, both using the same\nrequest_id correlation pattern. Builds on s09's team messaging.\n\n Shutdown FSM: pending -> approved | rejected\n\n Lead Teammate\n +---------------------+ +---------------------+\n | shutdown_request | | |\n | { | -------> | receives request |\n | request_id: abc | | decides: approve? |\n | } | | |\n +---------------------+ +---------------------+\n |\n +---------------------+ +-------v-------------+\n | shutdown_response | <------- | shutdown_response |\n | { | | { |\n | request_id: abc | | request_id: abc |\n | approve: true | | approve: true |\n | } | | } |\n +---------------------+ +---------------------+\n |\n v\n status -> \"shutdown\", thread stops\n\n Plan approval FSM: pending -> approved | rejected\n\n Teammate Lead\n +---------------------+ +---------------------+\n | plan_approval | | |\n | submit: {plan:\"...\"}| -------> | reviews plan text |\n +---------------------+ | approve/reject? |\n +---------------------+\n |\n +---------------------+ +-------v-------------+\n | plan_approval_resp | <------- | plan_approval |\n | {approve: true} | | review: {req_id, |\n +---------------------+ | approve: true} |\n +---------------------+\n\n Trackers: {request_id: {\"target|from\": name, \"status\": \"pending|...\"}}\n\nKey insight: \"Same request_id correlation pattern, two domains.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Manage teammates with shutdown and plan approval protocols.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n# -- Request trackers: correlate by request_id --\nshutdown_requests = {}\nplan_requests = {}\n_tracker_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager with shutdown + plan approval --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\n f\"Submit plans via plan_approval before major work. 
\"\n f\"Respond to shutdown_request with shutdown_response.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n should_exit = False\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n if should_exit:\n break\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n if block.name == \"shutdown_response\" and block.input.get(\"approve\"):\n should_exit = True\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member:\n member[\"status\"] = \"shutdown\" if should_exit else \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n with _tracker_lock:\n if req_id in shutdown_requests:\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": approve},\n )\n return f\"Shutdown {'approved' if approve else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n plan_requests[req_id] = {\"from\": sender, \"plan\": plan_text, \"status\": \"pending\"}\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval_response\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). 
Waiting for lead approval.\"\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request. Approve to shut down, reject to keep working.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval. Provide plan text.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}' (status: pending)\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n with _tracker_lock:\n req = plan_requests.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n with _tracker_lock:\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {req['status']} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n with _tracker_lock:\n return json.dumps(shutdown_requests.get(request_id, {\"error\": \"not found\"}))\n\n\n# -- Lead tool dispatch (12 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": 
\"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down gracefully. Returns a request_id for tracking.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check the status of a shutdown request by request_id.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan. 
Provide request_id + approve + optional feedback.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms10 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ "layer": "platform",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: protocols -- structured handshakes between models.\n\"\"\"\ns16_team_protocols.py - Team Protocols\n\nShutdown protocol and plan approval protocol, both using the same\nrequest_id correlation pattern. Builds on s15's mailbox-based team messaging.\n\n Shutdown FSM: pending -> approved | rejected\n\n Lead Teammate\n +---------------------+ +---------------------+\n | shutdown_request | | |\n | { | -------> | receives request |\n | request_id: abc | | decides: approve? |\n | } | | |\n +---------------------+ +---------------------+\n |\n +---------------------+ +-------v-------------+\n | shutdown_response | <------- | shutdown_response |\n | { | | { |\n | request_id: abc | | request_id: abc |\n | approve: true | | approve: true |\n | } | | } |\n +---------------------+ +---------------------+\n |\n v\n status -> \"shutdown\", thread stops\n\n Plan approval FSM: pending -> approved | rejected\n\n Teammate Lead\n +---------------------+ +---------------------+\n | plan_approval | | |\n | submit: {plan:\"...\"}| -------> | reviews plan text |\n +---------------------+ | approve/reject? |\n +---------------------+\n |\n +---------------------+ +-------v-------------+\n | plan_approval_response| <------ | plan_approval |\n | {approve: true} | | review: {req_id, |\n +---------------------+ | approve: true} |\n +---------------------+\n\n Request store: .team/requests/{request_id}.json\n\nKey idea: one request/response shape can support multiple kinds of team workflow.\nProtocol requests are structured workflow objects, not normal free-form chat.\n\nRead this file in this order:\n1. MessageBus: how protocol envelopes still travel through the same inbox surface.\n2. Request files under .team/requests: how a request keeps durable status after the message is sent.\n3. Protocol handlers: how shutdown and plan approval reuse the same correlation pattern.\n\nMost common confusion:\n- a protocol request is not a normal teammate chat message\n- a request record is not a task record\n\nTeaching boundary:\nthis file teaches durable handshakes first.\nAutonomous claiming, task selection, and worktree assignment stay in later chapters.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\nREQUESTS_DIR = TEAM_DIR / \"requests\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Manage teammates with shutdown and plan approval protocols.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval\",\n \"plan_approval_response\",\n}\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\nclass RequestStore:\n \"\"\"\n Durable request records for protocol workflows.\n\n Protocol state should survive long enough to inspect, resume, or reconcile.\n This store keeps one JSON file per request_id under .team/requests/.\n \"\"\"\n\n def __init__(self, base_dir: Path):\n self.dir = base_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._lock = threading.Lock()\n\n def _path(self, request_id: str) -> Path:\n return self.dir / f\"{request_id}.json\"\n\n def create(self, record: dict) -> dict:\n request_id = record[\"request_id\"]\n with self._lock:\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n def get(self, request_id: str) -> dict | None:\n path = self._path(request_id)\n if not path.exists():\n return None\n return json.loads(path.read_text())\n\n def update(self, request_id: str, **changes) -> dict | None:\n with self._lock:\n record = self.get(request_id)\n if not record:\n return None\n record.update(changes)\n record[\"updated_at\"] = time.time()\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n\nREQUEST_STORE = RequestStore(REQUESTS_DIR)\n\n\n# -- TeammateManager with shutdown + plan approval --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. 
\"\n f\"Submit plans via plan_approval before major work. \"\n f\"Respond to shutdown_request with shutdown_response.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n should_exit = False\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n if should_exit:\n break\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n if block.name == \"shutdown_response\" and block.input.get(\"approve\"):\n should_exit = True\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member:\n member[\"status\"] = \"shutdown\" if should_exit else \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n updated = REQUEST_STORE.update(\n req_id,\n status=\"approved\" if approve else \"rejected\",\n resolved_by=sender,\n resolved_at=time.time(),\n response={\"approve\": approve, \"reason\": args.get(\"reason\", \"\")},\n )\n if not updated:\n return f\"Error: Unknown shutdown request {req_id}\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": approve},\n )\n return f\"Shutdown {'approved' if approve else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"plan_approval\",\n \"from\": sender,\n \"to\": \"lead\",\n \"status\": \"pending\",\n \"plan\": plan_text,\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). 
Waiting for lead approval.\"\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request. Approve to shut down, reject to keep working.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval. Provide plan text.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"shutdown\",\n \"from\": \"lead\",\n \"to\": teammate,\n \"status\": \"pending\",\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}' (status: pending)\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n req = REQUEST_STORE.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n REQUEST_STORE.update(\n request_id,\n status=\"approved\" if approve else \"rejected\",\n reviewed_by=\"lead\",\n resolved_at=time.time(),\n feedback=feedback,\n )\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {'approved' if approve else 'rejected'} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n return json.dumps(REQUEST_STORE.get(request_id) or {\"error\": \"not found\"})\n\n\n# -- Lead tool dispatch (12 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": 
\"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down gracefully. Returns a request_id for tracking.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check the status of a shutdown request by request_id.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan. 
Provide request_id + approve + optional feedback.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms16 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): protocols(协议)——模型间结构化握手。\n\"\"\"\ns16_team_protocols.py - Team Protocols(团队协议)\n\nShutdown protocol(关停协议)与 plan approval protocol(计划审批协议)\n共用同一 request_id correlation pattern(关联模式),并建立在 s15 的邮箱通信之上。\n\n Shutdown FSM(状态机): pending -> approved | rejected\n 1) Lead 发送 `shutdown_request`(包含 request_id)\n 2) Teammate 收到后给出 approve/reject 决策\n 3) Teammate 回传 `shutdown_response`(携带同一 request_id)\n 4) Lead 更新状态;若 approved,则状态进入 \"shutdown\" 并停止线程\n\n Plan approval FSM(计划审批状态机): pending -> approved | rejected\n 1) Teammate 发送 `plan_approval`(提交计划文本)\n 2) Lead 审阅后给出 approve/reject\n 3) Lead 回传 `plan_approval_response`(包含 request_id 与审批结果)\n\n Request store(请求存储):\n `.team/requests/{request_id}.json`\n\n核心观点:同一 request/response 结构可支撑多种团队工作流。\n协议请求是结构化工作流对象,不是普通自由聊天消息。\n\n建议阅读顺序:\n1. MessageBus:协议信封如何仍走同一 inbox(收件箱)通道。\n2. `.team/requests` 请求文件:消息发出后请求状态如何保持可追踪。\n3. 协议处理器:shutdown 与 plan approval 如何复用同一 request_id 关联模式。\n\n最常见混淆点:\n- protocol request(协议请求)不是普通队友聊天消息\n- request record(请求记录)不是 task record(任务记录)\n\n教学边界:\n本文件先讲 durable handshakes(持久握手)。\n自主认领、任务选择与 worktree 分配放在后续章节。\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\nREQUESTS_DIR = TEAM_DIR / \"requests\"\n\nSYSTEM = f\"你是位于 {WORKDIR} 的 team lead(团队负责人),请通过 shutdown 与 plan approval 协议管理队友。\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval\",\n \"plan_approval_response\",\n}\n\n# -- MessageBus:每位队友一个 JSONL 收件箱 --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\nclass RequestStore:\n \"\"\"\n 协议工作流的持久请求记录。\n\n 协议状态应具备可检查、可恢复、可对账的持久性。\n 本存储在 `.team/requests/` 下按 request_id 保存 JSON 文件。\n \"\"\"\n\n def __init__(self, base_dir: Path):\n self.dir = base_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._lock = threading.Lock()\n\n def _path(self, request_id: str) -> Path:\n return self.dir / f\"{request_id}.json\"\n\n def create(self, record: dict) -> dict:\n request_id = record[\"request_id\"]\n with self._lock:\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n def get(self, request_id: str) -> dict | None:\n path = self._path(request_id)\n if not path.exists():\n return None\n return json.loads(path.read_text())\n\n def update(self, request_id: str, **changes) -> dict | None:\n with self._lock:\n record = self.get(request_id)\n if not record:\n return None\n record.update(changes)\n record[\"updated_at\"] = time.time()\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n\nREQUEST_STORE = RequestStore(REQUESTS_DIR)\n\n\n# -- TeammateManager(含 shutdown + plan approval 协议) --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"你是 '{name}',角色为 {role},工作目录位于 {WORKDIR}。\"\n f\"重大工作前请先通过 plan_approval 提交计划。\"\n f\"收到 shutdown_request 时,请用 shutdown_response 回复。\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = 
self._teammate_tools()\n should_exit = False\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n if should_exit:\n break\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n if block.name == \"shutdown_response\" and block.input.get(\"approve\"):\n should_exit = True\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member:\n member[\"status\"] = \"shutdown\" if should_exit else \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # 这些基础工具与 s02 保持一致\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n updated = REQUEST_STORE.update(\n req_id,\n status=\"approved\" if approve else \"rejected\",\n resolved_by=sender,\n resolved_at=time.time(),\n response={\"approve\": approve, \"reason\": args.get(\"reason\", \"\")},\n )\n if not updated:\n return f\"Error: 未知的 shutdown request {req_id}\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": approve},\n )\n return f\"Shutdown {'approved' if approve else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"plan_approval\",\n \"from\": sender,\n \"to\": \"lead\",\n \"status\": \"pending\",\n \"plan\": plan_text,\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"计划已提交(request_id={req_id})。等待 lead(主控)审批。\"\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # 基础工具与 s02 保持一致\n return [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", 
\"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"向队友发送消息。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"读取并清空自己的收件箱。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"响应 shutdown 请求。approve=同意关停,reject=继续工作。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"提交计划给 lead 审批,需提供计划文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- 基础工具实现(与 s02 保持一致) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead(主控)侧协议处理器 --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"shutdown\",\n \"from\": \"lead\",\n \"to\": teammate,\n \"status\": \"pending\",\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n \"lead\", teammate, \"请平滑关停。\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}' (status: pending)\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n req = REQUEST_STORE.get(request_id)\n if not req:\n return f\"Error: 未知的 plan request_id '{request_id}'\"\n REQUEST_STORE.update(\n request_id,\n status=\"approved\" if approve else \"rejected\",\n reviewed_by=\"lead\",\n resolved_at=time.time(),\n feedback=feedback,\n )\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {'approved' if approve else 'rejected'} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n return json.dumps(REQUEST_STORE.get(request_id) or {\"error\": \"not found\"})\n\n\n# -- Lead(主控)工具分发(12 个工具) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n}\n\n# 这些基础工具与 s02 保持一致\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": 
{\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"创建持久队友。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"列出全部队友。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"向队友发送消息。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"读取并清空 lead 收件箱。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"向全体队友广播消息。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"请求某队友优雅关停,返回可追踪 request_id。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"按 request_id 查询 shutdown 请求状态。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"审批队友计划:request_id + approve + 可选反馈。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms16 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n 
history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: protocols -- structured handshakes between models.\n\"\"\"\ns16_team_protocols.py - Team Protocols\n\nShutdown protocol and plan approval protocol, both using the same\nrequest_id correlation pattern. Builds on s15's mailbox-based team messaging.\n\n Shutdown FSM: pending -> approved | rejected\n\n Lead Teammate\n +---------------------+ +---------------------+\n | shutdown_request | | |\n | { | -------> | receives request |\n | request_id: abc | | decides: approve? |\n | } | | |\n +---------------------+ +---------------------+\n |\n +---------------------+ +-------v-------------+\n | shutdown_response | <------- | shutdown_response |\n | { | | { |\n | request_id: abc | | request_id: abc |\n | approve: true | | approve: true |\n | } | | } |\n +---------------------+ +---------------------+\n |\n v\n status -> \"shutdown\", thread stops\n\n Plan approval FSM: pending -> approved | rejected\n\n Teammate Lead\n +---------------------+ +---------------------+\n | plan_approval | | |\n | submit: {plan:\"...\"}| -------> | reviews plan text |\n +---------------------+ | approve/reject? |\n +---------------------+\n |\n +---------------------+ +-------v-------------+\n | plan_approval_response| <------ | plan_approval |\n | {approve: true} | | review: {req_id, |\n +---------------------+ | approve: true} |\n +---------------------+\n\n Request store: .team/requests/{request_id}.json\n\nKey idea: one request/response shape can support multiple kinds of team workflow.\nProtocol requests are structured workflow objects, not normal free-form chat.\n\nRead this file in this order:\n1. MessageBus: how protocol envelopes still travel through the same inbox surface.\n2. Request files under .team/requests: how a request keeps durable status after the message is sent.\n3. Protocol handlers: how shutdown and plan approval reuse the same correlation pattern.\n\nMost common confusion:\n- a protocol request is not a normal teammate chat message\n- a request record is not a task record\n\nTeaching boundary:\nthis file teaches durable handshakes first.\nAutonomous claiming, task selection, and worktree assignment stay in later chapters.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\nREQUESTS_DIR = TEAM_DIR / \"requests\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Manage teammates with shutdown and plan approval protocols.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval\",\n \"plan_approval_response\",\n}\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\nclass RequestStore:\n \"\"\"\n Durable request records for protocol workflows.\n\n Protocol state should survive long enough to inspect, resume, or reconcile.\n This store keeps one JSON file per request_id under .team/requests/.\n \"\"\"\n\n def __init__(self, base_dir: Path):\n self.dir = base_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._lock = threading.Lock()\n\n def _path(self, request_id: str) -> Path:\n return self.dir / f\"{request_id}.json\"\n\n def create(self, record: dict) -> dict:\n request_id = record[\"request_id\"]\n with self._lock:\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n def get(self, request_id: str) -> dict | None:\n path = self._path(request_id)\n if not path.exists():\n return None\n return json.loads(path.read_text())\n\n def update(self, request_id: str, **changes) -> dict | None:\n with self._lock:\n record = self.get(request_id)\n if not record:\n return None\n record.update(changes)\n record[\"updated_at\"] = time.time()\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n\nREQUEST_STORE = RequestStore(REQUESTS_DIR)\n\n\n# -- TeammateManager with shutdown + plan approval --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. 
\"\n f\"Submit plans via plan_approval before major work. \"\n f\"Respond to shutdown_request with shutdown_response.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n should_exit = False\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n if should_exit:\n break\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n if block.name == \"shutdown_response\" and block.input.get(\"approve\"):\n should_exit = True\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member:\n member[\"status\"] = \"shutdown\" if should_exit else \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n updated = REQUEST_STORE.update(\n req_id,\n status=\"approved\" if approve else \"rejected\",\n resolved_by=sender,\n resolved_at=time.time(),\n response={\"approve\": approve, \"reason\": args.get(\"reason\", \"\")},\n )\n if not updated:\n return f\"Error: Unknown shutdown request {req_id}\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": approve},\n )\n return f\"Shutdown {'approved' if approve else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"plan_approval\",\n \"from\": sender,\n \"to\": \"lead\",\n \"status\": \"pending\",\n \"plan\": plan_text,\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). 
Waiting for lead approval.\"\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request. Approve to shut down, reject to keep working.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval. Provide plan text.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"shutdown\",\n \"from\": \"lead\",\n \"to\": teammate,\n \"status\": \"pending\",\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}' (status: pending)\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n req = REQUEST_STORE.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n REQUEST_STORE.update(\n request_id,\n status=\"approved\" if approve else \"rejected\",\n reviewed_by=\"lead\",\n resolved_at=time.time(),\n feedback=feedback,\n )\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {'approved' if approve else 'rejected'} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n return json.dumps(REQUEST_STORE.get(request_id) or {\"error\": \"not found\"})\n\n\n# -- Lead tool dispatch (12 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": 
\"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down gracefully. Returns a request_id for tracking.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check the status of a shutdown request by request_id.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan. 
Provide request_id + approve + optional feedback.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms16 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: protocols -- structured handshakes between models.\n\"\"\"\ns16_team_protocols.py - Team Protocols\n\nShutdown protocol and plan approval protocol, both using the same\nrequest_id correlation pattern. Builds on s15's mailbox-based team messaging.\n\n Shutdown FSM: pending -> approved | rejected\n\n Lead Teammate\n +---------------------+ +---------------------+\n | shutdown_request | | |\n | { | -------> | receives request |\n | request_id: abc | | decides: approve? |\n | } | | |\n +---------------------+ +---------------------+\n |\n +---------------------+ +-------v-------------+\n | shutdown_response | <------- | shutdown_response |\n | { | | { |\n | request_id: abc | | request_id: abc |\n | approve: true | | approve: true |\n | } | | } |\n +---------------------+ +---------------------+\n |\n v\n status -> \"shutdown\", thread stops\n\n Plan approval FSM: pending -> approved | rejected\n\n Teammate Lead\n +---------------------+ +---------------------+\n | plan_approval | | |\n | submit: {plan:\"...\"}| -------> | reviews plan text |\n +---------------------+ | approve/reject? |\n +---------------------+\n |\n +---------------------+ +-------v-------------+\n | plan_approval_response| <------ | plan_approval |\n | {approve: true} | | review: {req_id, |\n +---------------------+ | approve: true} |\n +---------------------+\n\n Request store: .team/requests/{request_id}.json\n\nKey idea: one request/response shape can support multiple kinds of team workflow.\nProtocol requests are structured workflow objects, not normal free-form chat.\n\nRead this file in this order:\n1. MessageBus: how protocol envelopes still travel through the same inbox surface.\n2. Request files under .team/requests: how a request keeps durable status after the message is sent.\n3. Protocol handlers: how shutdown and plan approval reuse the same correlation pattern.\n\nMost common confusion:\n- a protocol request is not a normal teammate chat message\n- a request record is not a task record\n\nTeaching boundary:\nthis file teaches durable handshakes first.\nAutonomous claiming, task selection, and worktree assignment stay in later chapters.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\nREQUESTS_DIR = TEAM_DIR / \"requests\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Manage teammates with shutdown and plan approval protocols.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval\",\n \"plan_approval_response\",\n}\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\nclass RequestStore:\n \"\"\"\n Durable request records for protocol workflows.\n\n Protocol state should survive long enough to inspect, resume, or reconcile.\n This store keeps one JSON file per request_id under .team/requests/.\n \"\"\"\n\n def __init__(self, base_dir: Path):\n self.dir = base_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._lock = threading.Lock()\n\n def _path(self, request_id: str) -> Path:\n return self.dir / f\"{request_id}.json\"\n\n def create(self, record: dict) -> dict:\n request_id = record[\"request_id\"]\n with self._lock:\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n def get(self, request_id: str) -> dict | None:\n path = self._path(request_id)\n if not path.exists():\n return None\n return json.loads(path.read_text())\n\n def update(self, request_id: str, **changes) -> dict | None:\n with self._lock:\n record = self.get(request_id)\n if not record:\n return None\n record.update(changes)\n record[\"updated_at\"] = time.time()\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n\nREQUEST_STORE = RequestStore(REQUESTS_DIR)\n\n\n# -- TeammateManager with shutdown + plan approval --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. 
\"\n f\"Submit plans via plan_approval before major work. \"\n f\"Respond to shutdown_request with shutdown_response.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n should_exit = False\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n if should_exit:\n break\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n if block.name == \"shutdown_response\" and block.input.get(\"approve\"):\n should_exit = True\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member:\n member[\"status\"] = \"shutdown\" if should_exit else \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n updated = REQUEST_STORE.update(\n req_id,\n status=\"approved\" if approve else \"rejected\",\n resolved_by=sender,\n resolved_at=time.time(),\n response={\"approve\": approve, \"reason\": args.get(\"reason\", \"\")},\n )\n if not updated:\n return f\"Error: Unknown shutdown request {req_id}\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": approve},\n )\n return f\"Shutdown {'approved' if approve else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"plan_approval\",\n \"from\": sender,\n \"to\": \"lead\",\n \"status\": \"pending\",\n \"plan\": plan_text,\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). 
Waiting for lead approval.\"\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request. Approve to shut down, reject to keep working.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval. Provide plan text.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"shutdown\",\n \"from\": \"lead\",\n \"to\": teammate,\n \"status\": \"pending\",\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}' (status: pending)\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n req = REQUEST_STORE.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n REQUEST_STORE.update(\n request_id,\n status=\"approved\" if approve else \"rejected\",\n reviewed_by=\"lead\",\n resolved_at=time.time(),\n feedback=feedback,\n )\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {'approved' if approve else 'rejected'} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n return json.dumps(REQUEST_STORE.get(request_id) or {\"error\": \"not found\"})\n\n\n# -- Lead tool dispatch (12 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": 
\"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down gracefully. Returns a request_id for tracking.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check the status of a shutdown request by request_id.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan. 
Provide request_id + approve + optional feedback.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}:\")\n print(str(output)[:200])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms16 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
},
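Both s16 protocols above (shutdown and plan approval) hang off the same correlation pattern: the initiator persists a pending record keyed by a short request_id, the counterparty answers carrying that same request_id, and the record is resolved in place. The sketch below is a minimal, self-contained illustration of that handshake, not the file's actual code; the `DemoRequestStore` name, the in-memory dict (standing in for `.team/requests/{request_id}.json`), and the `demo_*` helpers are assumptions made for brevity.

```python
# Minimal sketch of the request_id correlation pattern behind the s16
# shutdown and plan-approval protocols. Names here (DemoRequestStore,
# demo_request, demo_respond) are illustrative, not taken from the file.
import time
import uuid


class DemoRequestStore:
    """In-memory stand-in for the durable .team/requests/*.json records."""

    def __init__(self):
        self._records = {}

    def create(self, record: dict) -> dict:
        self._records[record["request_id"]] = record
        return record

    def update(self, request_id: str, **changes) -> dict | None:
        record = self._records.get(request_id)
        if not record:
            return None
        record.update(changes, updated_at=time.time())
        return record


def demo_request(store: DemoRequestStore, kind: str, sender: str, to: str) -> str:
    # Initiator: persist a pending record, then send a message carrying request_id.
    req_id = str(uuid.uuid4())[:8]
    store.create({"request_id": req_id, "kind": kind, "from": sender,
                  "to": to, "status": "pending", "created_at": time.time()})
    return req_id


def demo_respond(store: DemoRequestStore, req_id: str, approve: bool) -> dict | None:
    # Counterparty: resolve the same record by request_id.
    return store.update(req_id, status="approved" if approve else "rejected",
                        resolved_at=time.time())


if __name__ == "__main__":
    store = DemoRequestStore()
    rid = demo_request(store, "shutdown", "lead", "coder")
    print(demo_respond(store, rid, approve=True))  # record now shows status: approved
```

The point of the shared shape is that adding a new protocol (here, plan approval on top of shutdown) only means a new `kind` value and a new responder, not a new storage or correlation mechanism.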
{
- "id": "s11",
- "filename": "s11_autonomous_agents.py",
+ "id": "s17",
+ "filename": "s17_autonomous_agents.py",
"title": "Autonomous Agents",
- "subtitle": "Scan Board, Claim Tasks",
- "loc": 499,
+ "subtitle": "Self-Claim and Self-Resume",
+ "loc": 603,
"tools": [
"bash",
"read_file",
@@ -651,91 +1250,116 @@
"idle",
"claim_task"
],
- "coreAddition": "Task board polling + timeout-based self-governance",
- "keyInsight": "Teammates scan the board and claim tasks themselves; no need for the lead to assign each one",
+ "coreAddition": "Idle polling + role-aware self-claim + resume context",
+ "keyInsight": "Autonomy is a bounded mechanism -- idle, scan, claim, resume -- not magic.",
"classes": [
{
"name": "MessageBus",
- "startLine": 80,
- "endLine": 121
+ "startLine": 84,
+ "endLine": 125
+ },
+ {
+ "name": "RequestStore",
+ "startLine": 129,
+ "endLine": 167
},
{
"name": "TeammateManager",
- "startLine": 159,
- "endLine": 368
+ "startLine": 249,
+ "endLine": 480
}
],
"functions": [
{
- "name": "scan_unclaimed_tasks",
- "signature": "def scan_unclaimed_tasks()",
- "startLine": 126
+ "name": "_append_claim_event",
+ "signature": "def _append_claim_event(payload: dict)",
+ "startLine": 172
+ },
+ {
+ "name": "_task_allows_role",
+ "signature": "def _task_allows_role(task: dict, role: str | None)",
+ "startLine": 178
+ },
+ {
+ "name": "is_claimable_task",
+ "signature": "def is_claimable_task(task: dict, role: str | None = None)",
+ "startLine": 185
},
{
- "name": "claim_task",
- "signature": "def claim_task(task_id: int, owner: str)",
- "startLine": 138
+ "name": "scan_unclaimed_tasks",
+ "signature": "def scan_unclaimed_tasks(role: str | None = None)",
+ "startLine": 194
},
{
"name": "make_identity_block",
"signature": "def make_identity_block(name: str, role: str, team_name: str)",
- "startLine": 151
+ "startLine": 234
+ },
+ {
+ "name": "ensure_identity_context",
+ "signature": "def ensure_identity_context(messages: list, name: str, role: str, team_name: str)",
+ "startLine": 241
},
{
"name": "_safe_path",
"signature": "def _safe_path(p: str)",
- "startLine": 373
+ "startLine": 485
},
{
"name": "_run_bash",
"signature": "def _run_bash(command: str)",
- "startLine": 380
+ "startLine": 492
},
{
"name": "_run_read",
"signature": "def _run_read(path: str, limit: int = None)",
- "startLine": 395
+ "startLine": 507
},
{
"name": "_run_write",
"signature": "def _run_write(path: str, content: str)",
- "startLine": 405
+ "startLine": 517
},
{
"name": "_run_edit",
"signature": "def _run_edit(path: str, old_text: str, new_text: str)",
- "startLine": 415
+ "startLine": 527
},
{
"name": "handle_shutdown_request",
"signature": "def handle_shutdown_request(teammate: str)",
- "startLine": 428
+ "startLine": 540
},
{
"name": "handle_plan_review",
"signature": "def handle_plan_review(request_id: str, approve: bool, feedback: str = \"\")",
- "startLine": 439
+ "startLine": 558
},
{
"name": "_check_shutdown_status",
"signature": "def _check_shutdown_status(request_id: str)",
- "startLine": 453
+ "startLine": 576
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
- "startLine": 509
+ "startLine": 631
}
],
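The function list above adds role-aware claiming (`_task_allows_role`, `is_claimable_task`, `scan_unclaimed_tasks(role)`) and an append-only claim event log. A condensed, hedged sketch of that filter-then-claim flow follows; the `claimable`/`claim_first` names, the `.tasks/` layout, and the sample fields mirror the listing but are assumptions for this sketch, not the file's exact code.

```python
# Condensed sketch of role-aware task claiming: filter by status, owner,
# blockers and role, then claim under a lock and append a claim event.
import json
import threading
import time
from pathlib import Path

TASKS = Path(".tasks")
CLAIM_LOCK = threading.Lock()


def claimable(task: dict, role: str | None) -> bool:
    required = task.get("claim_role") or task.get("required_role") or ""
    role_ok = not required or (role is not None and role == required)
    return (task.get("status") == "pending" and not task.get("owner")
            and not task.get("blockedBy") and role_ok)


def claim_first(owner: str, role: str | None) -> str | None:
    TASKS.mkdir(exist_ok=True)
    with CLAIM_LOCK:  # one claimer at a time within this process
        for path in sorted(TASKS.glob("task_*.json")):
            task = json.loads(path.read_text())
            if not claimable(task, role):
                continue
            task.update(owner=owner, status="in_progress", claimed_at=time.time())
            path.write_text(json.dumps(task, indent=2))
            with (TASKS / "claim_events.jsonl").open("a") as log:
                log.write(json.dumps({"event": "task.claimed", "task_id": task["id"],
                                      "owner": owner, "role": role,
                                      "ts": time.time()}) + "\n")
            return f"Claimed task #{task['id']} for {owner}"
    return None
```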
- "layer": "collaboration",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns11_autonomous_agents.py - Autonomous Agents\n\nIdle cycle with task board polling, auto-claiming unclaimed tasks, and\nidentity re-injection after context compression. Builds on s10's protocols.\n\n Teammate lifecycle:\n +-------+\n | spawn |\n +---+---+\n |\n v\n +-------+ tool_use +-------+\n | WORK | <----------- | LLM |\n +---+---+ +-------+\n |\n | stop_reason != tool_use\n v\n +--------+\n | IDLE | poll every 5s for up to 60s\n +---+----+\n |\n +---> check inbox -> message? -> resume WORK\n |\n +---> scan .tasks/ -> unclaimed? -> claim -> resume WORK\n |\n +---> timeout (60s) -> shutdown\n\n Identity re-injection after compression:\n messages = [identity_block, ...remaining...]\n \"You are 'coder', role: backend, team: my-team\"\n\nKey insight: \"The agent finds work itself.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\nTASKS_DIR = WORKDIR / \".tasks\"\n\nPOLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Teammates are autonomous -- they find work themselves.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n# -- Request trackers --\nshutdown_requests = {}\nplan_requests = {}\n_tracker_lock = threading.Lock()\n_claim_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- Task board scanning --\ndef scan_unclaimed_tasks() -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n\n\ndef claim_task(task_id: int, owner: str) -> str:\n with _claim_lock:\n path = TASKS_DIR / f\"task_{task_id}.json\"\n if not path.exists():\n return f\"Error: Task {task_id} not found\"\n task = json.loads(path.read_text())\n task[\"owner\"] = owner\n task[\"status\"] = \"in_progress\"\n path.write_text(json.dumps(task, indent=2))\n return f\"Claimed task #{task_id} for {owner}\"\n\n\n# -- Identity re-injection after compression --\ndef make_identity_block(name: str, role: str, team_name: str) -> dict:\n return {\n \"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, team: {team_name}. Continue your work. \",\n }\n\n\n# -- Autonomous TeammateManager --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def _set_status(self, name: str, status: str):\n member = self._find_member(name)\n if member:\n member[\"status\"] = status\n self._save_config()\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _loop(self, name: str, role: str, prompt: str):\n team_name = self.config[\"team_name\"]\n sys_prompt = (\n f\"You are '{name}', role: {role}, team: {team_name}, at {WORKDIR}. 
\"\n f\"Use idle tool when you have no more work. You will auto-claim new tasks.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n\n while True:\n # -- WORK PHASE: standard agent loop --\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n self._set_status(name, \"idle\")\n return\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n idle_requested = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"idle\":\n idle_requested = True\n output = \"Entering idle phase. Will poll for new tasks.\"\n else:\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n if idle_requested:\n break\n\n # -- IDLE PHASE: poll for inbox messages and unclaimed tasks --\n self._set_status(name, \"idle\")\n resume = False\n polls = IDLE_TIMEOUT // max(POLL_INTERVAL, 1)\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n resume = True\n break\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n claim_task(task[\"id\"], name)\n task_prompt = (\n f\"Task #{task['id']}: {task['subject']}\\n\"\n f\"{task.get('description', '')} \"\n )\n if len(messages) <= 3:\n messages.insert(0, make_identity_block(name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\", \"content\": f\"I am {name}. Continuing.\"})\n messages.append({\"role\": \"user\", \"content\": task_prompt})\n messages.append({\"role\": \"assistant\", \"content\": f\"Claimed task #{task['id']}. 
Working on it.\"})\n resume = True\n break\n\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n with _tracker_lock:\n if req_id in shutdown_requests:\n shutdown_requests[req_id][\"status\"] = \"approved\" if args[\"approve\"] else \"rejected\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": args[\"approve\"]},\n )\n return f\"Shutdown {'approved' if args['approve'] else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n plan_requests[req_id] = {\"from\": sender, \"plan\": plan_text, \"status\": \"pending\"}\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval_response\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). Waiting for approval.\"\n if tool_name == \"claim_task\":\n return claim_task(args[\"task_id\"], sender)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": 
\"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n {\"name\": \"idle\", \"description\": \"Signal that you have no more work. Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the task board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}'\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n with _tracker_lock:\n req = plan_requests.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n with _tracker_lock:\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {req['status']} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n with _tracker_lock:\n return json.dumps(shutdown_requests.get(request_id, {\"error\": \"not found\"}))\n\n\n# -- Lead tool dispatch (14 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n \"idle\": lambda **kw: \"Lead does not idle.\",\n \"claim_task\": lambda **kw: claim_task(kw[\"task_id\"], \"lead\"),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": 
{\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check shutdown request status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"idle\", \"description\": \"Enter idle state (for lead -- rarely used).\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: 
{str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms11 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n if query.strip() == \"/tasks\":\n TASKS_DIR.mkdir(exist_ok=True)\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n t = json.loads(f.read_text())\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n owner = f\" @{t['owner']}\" if t.get(\"owner\") else \"\"\n print(f\" {marker} #{t['id']}: {t['subject']}{owner}\")\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
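Both the removed s11 source above and the new s17 source below re-inject the teammate's identity at the front of the message list when earlier context has been trimmed, so a resumed teammate still knows who it is before claiming more work. A small standalone sketch of that idea follows; the prefix check used to detect an existing identity block is an assumption chosen for the sketch, not the file's exact condition.

```python
# Sketch of identity re-injection after context compression. The
# "You are '<name>'" prefix check is an assumption made for this sketch.
def identity_block(name: str, role: str, team: str) -> dict:
    return {"role": "user",
            "content": f"You are '{name}', role: {role}, team: {team}. Continue your work."}


def ensure_identity(messages: list, name: str, role: str, team: str) -> None:
    first = str(messages[0].get("content", "")) if messages else ""
    if first.startswith(f"You are '{name}'"):
        return  # identity already leads the transcript, nothing to do
    messages.insert(0, identity_block(name, role, team))
    messages.insert(1, {"role": "assistant", "content": f"I am {name}. Continuing."})


if __name__ == "__main__":
    history = [{"role": "user", "content": "Task #3: add tests"}]
    ensure_identity(history, "coder", "backend", "my-team")
    print(history[0]["content"])  # identity block now leads the transcript
```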
+ "layer": "platform",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: autonomy -- models that find work without being told.\n\"\"\"\ns17_autonomous_agents.py - Autonomous Agents\n\nIdle cycle with task board polling, auto-claiming unclaimed tasks, and\nidentity re-injection after context compression. Builds on task boards,\nteam mailboxes, and protocol support from earlier chapters.\n\n Teammate lifecycle:\n +-------+\n | spawn |\n +---+---+\n |\n v\n +-------+ tool_use +-------+\n | WORK | <----------- | LLM |\n +---+---+ +-------+\n |\n | stop_reason != tool_use\n v\n +--------+\n | IDLE | poll every 5s for up to 60s\n +---+----+\n |\n +---> check inbox -> message? -> resume WORK\n |\n +---> scan .tasks/ -> unclaimed? -> claim -> resume WORK\n |\n +---> timeout (60s) -> shutdown\n\n Identity re-injection after compression:\n messages = [identity_block, ...remaining...]\n \"You are 'coder', role: backend, team: my-team\"\n\nKey idea: an idle teammate can safely claim ready work instead of waiting\nfor every assignment from the lead.\nA teammate here is a long-lived worker, not a one-shot subagent that only\nreturns a single summary.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\nTASKS_DIR = WORKDIR / \".tasks\"\nREQUESTS_DIR = TEAM_DIR / \"requests\"\nCLAIM_EVENTS_PATH = TASKS_DIR / \"claim_events.jsonl\"\n\nPOLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Teammates are autonomous -- they find work themselves.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval\",\n \"plan_approval_response\",\n}\n\n_claim_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\nclass RequestStore:\n \"\"\"\n Durable protocol request records.\n\n s17 should not regress from s16 back to in-memory trackers. 
These request\n files let autonomous teammates inspect or resume protocol state later.\n \"\"\"\n\n def __init__(self, base_dir: Path):\n self.dir = base_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._lock = threading.Lock()\n\n def _path(self, request_id: str) -> Path:\n return self.dir / f\"{request_id}.json\"\n\n def create(self, record: dict) -> dict:\n request_id = record[\"request_id\"]\n with self._lock:\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n def get(self, request_id: str) -> dict | None:\n path = self._path(request_id)\n if not path.exists():\n return None\n return json.loads(path.read_text())\n\n def update(self, request_id: str, **changes) -> dict | None:\n with self._lock:\n record = self.get(request_id)\n if not record:\n return None\n record.update(changes)\n record[\"updated_at\"] = time.time()\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n\nREQUEST_STORE = RequestStore(REQUESTS_DIR)\n\n\n# -- Task board scanning --\ndef _append_claim_event(payload: dict):\n TASKS_DIR.mkdir(parents=True, exist_ok=True)\n with CLAIM_EVENTS_PATH.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n\n\ndef _task_allows_role(task: dict, role: str | None) -> bool:\n required_role = task.get(\"claim_role\") or task.get(\"required_role\") or \"\"\n if not required_role:\n return True\n return bool(role) and role == required_role\n\n\ndef is_claimable_task(task: dict, role: str | None = None) -> bool:\n return (\n task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")\n and _task_allows_role(task, role)\n )\n\n\ndef scan_unclaimed_tasks(role: str | None = None) -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if is_claimable_task(task, role):\n unclaimed.append(task)\n return unclaimed\n\n\ndef claim_task(\n task_id: int,\n owner: str,\n role: str | None = None,\n source: str = \"manual\",\n) -> str:\n with _claim_lock:\n path = TASKS_DIR / f\"task_{task_id}.json\"\n if not path.exists():\n return f\"Error: Task {task_id} not found\"\n task = json.loads(path.read_text())\n if not is_claimable_task(task, role):\n return f\"Error: Task {task_id} is not claimable for role={role or '(any)'}\"\n task[\"owner\"] = owner\n task[\"status\"] = \"in_progress\"\n task[\"claimed_at\"] = time.time()\n task[\"claim_source\"] = source\n path.write_text(json.dumps(task, indent=2))\n _append_claim_event({\n \"event\": \"task.claimed\",\n \"task_id\": task_id,\n \"owner\": owner,\n \"role\": role,\n \"source\": source,\n \"ts\": time.time(),\n })\n return f\"Claimed task #{task_id} for {owner} via {source}\"\n\n\n# -- Identity re-injection after compression --\ndef make_identity_block(name: str, role: str, team_name: str) -> dict:\n return {\n \"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, team: {team_name}. Continue your work. \",\n }\n\n\ndef ensure_identity_context(messages: list, name: str, role: str, team_name: str):\n if messages and \"\" in str(messages[0].get(\"content\", \"\")):\n return\n messages.insert(0, make_identity_block(name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\", \"content\": f\"I am {name}. 
Continuing.\"})\n\n\n# -- Autonomous TeammateManager --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def _set_status(self, name: str, status: str):\n member = self._find_member(name)\n if member:\n member[\"status\"] = status\n self._save_config()\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _loop(self, name: str, role: str, prompt: str):\n team_name = self.config[\"team_name\"]\n sys_prompt = (\n f\"You are '{name}', role: {role}, team: {team_name}, at {WORKDIR}. \"\n f\"Use idle tool when you have no more work. You will auto-claim new tasks.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n\n while True:\n # -- WORK PHASE: standard agent loop --\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n self._set_status(name, \"idle\")\n return\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n idle_requested = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"idle\":\n idle_requested = True\n output = \"Entering idle phase. 
Will poll for new tasks.\"\n else:\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n if idle_requested:\n break\n\n # -- IDLE PHASE: poll for inbox messages and unclaimed tasks --\n self._set_status(name, \"idle\")\n resume = False\n polls = IDLE_TIMEOUT // max(POLL_INTERVAL, 1)\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n ensure_identity_context(messages, name, role, team_name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n resume = True\n break\n unclaimed = scan_unclaimed_tasks(role)\n if unclaimed:\n task = unclaimed[0]\n claim_result = claim_task(\n task[\"id\"], name, role=role, source=\"auto\"\n )\n if claim_result.startswith(\"Error:\"):\n continue\n task_prompt = (\n f\"Task #{task['id']}: {task['subject']}\\n\"\n f\"{task.get('description', '')} \"\n )\n ensure_identity_context(messages, name, role, team_name)\n messages.append({\"role\": \"user\", \"content\": task_prompt})\n messages.append({\"role\": \"assistant\", \"content\": f\"{claim_result}. Working on it.\"})\n resume = True\n break\n\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n updated = REQUEST_STORE.update(\n req_id,\n status=\"approved\" if args[\"approve\"] else \"rejected\",\n resolved_by=sender,\n resolved_at=time.time(),\n response={\"approve\": args[\"approve\"], \"reason\": args.get(\"reason\", \"\")},\n )\n if not updated:\n return f\"Error: Unknown shutdown request {req_id}\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": args[\"approve\"]},\n )\n return f\"Shutdown {'approved' if args['approve'] else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"plan_approval\",\n \"from\": sender,\n \"to\": \"lead\",\n \"status\": \"pending\",\n \"plan\": plan_text,\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). 
Waiting for approval.\"\n if tool_name == \"claim_task\":\n return claim_task(\n args[\"task_id\"],\n sender,\n role=self._find_member(sender).get(\"role\") if self._find_member(sender) else None,\n source=\"manual\",\n )\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n {\"name\": \"idle\", \"description\": \"Signal that you have no more work. 
Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the task board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"shutdown\",\n \"from\": \"lead\",\n \"to\": teammate,\n \"status\": \"pending\",\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}'\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n req = REQUEST_STORE.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n REQUEST_STORE.update(\n request_id,\n status=\"approved\" if approve else \"rejected\",\n reviewed_by=\"lead\",\n resolved_at=time.time(),\n feedback=feedback,\n )\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {'approved' if approve else 'rejected'} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n return json.dumps(REQUEST_STORE.get(request_id) or 
{\"error\": \"not found\"})\n\n\n# -- Lead tool dispatch (14 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n \"idle\": lambda **kw: \"Lead does not idle.\",\n \"claim_task\": lambda **kw: claim_task(kw[\"task_id\"], \"lead\"),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": 
{\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check shutdown request status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"idle\", \"description\": \"Enter idle state (for lead -- rarely used).\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms17 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n if query.strip() == \"/tasks\":\n TASKS_DIR.mkdir(exist_ok=True)\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n t = json.loads(f.read_text())\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n owner = f\" @{t['owner']}\" if t.get(\"owner\") else \"\"\n print(f\" {marker} #{t['id']}: {t['subject']}{owner}\")\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): autonomy(自主性)——模型无需等待指派也能发现工作。\n\"\"\"\ns17_autonomous_agents.py - Autonomous Agents(自主智能体)\n\n通过 idle 循环轮询任务板、自动认领未归属任务,并在上下文压缩后重注入身份信息。\n本章建立在此前的任务板、团队邮箱与协议机制之上。\n\n Teammate lifecycle(队友生命周期):\n +-------+\n | spawn(创建) |\n +---+---+\n |\n v\n +-------+ tool_use(工具调用) +-------+\n | WORK(工作) | <----------- | LLM |\n +---+---+ +-------+\n |\n | stop_reason != tool_use(未触发工具调用)\n v\n +--------+\n | IDLE | 每 5s 轮询一次,最长 60s\n +---+----+\n |\n +---> check inbox(检查收件箱)-> 有消息? -> resume WORK\n |\n +---> scan .tasks/(扫描任务板)-> 有未认领? -> claim -> resume WORK\n |\n +---> timeout(60s)-> shutdown(关停)\n\n Identity re-injection after compression(压缩后身份重注入):\n messages = [identity_block(身份块), ...remaining(其余消息)...]\n \"You are 'coder', role: backend, team: my-team\"(你是 coder,角色 backend,团队 my-team)\n\n核心观点:处于 idle 的队友可安全认领可执行任务,\n而不是每次都等待 lead(负责人)指派。\n这里的 teammate(队友)是长生命周期 worker(执行者),不是一次性 subagent(子智能体)。\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\nTASKS_DIR = WORKDIR / \".tasks\"\nREQUESTS_DIR = TEAM_DIR / \"requests\"\nCLAIM_EVENTS_PATH = TASKS_DIR / \"claim_events.jsonl\"\n\nPOLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\nSYSTEM = f\"你是位于 {WORKDIR} 的 team lead(团队负责人),队友具备自主性,会自行发现工作。\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval\",\n \"plan_approval_response\",\n}\n\n_claim_lock = threading.Lock()\n\n\n# -- MessageBus:每位队友一个 JSONL 收件箱 --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\nclass RequestStore:\n \"\"\"\n 协议请求的持久化记录。\n\n s17 不应从 s16 退回内存态追踪。\n 这些请求文件允许自主队友在后续检查或恢复协议状态。\n \"\"\"\n\n def __init__(self, base_dir: Path):\n self.dir = base_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._lock = threading.Lock()\n\n def _path(self, request_id: str) -> Path:\n return self.dir / f\"{request_id}.json\"\n\n def create(self, record: dict) -> dict:\n request_id = record[\"request_id\"]\n with self._lock:\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n def get(self, request_id: str) -> dict | None:\n path = self._path(request_id)\n if not path.exists():\n return None\n return json.loads(path.read_text())\n\n def update(self, request_id: str, **changes) -> dict | None:\n with self._lock:\n record = self.get(request_id)\n if not record:\n return None\n record.update(changes)\n record[\"updated_at\"] = time.time()\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n\nREQUEST_STORE = RequestStore(REQUESTS_DIR)\n\n\n# -- 任务板扫描 --\ndef _append_claim_event(payload: dict):\n TASKS_DIR.mkdir(parents=True, exist_ok=True)\n with CLAIM_EVENTS_PATH.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n\n\ndef _task_allows_role(task: dict, role: str | None) -> bool:\n required_role = task.get(\"claim_role\") or task.get(\"required_role\") or \"\"\n if not required_role:\n return True\n return bool(role) and role == required_role\n\n\ndef is_claimable_task(task: dict, role: str | None = None) -> bool:\n return (\n task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")\n and _task_allows_role(task, role)\n )\n\n\ndef scan_unclaimed_tasks(role: str | None = None) -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if is_claimable_task(task, role):\n unclaimed.append(task)\n return unclaimed\n\n\ndef claim_task(\n task_id: int,\n owner: str,\n role: str | None = None,\n source: str = \"manual\",\n) -> str:\n with _claim_lock:\n path = TASKS_DIR / f\"task_{task_id}.json\"\n if not path.exists():\n return f\"Error: Task {task_id} not found\"\n task = json.loads(path.read_text())\n if not is_claimable_task(task, role):\n return f\"Error: 任务 {task_id} 对 role={role or '(any)'} 不可认领\"\n task[\"owner\"] = owner\n task[\"status\"] = \"in_progress\"\n task[\"claimed_at\"] = time.time()\n task[\"claim_source\"] = source\n path.write_text(json.dumps(task, indent=2))\n _append_claim_event({\n \"event\": \"task.claimed\",\n \"task_id\": task_id,\n \"owner\": owner,\n 
\"role\": role,\n \"source\": source,\n \"ts\": time.time(),\n })\n return f\"Claimed task #{task_id} for {owner} via {source}\"\n\n\n# -- 压缩后的身份重注入 --\ndef make_identity_block(name: str, role: str, team_name: str) -> dict:\n return {\n \"role\": \"user\",\n \"content\": f\"你是 '{name}',角色 {role},团队 {team_name}。请继续当前工作。 \",\n }\n\n\ndef ensure_identity_context(messages: list, name: str, role: str, team_name: str):\n if messages and \"\" in str(messages[0].get(\"content\", \"\")):\n return\n messages.insert(0, make_identity_block(name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\", \"content\": f\"I am {name}. Continuing.\"})\n\n\n# -- 自主版 TeammateManager --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def _set_status(self, name: str, status: str):\n member = self._find_member(name)\n if member:\n member[\"status\"] = status\n self._save_config()\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _loop(self, name: str, role: str, prompt: str):\n team_name = self.config[\"team_name\"]\n sys_prompt = (\n f\"你是 '{name}',角色 {role},团队 {team_name},工作目录位于 {WORKDIR}。\"\n f\"当当前工作完成后请调用 idle;系统会自动认领新任务。\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n\n while True:\n # -- WORK PHASE(工作阶段):标准智能体循环 --\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n self._set_status(name, \"idle\")\n return\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n idle_requested = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"idle\":\n idle_requested = True\n output = \"进入 idle(空闲)阶段,将轮询新任务。\"\n else:\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n if idle_requested:\n break\n\n # -- IDLE 
PHASE(空闲阶段):轮询收件箱与未认领任务 --\n self._set_status(name, \"idle\")\n resume = False\n polls = IDLE_TIMEOUT // max(POLL_INTERVAL, 1)\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n ensure_identity_context(messages, name, role, team_name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n resume = True\n break\n unclaimed = scan_unclaimed_tasks(role)\n if unclaimed:\n task = unclaimed[0]\n claim_result = claim_task(\n task[\"id\"], name, role=role, source=\"auto\"\n )\n if claim_result.startswith(\"Error:\"):\n continue\n task_prompt = (\n f\"Task #{task['id']}: {task['subject']}\\n\"\n f\"{task.get('description', '')} \"\n )\n ensure_identity_context(messages, name, role, team_name)\n messages.append({\"role\": \"user\", \"content\": task_prompt})\n messages.append({\"role\": \"assistant\", \"content\": f\"{claim_result}. Working on it.\"})\n resume = True\n break\n\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # 这些基础工具与 s02 保持一致\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n updated = REQUEST_STORE.update(\n req_id,\n status=\"approved\" if args[\"approve\"] else \"rejected\",\n resolved_by=sender,\n resolved_at=time.time(),\n response={\"approve\": args[\"approve\"], \"reason\": args.get(\"reason\", \"\")},\n )\n if not updated:\n return f\"Error: 未知的 shutdown request {req_id}\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": args[\"approve\"]},\n )\n return f\"Shutdown {'approved' if args['approve'] else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"plan_approval\",\n \"from\": sender,\n \"to\": \"lead\",\n \"status\": \"pending\",\n \"plan\": plan_text,\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"计划已提交(request_id={req_id})。等待审批。\"\n if tool_name == \"claim_task\":\n return claim_task(\n args[\"task_id\"],\n sender,\n role=self._find_member(sender).get(\"role\") if self._find_member(sender) else None,\n source=\"manual\",\n )\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # 这些基础工具与 s02 保持一致\n return [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": 
\"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"向队友发送消息。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"读取并清空自己的收件箱。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"响应 shutdown 请求。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"提交计划给 lead 审批。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n {\"name\": \"idle\", \"description\": \"声明当前无可执行工作,进入 idle 轮询阶段。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"按 ID 从任务板认领任务。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- 基础工具实现(与 s02 保持一致) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead(主控)侧协议处理器 --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"shutdown\",\n \"from\": \"lead\",\n \"to\": teammate,\n \"status\": \"pending\",\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n \"lead\", teammate, \"请平滑关停。\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}'\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n req = REQUEST_STORE.get(request_id)\n if not req:\n return f\"Error: 未知的 plan request_id '{request_id}'\"\n REQUEST_STORE.update(\n request_id,\n status=\"approved\" if approve else \"rejected\",\n reviewed_by=\"lead\",\n resolved_at=time.time(),\n feedback=feedback,\n )\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {'approved' if approve else 'rejected'} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n return json.dumps(REQUEST_STORE.get(request_id) or {\"error\": \"not found\"})\n\n\n# -- Lead(主控)工具分发(14 个工具) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n \"idle\": lambda **kw: \"Lead does not idle.\",\n \"claim_task\": lambda **kw: claim_task(kw[\"task_id\"], \"lead\"),\n}\n\n# 这些基础工具与 s02 保持一致\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": 
\"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"创建自主队友。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"列出全部队友。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"向队友发送消息。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"读取并清空 lead 收件箱。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"向全体队友广播消息。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"请求队友执行关停。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"查询 shutdown 请求状态。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"审批队友计划(通过/拒绝)。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"idle\", \"description\": \"进入 idle 状态(lead 侧很少使用)。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"按 ID 从任务板认领任务。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n 
messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms17 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n if query.strip() == \"/tasks\":\n TASKS_DIR.mkdir(exist_ok=True)\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n t = json.loads(f.read_text())\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n owner = f\" @{t['owner']}\" if t.get(\"owner\") else \"\"\n print(f\" {marker} #{t['id']}: {t['subject']}{owner}\")\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: autonomy -- models that find work without being told.\n\"\"\"\ns17_autonomous_agents.py - Autonomous Agents\n\nIdle cycle with task board polling, auto-claiming unclaimed tasks, and\nidentity re-injection after context compression. Builds on task boards,\nteam mailboxes, and protocol support from earlier chapters.\n\n Teammate lifecycle:\n +-------+\n | spawn |\n +---+---+\n |\n v\n +-------+ tool_use +-------+\n | WORK | <----------- | LLM |\n +---+---+ +-------+\n |\n | stop_reason != tool_use\n v\n +--------+\n | IDLE | poll every 5s for up to 60s\n +---+----+\n |\n +---> check inbox -> message? -> resume WORK\n |\n +---> scan .tasks/ -> unclaimed? -> claim -> resume WORK\n |\n +---> timeout (60s) -> shutdown\n\n Identity re-injection after compression:\n messages = [identity_block, ...remaining...]\n \"You are 'coder', role: backend, team: my-team\"\n\nKey idea: an idle teammate can safely claim ready work instead of waiting\nfor every assignment from the lead.\nA teammate here is a long-lived worker, not a one-shot subagent that only\nreturns a single summary.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\nTASKS_DIR = WORKDIR / \".tasks\"\nREQUESTS_DIR = TEAM_DIR / \"requests\"\nCLAIM_EVENTS_PATH = TASKS_DIR / \"claim_events.jsonl\"\n\nPOLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Teammates are autonomous -- they find work themselves.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval\",\n \"plan_approval_response\",\n}\n\n_claim_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\nclass RequestStore:\n \"\"\"\n Durable protocol request records.\n\n s17 should not regress from s16 back to in-memory trackers. 
These request\n files let autonomous teammates inspect or resume protocol state later.\n \"\"\"\n\n def __init__(self, base_dir: Path):\n self.dir = base_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._lock = threading.Lock()\n\n def _path(self, request_id: str) -> Path:\n return self.dir / f\"{request_id}.json\"\n\n def create(self, record: dict) -> dict:\n request_id = record[\"request_id\"]\n with self._lock:\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n def get(self, request_id: str) -> dict | None:\n path = self._path(request_id)\n if not path.exists():\n return None\n return json.loads(path.read_text())\n\n def update(self, request_id: str, **changes) -> dict | None:\n with self._lock:\n record = self.get(request_id)\n if not record:\n return None\n record.update(changes)\n record[\"updated_at\"] = time.time()\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n\nREQUEST_STORE = RequestStore(REQUESTS_DIR)\n\n\n# -- Task board scanning --\ndef _append_claim_event(payload: dict):\n TASKS_DIR.mkdir(parents=True, exist_ok=True)\n with CLAIM_EVENTS_PATH.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n\n\ndef _task_allows_role(task: dict, role: str | None) -> bool:\n required_role = task.get(\"claim_role\") or task.get(\"required_role\") or \"\"\n if not required_role:\n return True\n return bool(role) and role == required_role\n\n\ndef is_claimable_task(task: dict, role: str | None = None) -> bool:\n return (\n task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")\n and _task_allows_role(task, role)\n )\n\n\ndef scan_unclaimed_tasks(role: str | None = None) -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if is_claimable_task(task, role):\n unclaimed.append(task)\n return unclaimed\n\n\ndef claim_task(\n task_id: int,\n owner: str,\n role: str | None = None,\n source: str = \"manual\",\n) -> str:\n with _claim_lock:\n path = TASKS_DIR / f\"task_{task_id}.json\"\n if not path.exists():\n return f\"Error: Task {task_id} not found\"\n task = json.loads(path.read_text())\n if not is_claimable_task(task, role):\n return f\"Error: Task {task_id} is not claimable for role={role or '(any)'}\"\n task[\"owner\"] = owner\n task[\"status\"] = \"in_progress\"\n task[\"claimed_at\"] = time.time()\n task[\"claim_source\"] = source\n path.write_text(json.dumps(task, indent=2))\n _append_claim_event({\n \"event\": \"task.claimed\",\n \"task_id\": task_id,\n \"owner\": owner,\n \"role\": role,\n \"source\": source,\n \"ts\": time.time(),\n })\n return f\"Claimed task #{task_id} for {owner} via {source}\"\n\n\n# -- Identity re-injection after compression --\ndef make_identity_block(name: str, role: str, team_name: str) -> dict:\n return {\n \"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, team: {team_name}. Continue your work. \",\n }\n\n\ndef ensure_identity_context(messages: list, name: str, role: str, team_name: str):\n if messages and \"\" in str(messages[0].get(\"content\", \"\")):\n return\n messages.insert(0, make_identity_block(name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\", \"content\": f\"I am {name}. 
Continuing.\"})\n\n\n# -- Autonomous TeammateManager --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def _set_status(self, name: str, status: str):\n member = self._find_member(name)\n if member:\n member[\"status\"] = status\n self._save_config()\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _loop(self, name: str, role: str, prompt: str):\n team_name = self.config[\"team_name\"]\n sys_prompt = (\n f\"You are '{name}', role: {role}, team: {team_name}, at {WORKDIR}. \"\n f\"Use idle tool when you have no more work. You will auto-claim new tasks.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n\n while True:\n # -- WORK PHASE: standard agent loop --\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n self._set_status(name, \"idle\")\n return\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n idle_requested = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"idle\":\n idle_requested = True\n output = \"Entering idle phase. 
Will poll for new tasks.\"\n else:\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n if idle_requested:\n break\n\n # -- IDLE PHASE: poll for inbox messages and unclaimed tasks --\n self._set_status(name, \"idle\")\n resume = False\n polls = IDLE_TIMEOUT // max(POLL_INTERVAL, 1)\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n ensure_identity_context(messages, name, role, team_name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n resume = True\n break\n unclaimed = scan_unclaimed_tasks(role)\n if unclaimed:\n task = unclaimed[0]\n claim_result = claim_task(\n task[\"id\"], name, role=role, source=\"auto\"\n )\n if claim_result.startswith(\"Error:\"):\n continue\n task_prompt = (\n f\"Task #{task['id']}: {task['subject']}\\n\"\n f\"{task.get('description', '')} \"\n )\n ensure_identity_context(messages, name, role, team_name)\n messages.append({\"role\": \"user\", \"content\": task_prompt})\n messages.append({\"role\": \"assistant\", \"content\": f\"{claim_result}. Working on it.\"})\n resume = True\n break\n\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n updated = REQUEST_STORE.update(\n req_id,\n status=\"approved\" if args[\"approve\"] else \"rejected\",\n resolved_by=sender,\n resolved_at=time.time(),\n response={\"approve\": args[\"approve\"], \"reason\": args.get(\"reason\", \"\")},\n )\n if not updated:\n return f\"Error: Unknown shutdown request {req_id}\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": args[\"approve\"]},\n )\n return f\"Shutdown {'approved' if args['approve'] else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"plan_approval\",\n \"from\": sender,\n \"to\": \"lead\",\n \"status\": \"pending\",\n \"plan\": plan_text,\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). 
Waiting for approval.\"\n if tool_name == \"claim_task\":\n return claim_task(\n args[\"task_id\"],\n sender,\n role=self._find_member(sender).get(\"role\") if self._find_member(sender) else None,\n source=\"manual\",\n )\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n {\"name\": \"idle\", \"description\": \"Signal that you have no more work. 
Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the task board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"shutdown\",\n \"from\": \"lead\",\n \"to\": teammate,\n \"status\": \"pending\",\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}'\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n req = REQUEST_STORE.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n REQUEST_STORE.update(\n request_id,\n status=\"approved\" if approve else \"rejected\",\n reviewed_by=\"lead\",\n resolved_at=time.time(),\n feedback=feedback,\n )\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {'approved' if approve else 'rejected'} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n return json.dumps(REQUEST_STORE.get(request_id) or 
{\"error\": \"not found\"})\n\n\n# -- Lead tool dispatch (14 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n \"idle\": lambda **kw: \"Lead does not idle.\",\n \"claim_task\": lambda **kw: claim_task(kw[\"task_id\"], \"lead\"),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": 
{\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check shutdown request status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"idle\", \"description\": \"Enter idle state (for lead -- rarely used).\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms17 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n if query.strip() == \"/tasks\":\n TASKS_DIR.mkdir(exist_ok=True)\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n t = json.loads(f.read_text())\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n owner = f\" @{t['owner']}\" if t.get(\"owner\") else \"\"\n print(f\" {marker} #{t['id']}: {t['subject']}{owner}\")\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: autonomy -- models that find work without being told.\n\"\"\"\ns17_autonomous_agents.py - Autonomous Agents\n\nIdle cycle with task board polling, auto-claiming unclaimed tasks, and\nidentity re-injection after context compression. Builds on task boards,\nteam mailboxes, and protocol support from earlier chapters.\n\n Teammate lifecycle:\n +-------+\n | spawn |\n +---+---+\n |\n v\n +-------+ tool_use +-------+\n | WORK | <----------- | LLM |\n +---+---+ +-------+\n |\n | stop_reason != tool_use\n v\n +--------+\n | IDLE | poll every 5s for up to 60s\n +---+----+\n |\n +---> check inbox -> message? -> resume WORK\n |\n +---> scan .tasks/ -> unclaimed? -> claim -> resume WORK\n |\n +---> timeout (60s) -> shutdown\n\n Identity re-injection after compression:\n messages = [identity_block, ...remaining...]\n \"You are 'coder', role: backend, team: my-team\"\n\nKey idea: an idle teammate can safely claim ready work instead of waiting\nfor every assignment from the lead.\nA teammate here is a long-lived worker, not a one-shot subagent that only\nreturns a single summary.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\nTASKS_DIR = WORKDIR / \".tasks\"\nREQUESTS_DIR = TEAM_DIR / \"requests\"\nCLAIM_EVENTS_PATH = TASKS_DIR / \"claim_events.jsonl\"\n\nPOLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Teammates are autonomous -- they find work themselves.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval\",\n \"plan_approval_response\",\n}\n\n_claim_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\nclass RequestStore:\n \"\"\"\n Durable protocol request records.\n\n s17 should not regress from s16 back to in-memory trackers. 
These request\n files let autonomous teammates inspect or resume protocol state later.\n \"\"\"\n\n def __init__(self, base_dir: Path):\n self.dir = base_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._lock = threading.Lock()\n\n def _path(self, request_id: str) -> Path:\n return self.dir / f\"{request_id}.json\"\n\n def create(self, record: dict) -> dict:\n request_id = record[\"request_id\"]\n with self._lock:\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n def get(self, request_id: str) -> dict | None:\n path = self._path(request_id)\n if not path.exists():\n return None\n return json.loads(path.read_text())\n\n def update(self, request_id: str, **changes) -> dict | None:\n with self._lock:\n record = self.get(request_id)\n if not record:\n return None\n record.update(changes)\n record[\"updated_at\"] = time.time()\n self._path(request_id).write_text(json.dumps(record, indent=2))\n return record\n\n\nREQUEST_STORE = RequestStore(REQUESTS_DIR)\n\n\n# -- Task board scanning --\ndef _append_claim_event(payload: dict):\n TASKS_DIR.mkdir(parents=True, exist_ok=True)\n with CLAIM_EVENTS_PATH.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n\n\ndef _task_allows_role(task: dict, role: str | None) -> bool:\n required_role = task.get(\"claim_role\") or task.get(\"required_role\") or \"\"\n if not required_role:\n return True\n return bool(role) and role == required_role\n\n\ndef is_claimable_task(task: dict, role: str | None = None) -> bool:\n return (\n task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")\n and _task_allows_role(task, role)\n )\n\n\ndef scan_unclaimed_tasks(role: str | None = None) -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if is_claimable_task(task, role):\n unclaimed.append(task)\n return unclaimed\n\n\ndef claim_task(\n task_id: int,\n owner: str,\n role: str | None = None,\n source: str = \"manual\",\n) -> str:\n with _claim_lock:\n path = TASKS_DIR / f\"task_{task_id}.json\"\n if not path.exists():\n return f\"Error: Task {task_id} not found\"\n task = json.loads(path.read_text())\n if not is_claimable_task(task, role):\n return f\"Error: Task {task_id} is not claimable for role={role or '(any)'}\"\n task[\"owner\"] = owner\n task[\"status\"] = \"in_progress\"\n task[\"claimed_at\"] = time.time()\n task[\"claim_source\"] = source\n path.write_text(json.dumps(task, indent=2))\n _append_claim_event({\n \"event\": \"task.claimed\",\n \"task_id\": task_id,\n \"owner\": owner,\n \"role\": role,\n \"source\": source,\n \"ts\": time.time(),\n })\n return f\"Claimed task #{task_id} for {owner} via {source}\"\n\n\n# -- Identity re-injection after compression --\ndef make_identity_block(name: str, role: str, team_name: str) -> dict:\n return {\n \"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, team: {team_name}. Continue your work. \",\n }\n\n\ndef ensure_identity_context(messages: list, name: str, role: str, team_name: str):\n if messages and \"\" in str(messages[0].get(\"content\", \"\")):\n return\n messages.insert(0, make_identity_block(name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\", \"content\": f\"I am {name}. 
Continuing.\"})\n\n\n# -- Autonomous TeammateManager --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def _set_status(self, name: str, status: str):\n member = self._find_member(name)\n if member:\n member[\"status\"] = status\n self._save_config()\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _loop(self, name: str, role: str, prompt: str):\n team_name = self.config[\"team_name\"]\n sys_prompt = (\n f\"You are '{name}', role: {role}, team: {team_name}, at {WORKDIR}. \"\n f\"Use idle tool when you have no more work. You will auto-claim new tasks.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n\n while True:\n # -- WORK PHASE: standard agent loop --\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n self._set_status(name, \"idle\")\n return\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n idle_requested = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"idle\":\n idle_requested = True\n output = \"Entering idle phase. 
Will poll for new tasks.\"\n else:\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n if idle_requested:\n break\n\n # -- IDLE PHASE: poll for inbox messages and unclaimed tasks --\n self._set_status(name, \"idle\")\n resume = False\n polls = IDLE_TIMEOUT // max(POLL_INTERVAL, 1)\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n ensure_identity_context(messages, name, role, team_name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n resume = True\n break\n unclaimed = scan_unclaimed_tasks(role)\n if unclaimed:\n task = unclaimed[0]\n claim_result = claim_task(\n task[\"id\"], name, role=role, source=\"auto\"\n )\n if claim_result.startswith(\"Error:\"):\n continue\n task_prompt = (\n f\"Task #{task['id']}: {task['subject']}\\n\"\n f\"{task.get('description', '')} \"\n )\n ensure_identity_context(messages, name, role, team_name)\n messages.append({\"role\": \"user\", \"content\": task_prompt})\n messages.append({\"role\": \"assistant\", \"content\": f\"{claim_result}. Working on it.\"})\n resume = True\n break\n\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n updated = REQUEST_STORE.update(\n req_id,\n status=\"approved\" if args[\"approve\"] else \"rejected\",\n resolved_by=sender,\n resolved_at=time.time(),\n response={\"approve\": args[\"approve\"], \"reason\": args.get(\"reason\", \"\")},\n )\n if not updated:\n return f\"Error: Unknown shutdown request {req_id}\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": args[\"approve\"]},\n )\n return f\"Shutdown {'approved' if args['approve'] else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"plan_approval\",\n \"from\": sender,\n \"to\": \"lead\",\n \"status\": \"pending\",\n \"plan\": plan_text,\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). 
Waiting for approval.\"\n if tool_name == \"claim_task\":\n return claim_task(\n args[\"task_id\"],\n sender,\n role=self._find_member(sender).get(\"role\") if self._find_member(sender) else None,\n source=\"manual\",\n )\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n {\"name\": \"idle\", \"description\": \"Signal that you have no more work. 
Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the task board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n REQUEST_STORE.create({\n \"request_id\": req_id,\n \"kind\": \"shutdown\",\n \"from\": \"lead\",\n \"to\": teammate,\n \"status\": \"pending\",\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n })\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}'\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n req = REQUEST_STORE.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n REQUEST_STORE.update(\n request_id,\n status=\"approved\" if approve else \"rejected\",\n reviewed_by=\"lead\",\n resolved_at=time.time(),\n feedback=feedback,\n )\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {'approved' if approve else 'rejected'} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n return json.dumps(REQUEST_STORE.get(request_id) or 
{\"error\": \"not found\"})\n\n\n# -- Lead tool dispatch (14 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n \"idle\": lambda **kw: \"Lead does not idle.\",\n \"claim_task\": lambda **kw: claim_task(kw[\"task_id\"], \"lead\"),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": 
{\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check shutdown request status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"idle\", \"description\": \"Enter idle state (for lead -- rarely used).\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms17 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n if query.strip() == \"/tasks\":\n TASKS_DIR.mkdir(exist_ok=True)\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n t = json.loads(f.read_text())\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n owner = f\" @{t['owner']}\" if t.get(\"owner\") else \"\"\n print(f\" {marker} #{t['id']}: {t['subject']}{owner}\")\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
},
{
- "id": "s12",
- "filename": "s12_worktree_task_isolation.py",
- "title": "Worktree + Task Isolation",
- "subtitle": "Isolate by Directory",
- "loc": 694,
+ "id": "s18",
+ "filename": "s18_worktree_task_isolation.py",
+ "title": "Worktree Isolation",
+ "subtitle": "Separate Directory, Separate Lane",
+ "loc": 564,
"tools": [
"bash",
"read_file",
@@ -748,8 +1372,10 @@
"task_bind_worktree",
"worktree_create",
"worktree_list",
+ "worktree_enter",
"worktree_status",
"worktree_run",
+ "worktree_closeout",
"worktree_remove",
"worktree_keep",
"worktree_events"
@@ -762,70 +1388,169 @@
"task_bind_worktree",
"worktree_create",
"worktree_list",
+ "worktree_enter",
"worktree_status",
"worktree_run",
+ "worktree_closeout",
"worktree_remove",
"worktree_keep",
"worktree_events"
],
- "coreAddition": "Composable worktree lifecycle + event stream over a shared task board",
- "keyInsight": "Each works in its own directory; tasks manage goals, worktrees manage directories, bound by ID",
+ "coreAddition": "Task-worktree state + explicit enter/closeout lifecycle",
+ "keyInsight": "Tasks answer what; worktrees answer where. Keep them separate.",
"classes": [
{
"name": "EventBus",
- "startLine": 82,
+ "startLine": 89,
"endLine": 120
},
{
"name": "TaskManager",
"startLine": 121,
- "endLine": 218
+ "endLine": 227
},
{
"name": "WorktreeManager",
- "startLine": 224,
- "endLine": 472
+ "startLine": 233,
+ "endLine": 474
}
],
"functions": [
{
"name": "detect_repo_root",
"signature": "def detect_repo_root(cwd: Path)",
- "startLine": 52
+ "startLine": 66
},
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
- "startLine": 477
+ "startLine": 479
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
- "startLine": 484
+ "startLine": 485
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int = None)",
- "startLine": 503
+ "startLine": 497
+ },
+ {
+ "name": "run_write",
+ "signature": "def run_write(path: str, content: str)",
+ "startLine": 506
+ },
+ {
+ "name": "run_edit",
+ "signature": "def run_edit(path: str, old_text: str, new_text: str)",
+ "startLine": 515
+ },
+ {
+ "name": "agent_loop",
+ "signature": "def agent_loop(messages: list)",
+ "startLine": 600
+ }
+ ],
+ "layer": "platform",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: directory isolation -- parallel execution lanes that never collide.\n\"\"\"\ns18_worktree_task_isolation.py - Worktree + Task Isolation\n\nDirectory-level isolation for parallel task execution.\nTasks are the control plane and worktrees are the execution plane.\n\n .tasks/task_12.json\n {\n \"id\": 12,\n \"subject\": \"Implement auth refactor\",\n \"status\": \"in_progress\",\n \"worktree\": \"auth-refactor\"\n }\n\n .worktrees/index.json\n {\n \"worktrees\": [\n {\n \"name\": \"auth-refactor\",\n \"path\": \".../.worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\"\n }\n ]\n }\n\nKey insight: \"Isolate by directory, coordinate by task ID.\"\n\nRead this file in this order:\n1. EventBus: how worktree lifecycle stays observable.\n2. TaskManager: how a task binds to an execution lane without becoming the lane itself.\n3. Worktree registry / closeout helpers: how directory state is created, tracked, and cleaned up.\n\nMost common confusion:\n- a worktree is not the task itself\n- a worktree record is not just a path string\n\nTeaching boundary:\nthis file teaches isolated execution lanes first.\nCross-machine execution, merge automation, and enterprise policy glue are intentionally out of scope.\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\ndef detect_repo_root(cwd: Path) -> Path | None:\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--show-toplevel\"],\n cwd=cwd, capture_output=True, text=True, timeout=10,\n )\n root = Path(r.stdout.strip())\n return root if r.returncode == 0 and root.exists() else None\n except Exception:\n return None\n\n\nREPO_ROOT = detect_repo_root(WORKDIR) or WORKDIR\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Use task + worktree tools for multi-task work. 
\"\n \"For parallel or risky changes: create tasks, allocate worktree lanes, \"\n \"run commands in those lanes, then choose keep/remove for closeout.\"\n)\n\n\n# -- EventBus: append-only lifecycle events for observability --\nclass EventBus:\n def __init__(self, event_log_path: Path):\n self.path = event_log_path\n self.path.parent.mkdir(parents=True, exist_ok=True)\n if not self.path.exists():\n self.path.write_text(\"\")\n\n def emit(self, event: str, task_id=None, wt_name=None, error=None, **extra):\n payload = {\"event\": event, \"ts\": time.time()}\n if task_id is not None:\n payload[\"task_id\"] = task_id\n if wt_name:\n payload[\"worktree\"] = wt_name\n if error:\n payload[\"error\"] = error\n payload.update(extra)\n with self.path.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n\n def list_recent(self, limit: int = 20) -> str:\n n = max(1, min(int(limit or 20), 200))\n lines = self.path.read_text(encoding=\"utf-8\").splitlines()\n items = []\n for line in lines[-n:]:\n try:\n items.append(json.loads(line))\n except Exception:\n items.append({\"event\": \"parse_error\", \"raw\": line})\n return json.dumps(items, indent=2)\n\n\n# -- TaskManager: persistent task board with optional worktree binding --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = []\n for f in self.dir.glob(\"task_*.json\"):\n try:\n ids.append(int(f.stem.split(\"_\")[1]))\n except Exception:\n pass\n return max(ids) if ids else 0\n\n def _path(self, task_id: int) -> Path:\n return self.dir / f\"task_{task_id}.json\"\n\n def _load(self, task_id: int) -> dict:\n path = self._path(task_id)\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n self._path(task[\"id\"]).write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id, \"subject\": subject, \"description\": description,\n \"status\": \"pending\", \"owner\": \"\", \"worktree\": \"\",\n \"worktree_state\": \"unbound\", \"last_worktree\": \"\",\n \"closeout\": None, \"blockedBy\": [],\n \"created_at\": time.time(), \"updated_at\": time.time(),\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def exists(self, task_id: int) -> bool:\n return self._path(task_id).exists()\n\n def update(self, task_id: int, status: str = None, owner: str = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\", \"deleted\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n if owner is not None:\n task[\"owner\"] = owner\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def bind_worktree(self, task_id: int, worktree: str, owner: str = \"\") -> str:\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n task[\"last_worktree\"] = worktree\n task[\"worktree_state\"] = \"active\"\n if owner:\n task[\"owner\"] = owner\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def unbind_worktree(self, task_id: int) -> str:\n task = self._load(task_id)\n 
task[\"worktree\"] = \"\"\n task[\"worktree_state\"] = \"unbound\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def record_closeout(self, task_id: int, action: str, reason: str = \"\", keep_binding: bool = False) -> str:\n task = self._load(task_id)\n task[\"closeout\"] = {\n \"action\": action,\n \"reason\": reason,\n \"at\": time.time(),\n }\n task[\"worktree_state\"] = action\n if not keep_binding:\n task[\"worktree\"] = \"\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\", \"deleted\": \"[-]\"}.get(t[\"status\"], \"[?]\")\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\n wt = f\" wt={t['worktree']}\" if t.get(\"worktree\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{wt}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(REPO_ROOT / \".tasks\")\nEVENTS = EventBus(REPO_ROOT / \".worktrees\" / \"events.jsonl\")\n\n\n# -- WorktreeManager: create/list/run/remove git worktrees --\nclass WorktreeManager:\n def __init__(self, repo_root: Path, tasks: TaskManager, events: EventBus):\n self.repo_root = repo_root\n self.tasks = tasks\n self.events = events\n self.dir = repo_root / \".worktrees\"\n self.dir.mkdir(parents=True, exist_ok=True)\n self.index_path = self.dir / \"index.json\"\n if not self.index_path.exists():\n self.index_path.write_text(json.dumps({\"worktrees\": []}, indent=2))\n self.git_available = self._check_git()\n\n def _check_git(self) -> bool:\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--is-inside-work-tree\"],\n cwd=self.repo_root, capture_output=True, text=True, timeout=10,\n )\n return r.returncode == 0\n except Exception:\n return False\n\n def _run_git(self, args: list[str]) -> str:\n if not self.git_available:\n raise RuntimeError(\"Not in a git repository.\")\n r = subprocess.run(\n [\"git\", *args], cwd=self.repo_root,\n capture_output=True, text=True, timeout=120,\n )\n if r.returncode != 0:\n raise RuntimeError((r.stdout + r.stderr).strip() or f\"git {' '.join(args)} failed\")\n return (r.stdout + r.stderr).strip() or \"(no output)\"\n\n def _load_index(self) -> dict:\n return json.loads(self.index_path.read_text())\n\n def _save_index(self, data: dict):\n self.index_path.write_text(json.dumps(data, indent=2))\n\n def _find(self, name: str) -> dict | None:\n for wt in self._load_index().get(\"worktrees\", []):\n if wt.get(\"name\") == name:\n return wt\n return None\n\n def _update_entry(self, name: str, **changes) -> dict:\n idx = self._load_index()\n updated = None\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item.update(changes)\n updated = item\n break\n self._save_index(idx)\n if not updated:\n raise ValueError(f\"Worktree '{name}' not found in index\")\n return updated\n\n def _validate_name(self, name: str):\n if not re.fullmatch(r\"[A-Za-z0-9._-]{1,40}\", name or \"\"):\n raise ValueError(\"Invalid worktree name. 
Use 1-40 chars: letters, digits, ., _, -\")\n\n def create(self, name: str, task_id: int = None, base_ref: str = \"HEAD\") -> str:\n self._validate_name(name)\n if self._find(name):\n raise ValueError(f\"Worktree '{name}' already exists\")\n if task_id is not None and not self.tasks.exists(task_id):\n raise ValueError(f\"Task {task_id} not found\")\n\n path = self.dir / name\n branch = f\"wt/{name}\"\n self.events.emit(\"worktree.create.before\", task_id=task_id, wt_name=name)\n try:\n self._run_git([\"worktree\", \"add\", \"-b\", branch, str(path), base_ref])\n entry = {\n \"name\": name, \"path\": str(path), \"branch\": branch,\n \"task_id\": task_id, \"status\": \"active\", \"created_at\": time.time(),\n }\n idx = self._load_index()\n idx[\"worktrees\"].append(entry)\n self._save_index(idx)\n if task_id is not None:\n self.tasks.bind_worktree(task_id, name)\n self.events.emit(\"worktree.create.after\", task_id=task_id, wt_name=name)\n return json.dumps(entry, indent=2)\n except Exception as e:\n self.events.emit(\"worktree.create.failed\", task_id=task_id, wt_name=name, error=str(e))\n raise\n\n def list_all(self) -> str:\n wts = self._load_index().get(\"worktrees\", [])\n if not wts:\n return \"No worktrees in index.\"\n lines = []\n for wt in wts:\n suffix = f\" task={wt['task_id']}\" if wt.get(\"task_id\") else \"\"\n lines.append(f\"[{wt.get('status', '?')}] {wt['name']} -> {wt['path']} ({wt.get('branch', '-')}){suffix}\")\n return \"\\n\".join(lines)\n\n def status(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n r = subprocess.run(\n [\"git\", \"status\", \"--short\", \"--branch\"],\n cwd=path, capture_output=True, text=True, timeout=60,\n )\n return (r.stdout + r.stderr).strip() or \"Clean worktree\"\n\n def enter(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n updated = self._update_entry(name, last_entered_at=time.time())\n self.events.emit(\"worktree.enter\", task_id=wt.get(\"task_id\"), wt_name=name, path=str(path))\n return json.dumps(updated, indent=2)\n\n def run(self, name: str, command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n try:\n self._update_entry(\n name,\n last_entered_at=time.time(),\n last_command_at=time.time(),\n last_command_preview=command[:120],\n )\n self.events.emit(\"worktree.run.before\", task_id=wt.get(\"task_id\"), wt_name=name, command=command[:120])\n r = subprocess.run(command, shell=True, cwd=path,\n capture_output=True, text=True, timeout=300)\n out = (r.stdout + r.stderr).strip()\n self.events.emit(\"worktree.run.after\", task_id=wt.get(\"task_id\"), wt_name=name)\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n self.events.emit(\"worktree.run.timeout\", task_id=wt.get(\"task_id\"), wt_name=name)\n return \"Error: Timeout (300s)\"\n\n def remove(\n self,\n name: str,\n force: bool = False,\n complete_task: bool = False,\n reason: str = \"\",\n ) -> str:\n wt = 
self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n task_id = wt.get(\"task_id\")\n self.events.emit(\"worktree.remove.before\", task_id=task_id, wt_name=name)\n try:\n args = [\"worktree\", \"remove\"]\n if force:\n args.append(\"--force\")\n args.append(wt[\"path\"])\n self._run_git(args)\n if complete_task and task_id is not None:\n self.tasks.update(task_id, status=\"completed\")\n self.events.emit(\"task.completed\", task_id=task_id, wt_name=name)\n if task_id is not None:\n self.tasks.record_closeout(task_id, \"removed\", reason, keep_binding=False)\n self._update_entry(\n name,\n status=\"removed\",\n removed_at=time.time(),\n closeout={\"action\": \"remove\", \"reason\": reason, \"at\": time.time()},\n )\n self.events.emit(\"worktree.remove.after\", task_id=task_id, wt_name=name)\n return f\"Removed worktree '{name}'\"\n except Exception as e:\n self.events.emit(\"worktree.remove.failed\", task_id=task_id, wt_name=name, error=str(e))\n raise\n\n def keep(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n if wt.get(\"task_id\") is not None:\n self.tasks.record_closeout(wt[\"task_id\"], \"kept\", \"\", keep_binding=True)\n self._update_entry(\n name,\n status=\"kept\",\n kept_at=time.time(),\n closeout={\"action\": \"keep\", \"reason\": \"\", \"at\": time.time()},\n )\n self.events.emit(\"worktree.keep\", task_id=wt.get(\"task_id\"), wt_name=name)\n return json.dumps(self._find(name), indent=2)\n\n def closeout(\n self,\n name: str,\n action: str,\n reason: str = \"\",\n force: bool = False,\n complete_task: bool = False,\n ) -> str:\n if action == \"keep\":\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n if wt.get(\"task_id\") is not None:\n self.tasks.record_closeout(\n wt[\"task_id\"], \"kept\", reason, keep_binding=True\n )\n if complete_task:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self._update_entry(\n name,\n status=\"kept\",\n kept_at=time.time(),\n closeout={\"action\": \"keep\", \"reason\": reason, \"at\": time.time()},\n )\n self.events.emit(\n \"worktree.closeout.keep\",\n task_id=wt.get(\"task_id\"),\n wt_name=name,\n reason=reason,\n )\n return json.dumps(self._find(name), indent=2)\n if action == \"remove\":\n self.events.emit(\"worktree.closeout.remove\", wt_name=name, reason=reason)\n return self.remove(\n name,\n force=force,\n complete_task=complete_task,\n reason=reason,\n )\n raise ValueError(\"action must be 'keep' or 'remove'\")\n\n\nWORKTREES = WorktreeManager(REPO_ROOT, TASKS, EVENTS)\n\n\n# -- Base tools (same as previous sessions, kept minimal) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\")),\n \"task_bind_worktree\": lambda **kw: TASKS.bind_worktree(kw[\"task_id\"], kw[\"worktree\"], kw.get(\"owner\", \"\")),\n \"worktree_create\": lambda **kw: WORKTREES.create(kw[\"name\"], kw.get(\"task_id\"), kw.get(\"base_ref\", \"HEAD\")),\n \"worktree_list\": lambda **kw: WORKTREES.list_all(),\n \"worktree_enter\": lambda **kw: WORKTREES.enter(kw[\"name\"]),\n \"worktree_status\": lambda **kw: WORKTREES.status(kw[\"name\"]),\n \"worktree_run\": lambda **kw: WORKTREES.run(kw[\"name\"], kw[\"command\"]),\n \"worktree_closeout\": lambda **kw: WORKTREES.closeout(\n kw[\"name\"],\n kw[\"action\"],\n kw.get(\"reason\", \"\"),\n kw.get(\"force\", False),\n kw.get(\"complete_task\", False),\n ),\n \"worktree_keep\": lambda **kw: WORKTREES.keep(kw[\"name\"]),\n \"worktree_remove\": lambda **kw: WORKTREES.remove(\n kw[\"name\"],\n kw.get(\"force\", False),\n kw.get(\"complete_task\", False),\n kw.get(\"reason\", \"\"),\n ),\n \"worktree_events\": lambda **kw: EVENTS.list_recent(kw.get(\"limit\", 20)),\n}\n\n# Compact tool definitions -- same schema, less vertical space\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command in the current workspace.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"task_create\", \"description\": \"Create a new task on the shared task board.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"subject\": 
{\"type\": \"string\"}, \"description\": {\"type\": \"string\"}}, \"required\": [\"subject\"]}},\n {\"name\": \"task_list\", \"description\": \"List all tasks with status, owner, and worktree binding.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"task_get\", \"description\": \"Get task details by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_update\", \"description\": \"Update task status or owner.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\", \"deleted\"]}, \"owner\": {\"type\": \"string\"}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_bind_worktree\", \"description\": \"Bind a task to a worktree name.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"worktree\": {\"type\": \"string\"}, \"owner\": {\"type\": \"string\"}}, \"required\": [\"task_id\", \"worktree\"]}},\n {\"name\": \"worktree_create\", \"description\": \"Create a git worktree and optionally bind it to a task.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"task_id\": {\"type\": \"integer\"}, \"base_ref\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_list\", \"description\": \"List worktrees tracked in .worktrees/index.json.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"worktree_enter\", \"description\": \"Enter or reopen a worktree lane before working in it.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_status\", \"description\": \"Show git status for one worktree.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_run\", \"description\": \"Run a shell command in a named worktree directory.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"command\": {\"type\": \"string\"}}, \"required\": [\"name\", \"command\"]}},\n {\"name\": \"worktree_closeout\", \"description\": \"Close out a lane by keeping it for follow-up or removing it.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"action\": {\"type\": \"string\", \"enum\": [\"keep\", \"remove\"]}, \"reason\": {\"type\": \"string\"}, \"force\": {\"type\": \"boolean\"}, \"complete_task\": {\"type\": \"boolean\"}}, \"required\": [\"name\", \"action\"]}},\n {\"name\": \"worktree_remove\", \"description\": \"Remove a worktree and optionally mark its bound task completed.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"force\": {\"type\": \"boolean\"}, \"complete_task\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_keep\", \"description\": \"Mark a worktree as kept without removing it.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_events\", \"description\": \"List recent lifecycle events.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"limit\": {\"type\": \"integer\"}}}},\n]\n\n\ndef 
agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(f\"Repo root for s18: {REPO_ROOT}\")\n if not WORKTREES.git_available:\n print(\"Note: Not in a git repo. worktree_* tools will return errors.\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms18 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): directory isolation(目录隔离)——并行执行车道互不碰撞。\n\"\"\"\ns18_worktree_task_isolation.py - Worktree + Task Isolation(工作树与任务隔离)\n\n使用目录级隔离实现并行任务执行。\ntask 是控制平面,worktree 是执行平面。\n\n .tasks/task_12.json\n {\n \"id\": 12,\n \"subject\": \"Implement auth refactor(实现认证重构)\",\n \"status\": \"in_progress\",\n \"worktree\": \"auth-refactor\"\n }\n\n .worktrees/index.json\n {\n \"worktrees\": [\n {\n \"name\": \"auth-refactor\",\n \"path\": \".../.worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\"\n }\n ]\n }\n\n关键洞察:\n\"按目录隔离,按 task ID 协调。\"\n\n建议阅读顺序:\n1. EventBus:worktree 生命周期如何保持可观测。\n2. TaskManager:任务如何绑定到执行车道,而不与车道本身混淆。\n3. Worktree registry / closeout helpers:目录状态如何创建、追踪与收尾。\n\n最常见混淆点:\n- worktree 不是 task 本身\n- worktree record 不是单纯路径字符串\n\n教学边界:\n本文件先讲 isolated execution lanes(隔离执行车道)。\n跨机器执行、自动合并与企业策略胶水能力,刻意不放入本章范围。\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nimport time\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\n\n\ndef detect_repo_root(cwd: Path) -> Path | None:\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--show-toplevel\"],\n cwd=cwd, capture_output=True, text=True, timeout=10,\n )\n root = Path(r.stdout.strip())\n return root if r.returncode == 0 and root.exists() else None\n except Exception:\n return None\n\n\nREPO_ROOT = detect_repo_root(WORKDIR) or WORKDIR\n\nSYSTEM = (\n f\"你是位于 {WORKDIR} 的 coding agent(编码智能体)。\"\n \"多任务场景请使用 task + worktree 工具。\"\n \"对于并行或高风险改动:先建 task,再分配 worktree 车道,\"\n \"在对应车道执行命令,最后决定 keep/remove 进行收尾。\"\n)\n\n\n# -- EventBus:append-only 生命周期事件,提供可观测性 --\nclass EventBus:\n def __init__(self, event_log_path: Path):\n self.path = event_log_path\n self.path.parent.mkdir(parents=True, exist_ok=True)\n if not self.path.exists():\n self.path.write_text(\"\")\n\n def emit(self, event: str, task_id=None, wt_name=None, error=None, **extra):\n payload = {\"event\": event, \"ts\": time.time()}\n if task_id is not None:\n payload[\"task_id\"] = task_id\n if wt_name:\n payload[\"worktree\"] = wt_name\n if error:\n payload[\"error\"] = error\n payload.update(extra)\n with self.path.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n\n def list_recent(self, limit: int = 20) -> str:\n n = max(1, min(int(limit or 20), 200))\n lines = self.path.read_text(encoding=\"utf-8\").splitlines()\n items = []\n for line in lines[-n:]:\n try:\n items.append(json.loads(line))\n except Exception:\n items.append({\"event\": \"parse_error\", \"raw\": line})\n return json.dumps(items, indent=2)\n\n\n# -- TaskManager:支持 worktree 绑定的持久任务板 --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = []\n for f in self.dir.glob(\"task_*.json\"):\n try:\n ids.append(int(f.stem.split(\"_\")[1]))\n except Exception:\n pass\n return max(ids) if ids else 0\n\n def _path(self, task_id: int) -> Path:\n return self.dir / f\"task_{task_id}.json\"\n\n def _load(self, task_id: int) -> dict:\n path = self._path(task_id)\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: 
dict):\n self._path(task[\"id\"]).write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id, \"subject\": subject, \"description\": description,\n \"status\": \"pending\", \"owner\": \"\", \"worktree\": \"\",\n \"worktree_state\": \"unbound\", \"last_worktree\": \"\",\n \"closeout\": None, \"blockedBy\": [],\n \"created_at\": time.time(), \"updated_at\": time.time(),\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def exists(self, task_id: int) -> bool:\n return self._path(task_id).exists()\n\n def update(self, task_id: int, status: str = None, owner: str = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\", \"deleted\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n if owner is not None:\n task[\"owner\"] = owner\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def bind_worktree(self, task_id: int, worktree: str, owner: str = \"\") -> str:\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n task[\"last_worktree\"] = worktree\n task[\"worktree_state\"] = \"active\"\n if owner:\n task[\"owner\"] = owner\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def unbind_worktree(self, task_id: int) -> str:\n task = self._load(task_id)\n task[\"worktree\"] = \"\"\n task[\"worktree_state\"] = \"unbound\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def record_closeout(self, task_id: int, action: str, reason: str = \"\", keep_binding: bool = False) -> str:\n task = self._load(task_id)\n task[\"closeout\"] = {\n \"action\": action,\n \"reason\": reason,\n \"at\": time.time(),\n }\n task[\"worktree_state\"] = action\n if not keep_binding:\n task[\"worktree\"] = \"\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\", \"deleted\": \"[-]\"}.get(t[\"status\"], \"[?]\")\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\n wt = f\" wt={t['worktree']}\" if t.get(\"worktree\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{wt}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(REPO_ROOT / \".tasks\")\nEVENTS = EventBus(REPO_ROOT / \".worktrees\" / \"events.jsonl\")\n\n\n# -- WorktreeManager:创建/列出/执行/移除 git worktree --\nclass WorktreeManager:\n def __init__(self, repo_root: Path, tasks: TaskManager, events: EventBus):\n self.repo_root = repo_root\n self.tasks = tasks\n self.events = events\n self.dir = repo_root / \".worktrees\"\n self.dir.mkdir(parents=True, exist_ok=True)\n self.index_path = self.dir / \"index.json\"\n if not self.index_path.exists():\n self.index_path.write_text(json.dumps({\"worktrees\": []}, indent=2))\n self.git_available = self._check_git()\n\n def _check_git(self) -> bool:\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--is-inside-work-tree\"],\n cwd=self.repo_root, 
capture_output=True, text=True, timeout=10,\n )\n return r.returncode == 0\n except Exception:\n return False\n\n def _run_git(self, args: list[str]) -> str:\n if not self.git_available:\n raise RuntimeError(\"Not in a git repository.\")\n r = subprocess.run(\n [\"git\", *args], cwd=self.repo_root,\n capture_output=True, text=True, timeout=120,\n )\n if r.returncode != 0:\n raise RuntimeError((r.stdout + r.stderr).strip() or f\"git {' '.join(args)} failed\")\n return (r.stdout + r.stderr).strip() or \"(no output)\"\n\n def _load_index(self) -> dict:\n return json.loads(self.index_path.read_text())\n\n def _save_index(self, data: dict):\n self.index_path.write_text(json.dumps(data, indent=2))\n\n def _find(self, name: str) -> dict | None:\n for wt in self._load_index().get(\"worktrees\", []):\n if wt.get(\"name\") == name:\n return wt\n return None\n\n def _update_entry(self, name: str, **changes) -> dict:\n idx = self._load_index()\n updated = None\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item.update(changes)\n updated = item\n break\n self._save_index(idx)\n if not updated:\n raise ValueError(f\"Worktree '{name}' not found in index\")\n return updated\n\n def _validate_name(self, name: str):\n if not re.fullmatch(r\"[A-Za-z0-9._-]{1,40}\", name or \"\"):\n raise ValueError(\"worktree 名称无效。请使用 1-40 个字符:字母、数字、.、_、-\")\n\n def create(self, name: str, task_id: int = None, base_ref: str = \"HEAD\") -> str:\n self._validate_name(name)\n if self._find(name):\n raise ValueError(f\"Worktree '{name}' already exists\")\n if task_id is not None and not self.tasks.exists(task_id):\n raise ValueError(f\"Task {task_id} not found\")\n\n path = self.dir / name\n branch = f\"wt/{name}\"\n self.events.emit(\"worktree.create.before\", task_id=task_id, wt_name=name)\n try:\n self._run_git([\"worktree\", \"add\", \"-b\", branch, str(path), base_ref])\n entry = {\n \"name\": name, \"path\": str(path), \"branch\": branch,\n \"task_id\": task_id, \"status\": \"active\", \"created_at\": time.time(),\n }\n idx = self._load_index()\n idx[\"worktrees\"].append(entry)\n self._save_index(idx)\n if task_id is not None:\n self.tasks.bind_worktree(task_id, name)\n self.events.emit(\"worktree.create.after\", task_id=task_id, wt_name=name)\n return json.dumps(entry, indent=2)\n except Exception as e:\n self.events.emit(\"worktree.create.failed\", task_id=task_id, wt_name=name, error=str(e))\n raise\n\n def list_all(self) -> str:\n wts = self._load_index().get(\"worktrees\", [])\n if not wts:\n return \"No worktrees in index.\"\n lines = []\n for wt in wts:\n suffix = f\" task={wt['task_id']}\" if wt.get(\"task_id\") else \"\"\n lines.append(f\"[{wt.get('status', '?')}] {wt['name']} -> {wt['path']} ({wt.get('branch', '-')}){suffix}\")\n return \"\\n\".join(lines)\n\n def status(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: 未知的 worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: worktree 路径不存在:{path}\"\n r = subprocess.run(\n [\"git\", \"status\", \"--short\", \"--branch\"],\n cwd=path, capture_output=True, text=True, timeout=60,\n )\n return (r.stdout + r.stderr).strip() or \"Clean worktree\"\n\n def enter(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: 未知的 worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: worktree 路径不存在:{path}\"\n updated = self._update_entry(name, last_entered_at=time.time())\n self.events.emit(\"worktree.enter\", 
task_id=wt.get(\"task_id\"), wt_name=name, path=str(path))\n return json.dumps(updated, indent=2)\n\n def run(self, name: str, command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: 危险命令已拦截\"\n wt = self._find(name)\n if not wt:\n return f\"Error: 未知的 worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: worktree 路径不存在:{path}\"\n try:\n self._update_entry(\n name,\n last_entered_at=time.time(),\n last_command_at=time.time(),\n last_command_preview=command[:120],\n )\n self.events.emit(\"worktree.run.before\", task_id=wt.get(\"task_id\"), wt_name=name, command=command[:120])\n r = subprocess.run(command, shell=True, cwd=path,\n capture_output=True, text=True, timeout=300)\n out = (r.stdout + r.stderr).strip()\n self.events.emit(\"worktree.run.after\", task_id=wt.get(\"task_id\"), wt_name=name)\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n self.events.emit(\"worktree.run.timeout\", task_id=wt.get(\"task_id\"), wt_name=name)\n return \"Error: Timeout (300s)\"\n\n def remove(\n self,\n name: str,\n force: bool = False,\n complete_task: bool = False,\n reason: str = \"\",\n ) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: 未知的 worktree '{name}'\"\n task_id = wt.get(\"task_id\")\n self.events.emit(\"worktree.remove.before\", task_id=task_id, wt_name=name)\n try:\n args = [\"worktree\", \"remove\"]\n if force:\n args.append(\"--force\")\n args.append(wt[\"path\"])\n self._run_git(args)\n if complete_task and task_id is not None:\n self.tasks.update(task_id, status=\"completed\")\n self.events.emit(\"task.completed\", task_id=task_id, wt_name=name)\n if task_id is not None:\n self.tasks.record_closeout(task_id, \"removed\", reason, keep_binding=False)\n self._update_entry(\n name,\n status=\"removed\",\n removed_at=time.time(),\n closeout={\"action\": \"remove\", \"reason\": reason, \"at\": time.time()},\n )\n self.events.emit(\"worktree.remove.after\", task_id=task_id, wt_name=name)\n return f\"Removed worktree '{name}'\"\n except Exception as e:\n self.events.emit(\"worktree.remove.failed\", task_id=task_id, wt_name=name, error=str(e))\n raise\n\n def keep(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: 未知的 worktree '{name}'\"\n if wt.get(\"task_id\") is not None:\n self.tasks.record_closeout(wt[\"task_id\"], \"kept\", \"\", keep_binding=True)\n self._update_entry(\n name,\n status=\"kept\",\n kept_at=time.time(),\n closeout={\"action\": \"keep\", \"reason\": \"\", \"at\": time.time()},\n )\n self.events.emit(\"worktree.keep\", task_id=wt.get(\"task_id\"), wt_name=name)\n return json.dumps(self._find(name), indent=2)\n\n def closeout(\n self,\n name: str,\n action: str,\n reason: str = \"\",\n force: bool = False,\n complete_task: bool = False,\n ) -> str:\n if action == \"keep\":\n wt = self._find(name)\n if not wt:\n return f\"Error: 未知的 worktree '{name}'\"\n if wt.get(\"task_id\") is not None:\n self.tasks.record_closeout(\n wt[\"task_id\"], \"kept\", reason, keep_binding=True\n )\n if complete_task:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self._update_entry(\n name,\n status=\"kept\",\n kept_at=time.time(),\n closeout={\"action\": \"keep\", \"reason\": reason, \"at\": time.time()},\n )\n self.events.emit(\n \"worktree.closeout.keep\",\n task_id=wt.get(\"task_id\"),\n wt_name=name,\n reason=reason,\n )\n return json.dumps(self._find(name), 
indent=2)\n if action == \"remove\":\n self.events.emit(\"worktree.closeout.remove\", wt_name=name, reason=reason)\n return self.remove(\n name,\n force=force,\n complete_task=complete_task,\n reason=reason,\n )\n raise ValueError(\"action 必须是 'keep' 或 'remove'\")\n\n\nWORKTREES = WorktreeManager(REPO_ROOT, TASKS, EVENTS)\n\n\n# -- 基础工具(与前序章节一致,保持最小实现) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\")),\n \"task_bind_worktree\": lambda **kw: TASKS.bind_worktree(kw[\"task_id\"], kw[\"worktree\"], kw.get(\"owner\", \"\")),\n \"worktree_create\": lambda **kw: WORKTREES.create(kw[\"name\"], kw.get(\"task_id\"), kw.get(\"base_ref\", \"HEAD\")),\n \"worktree_list\": lambda **kw: WORKTREES.list_all(),\n \"worktree_enter\": lambda **kw: WORKTREES.enter(kw[\"name\"]),\n \"worktree_status\": lambda **kw: WORKTREES.status(kw[\"name\"]),\n \"worktree_run\": lambda **kw: WORKTREES.run(kw[\"name\"], kw[\"command\"]),\n \"worktree_closeout\": lambda **kw: WORKTREES.closeout(\n kw[\"name\"],\n kw[\"action\"],\n kw.get(\"reason\", \"\"),\n kw.get(\"force\", False),\n kw.get(\"complete_task\", False),\n ),\n \"worktree_keep\": lambda **kw: WORKTREES.keep(kw[\"name\"]),\n \"worktree_remove\": lambda **kw: WORKTREES.remove(\n kw[\"name\"],\n kw.get(\"force\", False),\n kw.get(\"complete_task\", False),\n kw.get(\"reason\", \"\"),\n ),\n \"worktree_events\": lambda **kw: EVENTS.list_recent(kw.get(\"limit\", 20)),\n}\n\n# 紧凑工具定义:保持相同 schema,减少垂直空间\nTOOLS = [\n {\"name\": \"bash\", \"description\": 
\"在当前工作区执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"task_create\", \"description\": \"在共享任务板创建新任务。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"subject\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\"}}, \"required\": [\"subject\"]}},\n {\"name\": \"task_list\", \"description\": \"列出所有任务(含状态、owner、worktree 绑定)。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"task_get\", \"description\": \"按 ID 获取任务详情。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_update\", \"description\": \"更新任务状态或 owner。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\", \"deleted\"]}, \"owner\": {\"type\": \"string\"}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_bind_worktree\", \"description\": \"将任务绑定到指定 worktree 名称。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"worktree\": {\"type\": \"string\"}, \"owner\": {\"type\": \"string\"}}, \"required\": [\"task_id\", \"worktree\"]}},\n {\"name\": \"worktree_create\", \"description\": \"创建 git worktree,并可选绑定到任务。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"task_id\": {\"type\": \"integer\"}, \"base_ref\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_list\", \"description\": \"列出 `.worktrees/index.json` 跟踪的 worktree。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"worktree_enter\", \"description\": \"进入或重新打开 worktree 车道后再执行工作。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_status\", \"description\": \"查看某个 worktree 的 git status。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_run\", \"description\": \"在指定 worktree 目录执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"command\": {\"type\": \"string\"}}, \"required\": [\"name\", \"command\"]}},\n {\"name\": \"worktree_closeout\", \"description\": \"收尾工作车道:保留以便后续跟进,或直接移除。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"action\": {\"type\": \"string\", \"enum\": [\"keep\", \"remove\"]}, \"reason\": {\"type\": \"string\"}, \"force\": {\"type\": \"boolean\"}, \"complete_task\": {\"type\": 
\"boolean\"}}, \"required\": [\"name\", \"action\"]}},\n {\"name\": \"worktree_remove\", \"description\": \"移除 worktree,并可选将绑定任务标记为完成。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"force\": {\"type\": \"boolean\"}, \"complete_task\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_keep\", \"description\": \"标记 worktree 为 keep(不移除)。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_events\", \"description\": \"列出最近的生命周期事件。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"limit\": {\"type\": \"integer\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(f\"Repo root for s18: {REPO_ROOT}\")\n if not WORKTREES.git_available:\n print(\"提示:当前不在 git 仓库中,worktree_* 工具会返回错误。\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms18 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: directory isolation -- parallel execution lanes that never collide.\n\"\"\"\ns18_worktree_task_isolation.py - Worktree + Task Isolation\n\nDirectory-level isolation for parallel task execution.\nTasks are the control plane and worktrees are the execution plane.\n\n .tasks/task_12.json\n {\n \"id\": 12,\n \"subject\": \"Implement auth refactor\",\n \"status\": \"in_progress\",\n \"worktree\": \"auth-refactor\"\n }\n\n .worktrees/index.json\n {\n \"worktrees\": [\n {\n \"name\": \"auth-refactor\",\n \"path\": \".../.worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\"\n }\n ]\n }\n\nKey insight: \"Isolate by directory, coordinate by task ID.\"\n\nRead this file in this order:\n1. EventBus: how worktree lifecycle stays observable.\n2. TaskManager: how a task binds to an execution lane without becoming the lane itself.\n3. Worktree registry / closeout helpers: how directory state is created, tracked, and cleaned up.\n\nMost common confusion:\n- a worktree is not the task itself\n- a worktree record is not just a path string\n\nTeaching boundary:\nthis file teaches isolated execution lanes first.\nCross-machine execution, merge automation, and enterprise policy glue are intentionally out of scope.\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\ndef detect_repo_root(cwd: Path) -> Path | None:\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--show-toplevel\"],\n cwd=cwd, capture_output=True, text=True, timeout=10,\n )\n root = Path(r.stdout.strip())\n return root if r.returncode == 0 and root.exists() else None\n except Exception:\n return None\n\n\nREPO_ROOT = detect_repo_root(WORKDIR) or WORKDIR\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Use task + worktree tools for multi-task work. 
\"\n \"For parallel or risky changes: create tasks, allocate worktree lanes, \"\n \"run commands in those lanes, then choose keep/remove for closeout.\"\n)\n\n\n# -- EventBus: append-only lifecycle events for observability --\nclass EventBus:\n def __init__(self, event_log_path: Path):\n self.path = event_log_path\n self.path.parent.mkdir(parents=True, exist_ok=True)\n if not self.path.exists():\n self.path.write_text(\"\")\n\n def emit(self, event: str, task_id=None, wt_name=None, error=None, **extra):\n payload = {\"event\": event, \"ts\": time.time()}\n if task_id is not None:\n payload[\"task_id\"] = task_id\n if wt_name:\n payload[\"worktree\"] = wt_name\n if error:\n payload[\"error\"] = error\n payload.update(extra)\n with self.path.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n\n def list_recent(self, limit: int = 20) -> str:\n n = max(1, min(int(limit or 20), 200))\n lines = self.path.read_text(encoding=\"utf-8\").splitlines()\n items = []\n for line in lines[-n:]:\n try:\n items.append(json.loads(line))\n except Exception:\n items.append({\"event\": \"parse_error\", \"raw\": line})\n return json.dumps(items, indent=2)\n\n\n# -- TaskManager: persistent task board with optional worktree binding --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = []\n for f in self.dir.glob(\"task_*.json\"):\n try:\n ids.append(int(f.stem.split(\"_\")[1]))\n except Exception:\n pass\n return max(ids) if ids else 0\n\n def _path(self, task_id: int) -> Path:\n return self.dir / f\"task_{task_id}.json\"\n\n def _load(self, task_id: int) -> dict:\n path = self._path(task_id)\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n self._path(task[\"id\"]).write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id, \"subject\": subject, \"description\": description,\n \"status\": \"pending\", \"owner\": \"\", \"worktree\": \"\",\n \"worktree_state\": \"unbound\", \"last_worktree\": \"\",\n \"closeout\": None, \"blockedBy\": [],\n \"created_at\": time.time(), \"updated_at\": time.time(),\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def exists(self, task_id: int) -> bool:\n return self._path(task_id).exists()\n\n def update(self, task_id: int, status: str = None, owner: str = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\", \"deleted\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n if owner is not None:\n task[\"owner\"] = owner\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def bind_worktree(self, task_id: int, worktree: str, owner: str = \"\") -> str:\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n task[\"last_worktree\"] = worktree\n task[\"worktree_state\"] = \"active\"\n if owner:\n task[\"owner\"] = owner\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def unbind_worktree(self, task_id: int) -> str:\n task = self._load(task_id)\n 
task[\"worktree\"] = \"\"\n task[\"worktree_state\"] = \"unbound\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def record_closeout(self, task_id: int, action: str, reason: str = \"\", keep_binding: bool = False) -> str:\n task = self._load(task_id)\n task[\"closeout\"] = {\n \"action\": action,\n \"reason\": reason,\n \"at\": time.time(),\n }\n task[\"worktree_state\"] = action\n if not keep_binding:\n task[\"worktree\"] = \"\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\", \"deleted\": \"[-]\"}.get(t[\"status\"], \"[?]\")\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\n wt = f\" wt={t['worktree']}\" if t.get(\"worktree\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{wt}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(REPO_ROOT / \".tasks\")\nEVENTS = EventBus(REPO_ROOT / \".worktrees\" / \"events.jsonl\")\n\n\n# -- WorktreeManager: create/list/run/remove git worktrees --\nclass WorktreeManager:\n def __init__(self, repo_root: Path, tasks: TaskManager, events: EventBus):\n self.repo_root = repo_root\n self.tasks = tasks\n self.events = events\n self.dir = repo_root / \".worktrees\"\n self.dir.mkdir(parents=True, exist_ok=True)\n self.index_path = self.dir / \"index.json\"\n if not self.index_path.exists():\n self.index_path.write_text(json.dumps({\"worktrees\": []}, indent=2))\n self.git_available = self._check_git()\n\n def _check_git(self) -> bool:\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--is-inside-work-tree\"],\n cwd=self.repo_root, capture_output=True, text=True, timeout=10,\n )\n return r.returncode == 0\n except Exception:\n return False\n\n def _run_git(self, args: list[str]) -> str:\n if not self.git_available:\n raise RuntimeError(\"Not in a git repository.\")\n r = subprocess.run(\n [\"git\", *args], cwd=self.repo_root,\n capture_output=True, text=True, timeout=120,\n )\n if r.returncode != 0:\n raise RuntimeError((r.stdout + r.stderr).strip() or f\"git {' '.join(args)} failed\")\n return (r.stdout + r.stderr).strip() or \"(no output)\"\n\n def _load_index(self) -> dict:\n return json.loads(self.index_path.read_text())\n\n def _save_index(self, data: dict):\n self.index_path.write_text(json.dumps(data, indent=2))\n\n def _find(self, name: str) -> dict | None:\n for wt in self._load_index().get(\"worktrees\", []):\n if wt.get(\"name\") == name:\n return wt\n return None\n\n def _update_entry(self, name: str, **changes) -> dict:\n idx = self._load_index()\n updated = None\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item.update(changes)\n updated = item\n break\n self._save_index(idx)\n if not updated:\n raise ValueError(f\"Worktree '{name}' not found in index\")\n return updated\n\n def _validate_name(self, name: str):\n if not re.fullmatch(r\"[A-Za-z0-9._-]{1,40}\", name or \"\"):\n raise ValueError(\"Invalid worktree name. 
Use 1-40 chars: letters, digits, ., _, -\")\n\n def create(self, name: str, task_id: int = None, base_ref: str = \"HEAD\") -> str:\n self._validate_name(name)\n if self._find(name):\n raise ValueError(f\"Worktree '{name}' already exists\")\n if task_id is not None and not self.tasks.exists(task_id):\n raise ValueError(f\"Task {task_id} not found\")\n\n path = self.dir / name\n branch = f\"wt/{name}\"\n self.events.emit(\"worktree.create.before\", task_id=task_id, wt_name=name)\n try:\n self._run_git([\"worktree\", \"add\", \"-b\", branch, str(path), base_ref])\n entry = {\n \"name\": name, \"path\": str(path), \"branch\": branch,\n \"task_id\": task_id, \"status\": \"active\", \"created_at\": time.time(),\n }\n idx = self._load_index()\n idx[\"worktrees\"].append(entry)\n self._save_index(idx)\n if task_id is not None:\n self.tasks.bind_worktree(task_id, name)\n self.events.emit(\"worktree.create.after\", task_id=task_id, wt_name=name)\n return json.dumps(entry, indent=2)\n except Exception as e:\n self.events.emit(\"worktree.create.failed\", task_id=task_id, wt_name=name, error=str(e))\n raise\n\n def list_all(self) -> str:\n wts = self._load_index().get(\"worktrees\", [])\n if not wts:\n return \"No worktrees in index.\"\n lines = []\n for wt in wts:\n suffix = f\" task={wt['task_id']}\" if wt.get(\"task_id\") else \"\"\n lines.append(f\"[{wt.get('status', '?')}] {wt['name']} -> {wt['path']} ({wt.get('branch', '-')}){suffix}\")\n return \"\\n\".join(lines)\n\n def status(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n r = subprocess.run(\n [\"git\", \"status\", \"--short\", \"--branch\"],\n cwd=path, capture_output=True, text=True, timeout=60,\n )\n return (r.stdout + r.stderr).strip() or \"Clean worktree\"\n\n def enter(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n updated = self._update_entry(name, last_entered_at=time.time())\n self.events.emit(\"worktree.enter\", task_id=wt.get(\"task_id\"), wt_name=name, path=str(path))\n return json.dumps(updated, indent=2)\n\n def run(self, name: str, command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n try:\n self._update_entry(\n name,\n last_entered_at=time.time(),\n last_command_at=time.time(),\n last_command_preview=command[:120],\n )\n self.events.emit(\"worktree.run.before\", task_id=wt.get(\"task_id\"), wt_name=name, command=command[:120])\n r = subprocess.run(command, shell=True, cwd=path,\n capture_output=True, text=True, timeout=300)\n out = (r.stdout + r.stderr).strip()\n self.events.emit(\"worktree.run.after\", task_id=wt.get(\"task_id\"), wt_name=name)\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n self.events.emit(\"worktree.run.timeout\", task_id=wt.get(\"task_id\"), wt_name=name)\n return \"Error: Timeout (300s)\"\n\n def remove(\n self,\n name: str,\n force: bool = False,\n complete_task: bool = False,\n reason: str = \"\",\n ) -> str:\n wt = 
self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n task_id = wt.get(\"task_id\")\n self.events.emit(\"worktree.remove.before\", task_id=task_id, wt_name=name)\n try:\n args = [\"worktree\", \"remove\"]\n if force:\n args.append(\"--force\")\n args.append(wt[\"path\"])\n self._run_git(args)\n if complete_task and task_id is not None:\n self.tasks.update(task_id, status=\"completed\")\n self.events.emit(\"task.completed\", task_id=task_id, wt_name=name)\n if task_id is not None:\n self.tasks.record_closeout(task_id, \"removed\", reason, keep_binding=False)\n self._update_entry(\n name,\n status=\"removed\",\n removed_at=time.time(),\n closeout={\"action\": \"remove\", \"reason\": reason, \"at\": time.time()},\n )\n self.events.emit(\"worktree.remove.after\", task_id=task_id, wt_name=name)\n return f\"Removed worktree '{name}'\"\n except Exception as e:\n self.events.emit(\"worktree.remove.failed\", task_id=task_id, wt_name=name, error=str(e))\n raise\n\n def keep(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n if wt.get(\"task_id\") is not None:\n self.tasks.record_closeout(wt[\"task_id\"], \"kept\", \"\", keep_binding=True)\n self._update_entry(\n name,\n status=\"kept\",\n kept_at=time.time(),\n closeout={\"action\": \"keep\", \"reason\": \"\", \"at\": time.time()},\n )\n self.events.emit(\"worktree.keep\", task_id=wt.get(\"task_id\"), wt_name=name)\n return json.dumps(self._find(name), indent=2)\n\n def closeout(\n self,\n name: str,\n action: str,\n reason: str = \"\",\n force: bool = False,\n complete_task: bool = False,\n ) -> str:\n if action == \"keep\":\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n if wt.get(\"task_id\") is not None:\n self.tasks.record_closeout(\n wt[\"task_id\"], \"kept\", reason, keep_binding=True\n )\n if complete_task:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self._update_entry(\n name,\n status=\"kept\",\n kept_at=time.time(),\n closeout={\"action\": \"keep\", \"reason\": reason, \"at\": time.time()},\n )\n self.events.emit(\n \"worktree.closeout.keep\",\n task_id=wt.get(\"task_id\"),\n wt_name=name,\n reason=reason,\n )\n return json.dumps(self._find(name), indent=2)\n if action == \"remove\":\n self.events.emit(\"worktree.closeout.remove\", wt_name=name, reason=reason)\n return self.remove(\n name,\n force=force,\n complete_task=complete_task,\n reason=reason,\n )\n raise ValueError(\"action must be 'keep' or 'remove'\")\n\n\nWORKTREES = WorktreeManager(REPO_ROOT, TASKS, EVENTS)\n\n\n# -- Base tools (same as previous sessions, kept minimal) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\")),\n \"task_bind_worktree\": lambda **kw: TASKS.bind_worktree(kw[\"task_id\"], kw[\"worktree\"], kw.get(\"owner\", \"\")),\n \"worktree_create\": lambda **kw: WORKTREES.create(kw[\"name\"], kw.get(\"task_id\"), kw.get(\"base_ref\", \"HEAD\")),\n \"worktree_list\": lambda **kw: WORKTREES.list_all(),\n \"worktree_enter\": lambda **kw: WORKTREES.enter(kw[\"name\"]),\n \"worktree_status\": lambda **kw: WORKTREES.status(kw[\"name\"]),\n \"worktree_run\": lambda **kw: WORKTREES.run(kw[\"name\"], kw[\"command\"]),\n \"worktree_closeout\": lambda **kw: WORKTREES.closeout(\n kw[\"name\"],\n kw[\"action\"],\n kw.get(\"reason\", \"\"),\n kw.get(\"force\", False),\n kw.get(\"complete_task\", False),\n ),\n \"worktree_keep\": lambda **kw: WORKTREES.keep(kw[\"name\"]),\n \"worktree_remove\": lambda **kw: WORKTREES.remove(\n kw[\"name\"],\n kw.get(\"force\", False),\n kw.get(\"complete_task\", False),\n kw.get(\"reason\", \"\"),\n ),\n \"worktree_events\": lambda **kw: EVENTS.list_recent(kw.get(\"limit\", 20)),\n}\n\n# Compact tool definitions -- same schema, less vertical space\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command in the current workspace.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"task_create\", \"description\": \"Create a new task on the shared task board.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"subject\": 
{\"type\": \"string\"}, \"description\": {\"type\": \"string\"}}, \"required\": [\"subject\"]}},\n {\"name\": \"task_list\", \"description\": \"List all tasks with status, owner, and worktree binding.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"task_get\", \"description\": \"Get task details by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_update\", \"description\": \"Update task status or owner.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\", \"deleted\"]}, \"owner\": {\"type\": \"string\"}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_bind_worktree\", \"description\": \"Bind a task to a worktree name.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"worktree\": {\"type\": \"string\"}, \"owner\": {\"type\": \"string\"}}, \"required\": [\"task_id\", \"worktree\"]}},\n {\"name\": \"worktree_create\", \"description\": \"Create a git worktree and optionally bind it to a task.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"task_id\": {\"type\": \"integer\"}, \"base_ref\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_list\", \"description\": \"List worktrees tracked in .worktrees/index.json.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"worktree_enter\", \"description\": \"Enter or reopen a worktree lane before working in it.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_status\", \"description\": \"Show git status for one worktree.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_run\", \"description\": \"Run a shell command in a named worktree directory.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"command\": {\"type\": \"string\"}}, \"required\": [\"name\", \"command\"]}},\n {\"name\": \"worktree_closeout\", \"description\": \"Close out a lane by keeping it for follow-up or removing it.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"action\": {\"type\": \"string\", \"enum\": [\"keep\", \"remove\"]}, \"reason\": {\"type\": \"string\"}, \"force\": {\"type\": \"boolean\"}, \"complete_task\": {\"type\": \"boolean\"}}, \"required\": [\"name\", \"action\"]}},\n {\"name\": \"worktree_remove\", \"description\": \"Remove a worktree and optionally mark its bound task completed.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"force\": {\"type\": \"boolean\"}, \"complete_task\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_keep\", \"description\": \"Mark a worktree as kept without removing it.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_events\", \"description\": \"List recent lifecycle events.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"limit\": {\"type\": \"integer\"}}}},\n]\n\n\ndef 
agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(f\"Repo root for s18: {REPO_ROOT}\")\n if not WORKTREES.git_available:\n print(\"Note: Not in a git repo. worktree_* tools will return errors.\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms18 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: directory isolation -- parallel execution lanes that never collide.\n\"\"\"\ns18_worktree_task_isolation.py - Worktree + Task Isolation\n\nDirectory-level isolation for parallel task execution.\nTasks are the control plane and worktrees are the execution plane.\n\n .tasks/task_12.json\n {\n \"id\": 12,\n \"subject\": \"Implement auth refactor\",\n \"status\": \"in_progress\",\n \"worktree\": \"auth-refactor\"\n }\n\n .worktrees/index.json\n {\n \"worktrees\": [\n {\n \"name\": \"auth-refactor\",\n \"path\": \".../.worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\"\n }\n ]\n }\n\nKey insight: \"Isolate by directory, coordinate by task ID.\"\n\nRead this file in this order:\n1. EventBus: how worktree lifecycle stays observable.\n2. TaskManager: how a task binds to an execution lane without becoming the lane itself.\n3. Worktree registry / closeout helpers: how directory state is created, tracked, and cleaned up.\n\nMost common confusion:\n- a worktree is not the task itself\n- a worktree record is not just a path string\n\nTeaching boundary:\nthis file teaches isolated execution lanes first.\nCross-machine execution, merge automation, and enterprise policy glue are intentionally out of scope.\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\ndef detect_repo_root(cwd: Path) -> Path | None:\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--show-toplevel\"],\n cwd=cwd, capture_output=True, text=True, timeout=10,\n )\n root = Path(r.stdout.strip())\n return root if r.returncode == 0 and root.exists() else None\n except Exception:\n return None\n\n\nREPO_ROOT = detect_repo_root(WORKDIR) or WORKDIR\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Use task + worktree tools for multi-task work. 
\"\n \"For parallel or risky changes: create tasks, allocate worktree lanes, \"\n \"run commands in those lanes, then choose keep/remove for closeout.\"\n)\n\n\n# -- EventBus: append-only lifecycle events for observability --\nclass EventBus:\n def __init__(self, event_log_path: Path):\n self.path = event_log_path\n self.path.parent.mkdir(parents=True, exist_ok=True)\n if not self.path.exists():\n self.path.write_text(\"\")\n\n def emit(self, event: str, task_id=None, wt_name=None, error=None, **extra):\n payload = {\"event\": event, \"ts\": time.time()}\n if task_id is not None:\n payload[\"task_id\"] = task_id\n if wt_name:\n payload[\"worktree\"] = wt_name\n if error:\n payload[\"error\"] = error\n payload.update(extra)\n with self.path.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n\n def list_recent(self, limit: int = 20) -> str:\n n = max(1, min(int(limit or 20), 200))\n lines = self.path.read_text(encoding=\"utf-8\").splitlines()\n items = []\n for line in lines[-n:]:\n try:\n items.append(json.loads(line))\n except Exception:\n items.append({\"event\": \"parse_error\", \"raw\": line})\n return json.dumps(items, indent=2)\n\n\n# -- TaskManager: persistent task board with optional worktree binding --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = []\n for f in self.dir.glob(\"task_*.json\"):\n try:\n ids.append(int(f.stem.split(\"_\")[1]))\n except Exception:\n pass\n return max(ids) if ids else 0\n\n def _path(self, task_id: int) -> Path:\n return self.dir / f\"task_{task_id}.json\"\n\n def _load(self, task_id: int) -> dict:\n path = self._path(task_id)\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n self._path(task[\"id\"]).write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id, \"subject\": subject, \"description\": description,\n \"status\": \"pending\", \"owner\": \"\", \"worktree\": \"\",\n \"worktree_state\": \"unbound\", \"last_worktree\": \"\",\n \"closeout\": None, \"blockedBy\": [],\n \"created_at\": time.time(), \"updated_at\": time.time(),\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def exists(self, task_id: int) -> bool:\n return self._path(task_id).exists()\n\n def update(self, task_id: int, status: str = None, owner: str = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\", \"deleted\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n if owner is not None:\n task[\"owner\"] = owner\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def bind_worktree(self, task_id: int, worktree: str, owner: str = \"\") -> str:\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n task[\"last_worktree\"] = worktree\n task[\"worktree_state\"] = \"active\"\n if owner:\n task[\"owner\"] = owner\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def unbind_worktree(self, task_id: int) -> str:\n task = self._load(task_id)\n 
task[\"worktree\"] = \"\"\n task[\"worktree_state\"] = \"unbound\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def record_closeout(self, task_id: int, action: str, reason: str = \"\", keep_binding: bool = False) -> str:\n task = self._load(task_id)\n task[\"closeout\"] = {\n \"action\": action,\n \"reason\": reason,\n \"at\": time.time(),\n }\n task[\"worktree_state\"] = action\n if not keep_binding:\n task[\"worktree\"] = \"\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\", \"deleted\": \"[-]\"}.get(t[\"status\"], \"[?]\")\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\n wt = f\" wt={t['worktree']}\" if t.get(\"worktree\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{wt}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(REPO_ROOT / \".tasks\")\nEVENTS = EventBus(REPO_ROOT / \".worktrees\" / \"events.jsonl\")\n\n\n# -- WorktreeManager: create/list/run/remove git worktrees --\nclass WorktreeManager:\n def __init__(self, repo_root: Path, tasks: TaskManager, events: EventBus):\n self.repo_root = repo_root\n self.tasks = tasks\n self.events = events\n self.dir = repo_root / \".worktrees\"\n self.dir.mkdir(parents=True, exist_ok=True)\n self.index_path = self.dir / \"index.json\"\n if not self.index_path.exists():\n self.index_path.write_text(json.dumps({\"worktrees\": []}, indent=2))\n self.git_available = self._check_git()\n\n def _check_git(self) -> bool:\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--is-inside-work-tree\"],\n cwd=self.repo_root, capture_output=True, text=True, timeout=10,\n )\n return r.returncode == 0\n except Exception:\n return False\n\n def _run_git(self, args: list[str]) -> str:\n if not self.git_available:\n raise RuntimeError(\"Not in a git repository.\")\n r = subprocess.run(\n [\"git\", *args], cwd=self.repo_root,\n capture_output=True, text=True, timeout=120,\n )\n if r.returncode != 0:\n raise RuntimeError((r.stdout + r.stderr).strip() or f\"git {' '.join(args)} failed\")\n return (r.stdout + r.stderr).strip() or \"(no output)\"\n\n def _load_index(self) -> dict:\n return json.loads(self.index_path.read_text())\n\n def _save_index(self, data: dict):\n self.index_path.write_text(json.dumps(data, indent=2))\n\n def _find(self, name: str) -> dict | None:\n for wt in self._load_index().get(\"worktrees\", []):\n if wt.get(\"name\") == name:\n return wt\n return None\n\n def _update_entry(self, name: str, **changes) -> dict:\n idx = self._load_index()\n updated = None\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item.update(changes)\n updated = item\n break\n self._save_index(idx)\n if not updated:\n raise ValueError(f\"Worktree '{name}' not found in index\")\n return updated\n\n def _validate_name(self, name: str):\n if not re.fullmatch(r\"[A-Za-z0-9._-]{1,40}\", name or \"\"):\n raise ValueError(\"Invalid worktree name. 
Use 1-40 chars: letters, digits, ., _, -\")\n\n def create(self, name: str, task_id: int = None, base_ref: str = \"HEAD\") -> str:\n self._validate_name(name)\n if self._find(name):\n raise ValueError(f\"Worktree '{name}' already exists\")\n if task_id is not None and not self.tasks.exists(task_id):\n raise ValueError(f\"Task {task_id} not found\")\n\n path = self.dir / name\n branch = f\"wt/{name}\"\n self.events.emit(\"worktree.create.before\", task_id=task_id, wt_name=name)\n try:\n self._run_git([\"worktree\", \"add\", \"-b\", branch, str(path), base_ref])\n entry = {\n \"name\": name, \"path\": str(path), \"branch\": branch,\n \"task_id\": task_id, \"status\": \"active\", \"created_at\": time.time(),\n }\n idx = self._load_index()\n idx[\"worktrees\"].append(entry)\n self._save_index(idx)\n if task_id is not None:\n self.tasks.bind_worktree(task_id, name)\n self.events.emit(\"worktree.create.after\", task_id=task_id, wt_name=name)\n return json.dumps(entry, indent=2)\n except Exception as e:\n self.events.emit(\"worktree.create.failed\", task_id=task_id, wt_name=name, error=str(e))\n raise\n\n def list_all(self) -> str:\n wts = self._load_index().get(\"worktrees\", [])\n if not wts:\n return \"No worktrees in index.\"\n lines = []\n for wt in wts:\n suffix = f\" task={wt['task_id']}\" if wt.get(\"task_id\") else \"\"\n lines.append(f\"[{wt.get('status', '?')}] {wt['name']} -> {wt['path']} ({wt.get('branch', '-')}){suffix}\")\n return \"\\n\".join(lines)\n\n def status(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n r = subprocess.run(\n [\"git\", \"status\", \"--short\", \"--branch\"],\n cwd=path, capture_output=True, text=True, timeout=60,\n )\n return (r.stdout + r.stderr).strip() or \"Clean worktree\"\n\n def enter(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n updated = self._update_entry(name, last_entered_at=time.time())\n self.events.emit(\"worktree.enter\", task_id=wt.get(\"task_id\"), wt_name=name, path=str(path))\n return json.dumps(updated, indent=2)\n\n def run(self, name: str, command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n try:\n self._update_entry(\n name,\n last_entered_at=time.time(),\n last_command_at=time.time(),\n last_command_preview=command[:120],\n )\n self.events.emit(\"worktree.run.before\", task_id=wt.get(\"task_id\"), wt_name=name, command=command[:120])\n r = subprocess.run(command, shell=True, cwd=path,\n capture_output=True, text=True, timeout=300)\n out = (r.stdout + r.stderr).strip()\n self.events.emit(\"worktree.run.after\", task_id=wt.get(\"task_id\"), wt_name=name)\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n self.events.emit(\"worktree.run.timeout\", task_id=wt.get(\"task_id\"), wt_name=name)\n return \"Error: Timeout (300s)\"\n\n def remove(\n self,\n name: str,\n force: bool = False,\n complete_task: bool = False,\n reason: str = \"\",\n ) -> str:\n wt = 
self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n task_id = wt.get(\"task_id\")\n self.events.emit(\"worktree.remove.before\", task_id=task_id, wt_name=name)\n try:\n args = [\"worktree\", \"remove\"]\n if force:\n args.append(\"--force\")\n args.append(wt[\"path\"])\n self._run_git(args)\n if complete_task and task_id is not None:\n self.tasks.update(task_id, status=\"completed\")\n self.events.emit(\"task.completed\", task_id=task_id, wt_name=name)\n if task_id is not None:\n self.tasks.record_closeout(task_id, \"removed\", reason, keep_binding=False)\n self._update_entry(\n name,\n status=\"removed\",\n removed_at=time.time(),\n closeout={\"action\": \"remove\", \"reason\": reason, \"at\": time.time()},\n )\n self.events.emit(\"worktree.remove.after\", task_id=task_id, wt_name=name)\n return f\"Removed worktree '{name}'\"\n except Exception as e:\n self.events.emit(\"worktree.remove.failed\", task_id=task_id, wt_name=name, error=str(e))\n raise\n\n def keep(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n if wt.get(\"task_id\") is not None:\n self.tasks.record_closeout(wt[\"task_id\"], \"kept\", \"\", keep_binding=True)\n self._update_entry(\n name,\n status=\"kept\",\n kept_at=time.time(),\n closeout={\"action\": \"keep\", \"reason\": \"\", \"at\": time.time()},\n )\n self.events.emit(\"worktree.keep\", task_id=wt.get(\"task_id\"), wt_name=name)\n return json.dumps(self._find(name), indent=2)\n\n def closeout(\n self,\n name: str,\n action: str,\n reason: str = \"\",\n force: bool = False,\n complete_task: bool = False,\n ) -> str:\n if action == \"keep\":\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n if wt.get(\"task_id\") is not None:\n self.tasks.record_closeout(\n wt[\"task_id\"], \"kept\", reason, keep_binding=True\n )\n if complete_task:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self._update_entry(\n name,\n status=\"kept\",\n kept_at=time.time(),\n closeout={\"action\": \"keep\", \"reason\": reason, \"at\": time.time()},\n )\n self.events.emit(\n \"worktree.closeout.keep\",\n task_id=wt.get(\"task_id\"),\n wt_name=name,\n reason=reason,\n )\n return json.dumps(self._find(name), indent=2)\n if action == \"remove\":\n self.events.emit(\"worktree.closeout.remove\", wt_name=name, reason=reason)\n return self.remove(\n name,\n force=force,\n complete_task=complete_task,\n reason=reason,\n )\n raise ValueError(\"action must be 'keep' or 'remove'\")\n\n\nWORKTREES = WorktreeManager(REPO_ROOT, TASKS, EVENTS)\n\n\n# -- Base tools (same as previous sessions, kept minimal) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\")),\n \"task_bind_worktree\": lambda **kw: TASKS.bind_worktree(kw[\"task_id\"], kw[\"worktree\"], kw.get(\"owner\", \"\")),\n \"worktree_create\": lambda **kw: WORKTREES.create(kw[\"name\"], kw.get(\"task_id\"), kw.get(\"base_ref\", \"HEAD\")),\n \"worktree_list\": lambda **kw: WORKTREES.list_all(),\n \"worktree_enter\": lambda **kw: WORKTREES.enter(kw[\"name\"]),\n \"worktree_status\": lambda **kw: WORKTREES.status(kw[\"name\"]),\n \"worktree_run\": lambda **kw: WORKTREES.run(kw[\"name\"], kw[\"command\"]),\n \"worktree_closeout\": lambda **kw: WORKTREES.closeout(\n kw[\"name\"],\n kw[\"action\"],\n kw.get(\"reason\", \"\"),\n kw.get(\"force\", False),\n kw.get(\"complete_task\", False),\n ),\n \"worktree_keep\": lambda **kw: WORKTREES.keep(kw[\"name\"]),\n \"worktree_remove\": lambda **kw: WORKTREES.remove(\n kw[\"name\"],\n kw.get(\"force\", False),\n kw.get(\"complete_task\", False),\n kw.get(\"reason\", \"\"),\n ),\n \"worktree_events\": lambda **kw: EVENTS.list_recent(kw.get(\"limit\", 20)),\n}\n\n# Compact tool definitions -- same schema, less vertical space\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command in the current workspace.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"task_create\", \"description\": \"Create a new task on the shared task board.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"subject\": 
{\"type\": \"string\"}, \"description\": {\"type\": \"string\"}}, \"required\": [\"subject\"]}},\n {\"name\": \"task_list\", \"description\": \"List all tasks with status, owner, and worktree binding.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"task_get\", \"description\": \"Get task details by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_update\", \"description\": \"Update task status or owner.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\", \"deleted\"]}, \"owner\": {\"type\": \"string\"}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_bind_worktree\", \"description\": \"Bind a task to a worktree name.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"worktree\": {\"type\": \"string\"}, \"owner\": {\"type\": \"string\"}}, \"required\": [\"task_id\", \"worktree\"]}},\n {\"name\": \"worktree_create\", \"description\": \"Create a git worktree and optionally bind it to a task.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"task_id\": {\"type\": \"integer\"}, \"base_ref\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_list\", \"description\": \"List worktrees tracked in .worktrees/index.json.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"worktree_enter\", \"description\": \"Enter or reopen a worktree lane before working in it.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_status\", \"description\": \"Show git status for one worktree.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_run\", \"description\": \"Run a shell command in a named worktree directory.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"command\": {\"type\": \"string\"}}, \"required\": [\"name\", \"command\"]}},\n {\"name\": \"worktree_closeout\", \"description\": \"Close out a lane by keeping it for follow-up or removing it.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"action\": {\"type\": \"string\", \"enum\": [\"keep\", \"remove\"]}, \"reason\": {\"type\": \"string\"}, \"force\": {\"type\": \"boolean\"}, \"complete_task\": {\"type\": \"boolean\"}}, \"required\": [\"name\", \"action\"]}},\n {\"name\": \"worktree_remove\", \"description\": \"Remove a worktree and optionally mark its bound task completed.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"force\": {\"type\": \"boolean\"}, \"complete_task\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_keep\", \"description\": \"Mark a worktree as kept without removing it.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}}, \"required\": [\"name\"]}},\n {\"name\": \"worktree_events\", \"description\": \"List recent lifecycle events.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"limit\": {\"type\": \"integer\"}}}},\n]\n\n\ndef 
agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(f\"Repo root for s18: {REPO_ROOT}\")\n if not WORKTREES.git_available:\n print(\"Note: Not in a git repo. worktree_* tools will return errors.\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms18 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ },
+ {
+ "id": "s19",
+ "filename": "s19_mcp_plugin.py",
+ "title": "MCP & Plugin",
+ "subtitle": "External Capability Bus",
+ "loc": 463,
+ "tools": [
+ "bash",
+ "read_file",
+ "write_file",
+ "edit_file"
+ ],
+ "newTools": [],
+ "coreAddition": "Scoped servers + capability routing",
+ "keyInsight": "External capabilities join the same routing, permission, and result-append path as native tools.",
+ "classes": [
+ {
+ "name": "CapabilityPermissionGate",
+ "startLine": 60,
+ "endLine": 144
+ },
+ {
+ "name": "MCPClient",
+ "startLine": 148,
+ "endLine": 268
+ },
+ {
+ "name": "PluginLoader",
+ "startLine": 269,
+ "endLine": 308
+ },
+ {
+ "name": "MCPToolRouter",
+ "startLine": 309,
+ "endLine": 346
+ }
+ ],
+ "functions": [
+ {
+ "name": "safe_path",
+ "signature": "def safe_path(p: str)",
+ "startLine": 347
+ },
+ {
+ "name": "run_bash",
+ "signature": "def run_bash(command: str)",
+ "startLine": 353
+ },
+ {
+ "name": "run_read",
+ "signature": "def run_read(path: str)",
+ "startLine": 365
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
- "startLine": 513
+ "startLine": 371
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
- "startLine": 523
+ "startLine": 380
+ },
+ {
+ "name": "build_tool_pool",
+ "signature": "def build_tool_pool()",
+ "startLine": 416
+ },
+ {
+ "name": "handle_tool_call",
+ "signature": "def handle_tool_call(tool_name: str, tool_input: dict)",
+ "startLine": 434
+ },
+ {
+ "name": "normalize_tool_result",
+ "signature": "def normalize_tool_result(tool_name: str, output: str, intent: dict | None = None)",
+ "startLine": 444
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
- "startLine": 728
+ "startLine": 458
}
],
- "layer": "collaboration",
- "source": "#!/usr/bin/env python3\n\"\"\"\ns12_worktree_task_isolation.py - Worktree + Task Isolation\n\nDirectory-level isolation for parallel task execution.\nTasks are the control plane and worktrees are the execution plane.\n\n .tasks/task_12.json\n {\n \"id\": 12,\n \"subject\": \"Implement auth refactor\",\n \"status\": \"in_progress\",\n \"worktree\": \"auth-refactor\"\n }\n\n .worktrees/index.json\n {\n \"worktrees\": [\n {\n \"name\": \"auth-refactor\",\n \"path\": \".../.worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\"\n }\n ]\n }\n\nKey insight: \"Isolate by directory, coordinate by task ID.\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\ndef detect_repo_root(cwd: Path) -> Path | None:\n \"\"\"Return git repo root if cwd is inside a repo, else None.\"\"\"\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--show-toplevel\"],\n cwd=cwd,\n capture_output=True,\n text=True,\n timeout=10,\n )\n if r.returncode != 0:\n return None\n root = Path(r.stdout.strip())\n return root if root.exists() else None\n except Exception:\n return None\n\n\nREPO_ROOT = detect_repo_root(WORKDIR) or WORKDIR\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Use task + worktree tools for multi-task work. \"\n \"For parallel or risky changes: create tasks, allocate worktree lanes, \"\n \"run commands in those lanes, then choose keep/remove for closeout. 
\"\n \"Use worktree_events when you need lifecycle visibility.\"\n)\n\n\n# -- EventBus: append-only lifecycle events for observability --\nclass EventBus:\n def __init__(self, event_log_path: Path):\n self.path = event_log_path\n self.path.parent.mkdir(parents=True, exist_ok=True)\n if not self.path.exists():\n self.path.write_text(\"\")\n\n def emit(\n self,\n event: str,\n task: dict | None = None,\n worktree: dict | None = None,\n error: str | None = None,\n ):\n payload = {\n \"event\": event,\n \"ts\": time.time(),\n \"task\": task or {},\n \"worktree\": worktree or {},\n }\n if error:\n payload[\"error\"] = error\n with self.path.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n\n def list_recent(self, limit: int = 20) -> str:\n n = max(1, min(int(limit or 20), 200))\n lines = self.path.read_text(encoding=\"utf-8\").splitlines()\n recent = lines[-n:]\n items = []\n for line in recent:\n try:\n items.append(json.loads(line))\n except Exception:\n items.append({\"event\": \"parse_error\", \"raw\": line})\n return json.dumps(items, indent=2)\n\n\n# -- TaskManager: persistent task board with optional worktree binding --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = []\n for f in self.dir.glob(\"task_*.json\"):\n try:\n ids.append(int(f.stem.split(\"_\")[1]))\n except Exception:\n pass\n return max(ids) if ids else 0\n\n def _path(self, task_id: int) -> Path:\n return self.dir / f\"task_{task_id}.json\"\n\n def _load(self, task_id: int) -> dict:\n path = self._path(task_id)\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n self._path(task[\"id\"]).write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id,\n \"subject\": subject,\n \"description\": description,\n \"status\": \"pending\",\n \"owner\": \"\",\n \"worktree\": \"\",\n \"blockedBy\": [],\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def exists(self, task_id: int) -> bool:\n return self._path(task_id).exists()\n\n def update(self, task_id: int, status: str = None, owner: str = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n if owner is not None:\n task[\"owner\"] = owner\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def bind_worktree(self, task_id: int, worktree: str, owner: str = \"\") -> str:\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if owner:\n task[\"owner\"] = owner\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def unbind_worktree(self, task_id: int) -> str:\n task = self._load(task_id)\n task[\"worktree\"] = \"\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n 
tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\n \"pending\": \"[ ]\",\n \"in_progress\": \"[>]\",\n \"completed\": \"[x]\",\n }.get(t[\"status\"], \"[?]\")\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\n wt = f\" wt={t['worktree']}\" if t.get(\"worktree\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{wt}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(REPO_ROOT / \".tasks\")\nEVENTS = EventBus(REPO_ROOT / \".worktrees\" / \"events.jsonl\")\n\n\n# -- WorktreeManager: create/list/run/remove git worktrees + lifecycle index --\nclass WorktreeManager:\n def __init__(self, repo_root: Path, tasks: TaskManager, events: EventBus):\n self.repo_root = repo_root\n self.tasks = tasks\n self.events = events\n self.dir = repo_root / \".worktrees\"\n self.dir.mkdir(parents=True, exist_ok=True)\n self.index_path = self.dir / \"index.json\"\n if not self.index_path.exists():\n self.index_path.write_text(json.dumps({\"worktrees\": []}, indent=2))\n self.git_available = self._is_git_repo()\n\n def _is_git_repo(self) -> bool:\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--is-inside-work-tree\"],\n cwd=self.repo_root,\n capture_output=True,\n text=True,\n timeout=10,\n )\n return r.returncode == 0\n except Exception:\n return False\n\n def _run_git(self, args: list[str]) -> str:\n if not self.git_available:\n raise RuntimeError(\"Not in a git repository. worktree tools require git.\")\n r = subprocess.run(\n [\"git\", *args],\n cwd=self.repo_root,\n capture_output=True,\n text=True,\n timeout=120,\n )\n if r.returncode != 0:\n msg = (r.stdout + r.stderr).strip()\n raise RuntimeError(msg or f\"git {' '.join(args)} failed\")\n return (r.stdout + r.stderr).strip() or \"(no output)\"\n\n def _load_index(self) -> dict:\n return json.loads(self.index_path.read_text())\n\n def _save_index(self, data: dict):\n self.index_path.write_text(json.dumps(data, indent=2))\n\n def _find(self, name: str) -> dict | None:\n idx = self._load_index()\n for wt in idx.get(\"worktrees\", []):\n if wt.get(\"name\") == name:\n return wt\n return None\n\n def _validate_name(self, name: str):\n if not re.fullmatch(r\"[A-Za-z0-9._-]{1,40}\", name or \"\"):\n raise ValueError(\n \"Invalid worktree name. 
Use 1-40 chars: letters, numbers, ., _, -\"\n )\n\n def create(self, name: str, task_id: int = None, base_ref: str = \"HEAD\") -> str:\n self._validate_name(name)\n if self._find(name):\n raise ValueError(f\"Worktree '{name}' already exists in index\")\n if task_id is not None and not self.tasks.exists(task_id):\n raise ValueError(f\"Task {task_id} not found\")\n\n path = self.dir / name\n branch = f\"wt/{name}\"\n self.events.emit(\n \"worktree.create.before\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\"name\": name, \"base_ref\": base_ref},\n )\n try:\n self._run_git([\"worktree\", \"add\", \"-b\", branch, str(path), base_ref])\n\n entry = {\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"task_id\": task_id,\n \"status\": \"active\",\n \"created_at\": time.time(),\n }\n\n idx = self._load_index()\n idx[\"worktrees\"].append(entry)\n self._save_index(idx)\n\n if task_id is not None:\n self.tasks.bind_worktree(task_id, name)\n\n self.events.emit(\n \"worktree.create.after\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"status\": \"active\",\n },\n )\n return json.dumps(entry, indent=2)\n except Exception as e:\n self.events.emit(\n \"worktree.create.failed\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\"name\": name, \"base_ref\": base_ref},\n error=str(e),\n )\n raise\n\n def list_all(self) -> str:\n idx = self._load_index()\n wts = idx.get(\"worktrees\", [])\n if not wts:\n return \"No worktrees in index.\"\n lines = []\n for wt in wts:\n suffix = f\" task={wt['task_id']}\" if wt.get(\"task_id\") else \"\"\n lines.append(\n f\"[{wt.get('status', 'unknown')}] {wt['name']} -> \"\n f\"{wt['path']} ({wt.get('branch', '-')}){suffix}\"\n )\n return \"\\n\".join(lines)\n\n def status(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n r = subprocess.run(\n [\"git\", \"status\", \"--short\", \"--branch\"],\n cwd=path,\n capture_output=True,\n text=True,\n timeout=60,\n )\n text = (r.stdout + r.stderr).strip()\n return text or \"Clean worktree\"\n\n def run(self, name: str, command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n\n try:\n r = subprocess.run(\n command,\n shell=True,\n cwd=path,\n capture_output=True,\n text=True,\n timeout=300,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (300s)\"\n\n def remove(self, name: str, force: bool = False, complete_task: bool = False) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n\n self.events.emit(\n \"worktree.remove.before\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\n )\n try:\n args = [\"worktree\", \"remove\"]\n if force:\n args.append(\"--force\")\n args.append(wt[\"path\"])\n self._run_git(args)\n\n if complete_task and wt.get(\"task_id\") is not None:\n 
task_id = wt[\"task_id\"]\n before = json.loads(self.tasks.get(task_id))\n self.tasks.update(task_id, status=\"completed\")\n self.tasks.unbind_worktree(task_id)\n self.events.emit(\n \"task.completed\",\n task={\n \"id\": task_id,\n \"subject\": before.get(\"subject\", \"\"),\n \"status\": \"completed\",\n },\n worktree={\"name\": name},\n )\n\n idx = self._load_index()\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item[\"status\"] = \"removed\"\n item[\"removed_at\"] = time.time()\n self._save_index(idx)\n\n self.events.emit(\n \"worktree.remove.after\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\"), \"status\": \"removed\"},\n )\n return f\"Removed worktree '{name}'\"\n except Exception as e:\n self.events.emit(\n \"worktree.remove.failed\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\n error=str(e),\n )\n raise\n\n def keep(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n\n idx = self._load_index()\n kept = None\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item[\"status\"] = \"kept\"\n item[\"kept_at\"] = time.time()\n kept = item\n self._save_index(idx)\n\n self.events.emit(\n \"worktree.keep\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\n \"name\": name,\n \"path\": wt.get(\"path\"),\n \"status\": \"kept\",\n },\n )\n return json.dumps(kept, indent=2) if kept else f\"Error: Unknown worktree '{name}'\"\n\n\nWORKTREES = WorktreeManager(REPO_ROOT, TASKS, EVENTS)\n\n\n# -- Base tools (kept minimal, same style as previous sessions) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\")),\n \"task_bind_worktree\": lambda **kw: TASKS.bind_worktree(kw[\"task_id\"], kw[\"worktree\"], kw.get(\"owner\", \"\")),\n \"worktree_create\": lambda **kw: WORKTREES.create(kw[\"name\"], kw.get(\"task_id\"), kw.get(\"base_ref\", \"HEAD\")),\n \"worktree_list\": lambda **kw: WORKTREES.list_all(),\n \"worktree_status\": lambda **kw: WORKTREES.status(kw[\"name\"]),\n \"worktree_run\": lambda **kw: WORKTREES.run(kw[\"name\"], kw[\"command\"]),\n \"worktree_keep\": lambda **kw: WORKTREES.keep(kw[\"name\"]),\n \"worktree_remove\": lambda **kw: WORKTREES.remove(kw[\"name\"], kw.get(\"force\", False), kw.get(\"complete_task\", False)),\n \"worktree_events\": lambda **kw: EVENTS.list_recent(kw.get(\"limit\", 20)),\n}\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command in the current workspace (blocking).\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"task_create\",\n \"description\": \"Create a new task on the shared task board.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n },\n \"required\": [\"subject\"],\n },\n },\n {\n \"name\": \"task_list\",\n \"description\": \"List all tasks with status, owner, and worktree binding.\",\n 
\"input_schema\": {\"type\": \"object\", \"properties\": {}},\n },\n {\n \"name\": \"task_get\",\n \"description\": \"Get task details by ID.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"task_id\": {\"type\": \"integer\"}},\n \"required\": [\"task_id\"],\n },\n },\n {\n \"name\": \"task_update\",\n \"description\": \"Update task status or owner.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"task_id\": {\"type\": \"integer\"},\n \"status\": {\n \"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"],\n },\n \"owner\": {\"type\": \"string\"},\n },\n \"required\": [\"task_id\"],\n },\n },\n {\n \"name\": \"task_bind_worktree\",\n \"description\": \"Bind a task to a worktree name.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"task_id\": {\"type\": \"integer\"},\n \"worktree\": {\"type\": \"string\"},\n \"owner\": {\"type\": \"string\"},\n },\n \"required\": [\"task_id\", \"worktree\"],\n },\n },\n {\n \"name\": \"worktree_create\",\n \"description\": \"Create a git worktree and optionally bind it to a task.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"integer\"},\n \"base_ref\": {\"type\": \"string\"},\n },\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_list\",\n \"description\": \"List worktrees tracked in .worktrees/index.json.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}},\n },\n {\n \"name\": \"worktree_status\",\n \"description\": \"Show git status for one worktree.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_run\",\n \"description\": \"Run a shell command in a named worktree directory.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"command\": {\"type\": \"string\"},\n },\n \"required\": [\"name\", \"command\"],\n },\n },\n {\n \"name\": \"worktree_remove\",\n \"description\": \"Remove a worktree and optionally mark its bound task completed.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"force\": {\"type\": \"boolean\"},\n \"complete_task\": {\"type\": \"boolean\"},\n },\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_keep\",\n \"description\": \"Mark a worktree as kept in lifecycle state without removing it.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_events\",\n \"description\": \"List recent worktree/task lifecycle events from .worktrees/events.jsonl.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"limit\": {\"type\": \"integer\"}},\n },\n },\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n 
results.append(\n {\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n }\n )\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(f\"Repo root for s12: {REPO_ROOT}\")\n if not WORKTREES.git_available:\n print(\"Note: Not in a git repo. worktree_* tools will return errors.\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms12 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
+ "layer": "platform",
+ "sourceByLocale": {
+ "en": "#!/usr/bin/env python3\n# Harness: integration -- tools aren't just in your code.\n\"\"\"\ns19_mcp_plugin.py - MCP & Plugin System\n\nThis teaching chapter focuses on the smallest useful idea:\nexternal processes can expose tools, and your agent can treat them like\nnormal tools after a small amount of normalization.\n\nMinimal path:\n 1. start an MCP server process\n 2. ask it which tools it has\n 3. prefix and register those tools\n 4. route matching calls to that server\n\nPlugins add one more layer: discovery. A tiny manifest tells the agent which\nexternal server to start.\n\nKey insight: \"External tools should enter the same tool pipeline, not form a\ncompletely separate world.\" In practice that means shared permission checks\nand normalized tool_result payloads.\n\nRead this file in this order:\n1. CapabilityPermissionGate: external tools still go through the same control gate.\n2. MCPClient: how one server connection exposes tool specs and tool calls.\n3. PluginLoader: how manifests declare external servers.\n4. MCPToolRouter / build_tool_pool: how native and external tools merge into one pool.\n\nMost common confusion:\n- a plugin manifest is not an MCP server\n- an MCP server is not a single MCP tool\n- external capability does not bypass the native permission path\n\nTeaching boundary:\nthis file teaches the smallest useful stdio MCP path.\nMarketplace details, auth flows, reconnect logic, and non-tool capability layers\nare intentionally left to bridge docs and later extensions.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nPERMISSION_MODES = (\"default\", \"auto\")\n\n\nclass CapabilityPermissionGate:\n \"\"\"\n Shared permission gate for native tools and external capabilities.\n\n The teaching goal is simple: MCP does not bypass the control plane.\n Native tools and MCP tools both become normalized capability intents first,\n then pass through the same allow / ask policy.\n \"\"\"\n\n READ_PREFIXES = (\"read\", \"list\", \"get\", \"show\", \"search\", \"query\", \"inspect\")\n HIGH_RISK_PREFIXES = (\"delete\", \"remove\", \"drop\", \"shutdown\")\n\n def __init__(self, mode: str = \"default\"):\n self.mode = mode if mode in PERMISSION_MODES else \"default\"\n\n def normalize(self, tool_name: str, tool_input: dict) -> dict:\n if tool_name.startswith(\"mcp__\"):\n _, server_name, actual_tool = tool_name.split(\"__\", 2)\n source = \"mcp\"\n else:\n server_name = None\n actual_tool = tool_name\n source = \"native\"\n\n lowered = actual_tool.lower()\n if actual_tool == \"read_file\" or lowered.startswith(self.READ_PREFIXES):\n risk = \"read\"\n elif actual_tool == \"bash\":\n command = tool_input.get(\"command\", \"\")\n risk = \"high\" if any(\n token in command for token in (\"rm -rf\", \"sudo\", \"shutdown\", \"reboot\")\n ) else \"write\"\n elif lowered.startswith(self.HIGH_RISK_PREFIXES):\n risk = \"high\"\n else:\n risk = \"write\"\n\n return {\n \"source\": source,\n \"server\": server_name,\n \"tool\": actual_tool,\n \"risk\": risk,\n }\n\n def check(self, tool_name: str, tool_input: dict) -> dict:\n intent = self.normalize(tool_name, tool_input)\n\n if intent[\"risk\"] == \"read\":\n return {\"behavior\": 
\"allow\", \"reason\": \"Read capability\", \"intent\": intent}\n\n if self.mode == \"auto\" and intent[\"risk\"] != \"high\":\n return {\n \"behavior\": \"allow\",\n \"reason\": \"Auto mode for non-high-risk capability\",\n \"intent\": intent,\n }\n\n if intent[\"risk\"] == \"high\":\n return {\n \"behavior\": \"ask\",\n \"reason\": \"High-risk capability requires confirmation\",\n \"intent\": intent,\n }\n\n return {\n \"behavior\": \"ask\",\n \"reason\": \"State-changing capability requires confirmation\",\n \"intent\": intent,\n }\n\n def ask_user(self, intent: dict, tool_input: dict) -> bool:\n preview = json.dumps(tool_input, ensure_ascii=False)[:200]\n source = (\n f\"{intent['source']}:{intent['server']}/{intent['tool']}\"\n if intent.get(\"server\")\n else f\"{intent['source']}:{intent['tool']}\"\n )\n print(f\"\\n [Permission] {source} risk={intent['risk']}: {preview}\")\n try:\n answer = input(\" Allow? (y/n): \").strip().lower()\n except (EOFError, KeyboardInterrupt):\n return False\n return answer in (\"y\", \"yes\")\n\n\npermission_gate = CapabilityPermissionGate()\n\n\nclass MCPClient:\n \"\"\"\n Minimal MCP client over stdio.\n\n This is enough to teach the core architecture without dragging readers\n through every transport, auth flow, or marketplace detail up front.\n \"\"\"\n\n def __init__(self, server_name: str, command: str, args: list = None, env: dict = None):\n self.server_name = server_name\n self.command = command\n self.args = args or []\n self.env = {**os.environ, **(env or {})}\n self.process = None\n self._request_id = 0\n self._tools = [] # cached tool list\n\n def connect(self):\n \"\"\"Start the MCP server process.\"\"\"\n try:\n self.process = subprocess.Popen(\n [self.command] + self.args,\n stdin=subprocess.PIPE,\n stdout=subprocess.PIPE,\n stderr=subprocess.PIPE,\n env=self.env,\n text=True,\n )\n # Send initialize request\n self._send({\"method\": \"initialize\", \"params\": {\n \"protocolVersion\": \"2024-11-05\",\n \"capabilities\": {},\n \"clientInfo\": {\"name\": \"teaching-agent\", \"version\": \"1.0\"},\n }})\n response = self._recv()\n if response and \"result\" in response:\n # Send initialized notification\n self._send({\"method\": \"notifications/initialized\"})\n return True\n except FileNotFoundError:\n print(f\"[MCP] Server command not found: {self.command}\")\n except Exception as e:\n print(f\"[MCP] Connection failed: {e}\")\n return False\n\n def list_tools(self) -> list:\n \"\"\"Fetch available tools from the server.\"\"\"\n self._send({\"method\": \"tools/list\", \"params\": {}})\n response = self._recv()\n if response and \"result\" in response:\n self._tools = response[\"result\"].get(\"tools\", [])\n return self._tools\n\n def call_tool(self, tool_name: str, arguments: dict) -> str:\n \"\"\"Execute a tool on the server.\"\"\"\n self._send({\"method\": \"tools/call\", \"params\": {\n \"name\": tool_name,\n \"arguments\": arguments,\n }})\n response = self._recv()\n if response and \"result\" in response:\n content = response[\"result\"].get(\"content\", [])\n return \"\\n\".join(c.get(\"text\", str(c)) for c in content)\n if response and \"error\" in response:\n return f\"MCP Error: {response['error'].get('message', 'unknown')}\"\n return \"MCP Error: no response\"\n\n def get_agent_tools(self) -> list:\n \"\"\"\n Convert MCP tools to agent tool format.\n\n Teaching version uses the same simple prefix idea:\n mcp__{server_name}__{tool_name}\n \"\"\"\n agent_tools = []\n for tool in self._tools:\n prefixed_name = 
f\"mcp__{self.server_name}__{tool['name']}\"\n agent_tools.append({\n \"name\": prefixed_name,\n \"description\": tool.get(\"description\", \"\"),\n \"input_schema\": tool.get(\"inputSchema\", {\"type\": \"object\", \"properties\": {}}),\n \"_mcp_server\": self.server_name,\n \"_mcp_tool\": tool[\"name\"],\n })\n return agent_tools\n\n def disconnect(self):\n \"\"\"Shut down the server process.\"\"\"\n if self.process:\n try:\n self._send({\"method\": \"shutdown\"})\n self.process.terminate()\n self.process.wait(timeout=5)\n except Exception:\n self.process.kill()\n self.process = None\n\n def _send(self, message: dict):\n if not self.process or self.process.poll() is not None:\n return\n self._request_id += 1\n envelope = {\"jsonrpc\": \"2.0\", \"id\": self._request_id, **message}\n line = json.dumps(envelope) + \"\\n\"\n try:\n self.process.stdin.write(line)\n self.process.stdin.flush()\n except (BrokenPipeError, OSError):\n pass\n\n def _recv(self) -> dict | None:\n if not self.process or self.process.poll() is not None:\n return None\n try:\n line = self.process.stdout.readline()\n if line:\n return json.loads(line)\n except (json.JSONDecodeError, OSError):\n pass\n return None\n\n\nclass PluginLoader:\n \"\"\"\n Load plugins from .claude-plugin/ directories.\n\n Teaching version implements the smallest useful plugin flow:\n read a manifest, discover MCP server configs, and register them.\n \"\"\"\n\n def __init__(self, search_dirs: list = None):\n self.search_dirs = search_dirs or [WORKDIR]\n self.plugins = {} # name -> manifest\n\n def scan(self) -> list:\n \"\"\"Scan directories for .claude-plugin/plugin.json manifests.\"\"\"\n found = []\n for search_dir in self.search_dirs:\n plugin_dir = Path(search_dir) / \".claude-plugin\"\n manifest_path = plugin_dir / \"plugin.json\"\n if manifest_path.exists():\n try:\n manifest = json.loads(manifest_path.read_text())\n name = manifest.get(\"name\", plugin_dir.parent.name)\n self.plugins[name] = manifest\n found.append(name)\n except (json.JSONDecodeError, OSError) as e:\n print(f\"[Plugin] Failed to load {manifest_path}: {e}\")\n return found\n\n def get_mcp_servers(self) -> dict:\n \"\"\"\n Extract MCP server configs from loaded plugins.\n Returns {server_name: {command, args, env}}.\n \"\"\"\n servers = {}\n for plugin_name, manifest in self.plugins.items():\n for server_name, config in manifest.get(\"mcpServers\", {}).items():\n servers[f\"{plugin_name}__{server_name}\"] = config\n return servers\n\n\nclass MCPToolRouter:\n \"\"\"\n Routes tool calls to the correct MCP server.\n\n MCP tools are prefixed mcp__{server}__{tool} and live alongside\n native tools in the same tool pool. 
The router strips the prefix\n and dispatches to the right MCPClient.\n \"\"\"\n\n def __init__(self):\n self.clients = {} # server_name -> MCPClient\n\n def register_client(self, client: MCPClient):\n self.clients[client.server_name] = client\n\n def is_mcp_tool(self, tool_name: str) -> bool:\n return tool_name.startswith(\"mcp__\")\n\n def call(self, tool_name: str, arguments: dict) -> str:\n \"\"\"Route an MCP tool call to the correct server.\"\"\"\n parts = tool_name.split(\"__\", 2)\n if len(parts) != 3:\n return f\"Error: Invalid MCP tool name: {tool_name}\"\n _, server_name, actual_tool = parts\n client = self.clients.get(server_name)\n if not client:\n return f\"Error: MCP server not found: {server_name}\"\n return client.call_tool(actual_tool, arguments)\n\n def get_all_tools(self) -> list:\n \"\"\"Collect tools from all connected MCP servers.\"\"\"\n tools = []\n for client in self.clients.values():\n tools.extend(client.get_agent_tools())\n return tools\n\n\n# -- Native tool implementations (same as s02) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str) -> str:\n try:\n return safe_path(path).read_text()[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nNATIVE_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"]),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nNATIVE_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, 
\"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\n# -- MCP Tool Router (global) --\nmcp_router = MCPToolRouter()\nplugin_loader = PluginLoader()\n\n\ndef build_tool_pool() -> list:\n \"\"\"\n Assemble the complete tool pool: native + MCP tools.\n\n Native tools take precedence on name conflicts so the local core remains\n predictable even after external tools are added.\n \"\"\"\n all_tools = list(NATIVE_TOOLS)\n mcp_tools = mcp_router.get_all_tools()\n\n native_names = {t[\"name\"] for t in all_tools}\n for tool in mcp_tools:\n if tool[\"name\"] not in native_names:\n all_tools.append(tool)\n\n return all_tools\n\n\ndef handle_tool_call(tool_name: str, tool_input: dict) -> str:\n \"\"\"Dispatch to native handler or MCP router.\"\"\"\n if mcp_router.is_mcp_tool(tool_name):\n return mcp_router.call(tool_name, tool_input)\n handler = NATIVE_HANDLERS.get(tool_name)\n if handler:\n return handler(**tool_input)\n return f\"Unknown tool: {tool_name}\"\n\n\ndef normalize_tool_result(tool_name: str, output: str, intent: dict | None = None) -> str:\n intent = intent or permission_gate.normalize(tool_name, {})\n status = \"error\" if \"Error:\" in output or \"MCP Error:\" in output else \"ok\"\n payload = {\n \"source\": intent[\"source\"],\n \"server\": intent.get(\"server\"),\n \"tool\": intent[\"tool\"],\n \"risk\": intent[\"risk\"],\n \"status\": status,\n \"preview\": output[:500],\n }\n return json.dumps(payload, indent=2, ensure_ascii=False)\n\n\ndef agent_loop(messages: list):\n \"\"\"Agent loop with unified native + MCP tool pool.\"\"\"\n tools = build_tool_pool()\n\n while True:\n system = (\n f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\\n\"\n \"You have both native tools and MCP tools available.\\n\"\n \"MCP tools are prefixed with mcp__{server}__{tool}.\\n\"\n \"All capabilities pass through the same permission gate before execution.\"\n )\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=tools, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n decision = permission_gate.check(block.name, block.input or {})\n try:\n if decision[\"behavior\"] == \"deny\":\n output = f\"Permission denied: {decision['reason']}\"\n elif decision[\"behavior\"] == \"ask\" and not permission_gate.ask_user(\n decision[\"intent\"], block.input or {}\n ):\n output = f\"Permission denied by user: {decision['reason']}\"\n else:\n output = handle_tool_call(block.name, block.input or {})\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": normalize_tool_result(\n block.name,\n str(output),\n decision.get(\"intent\"),\n ),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\n# Further upgrades you can add later:\n# - more transports\n# - auth / approval flows\n# - server reconnect and lifecycle management\n# - filtering external tools before they reach the model\n# - richer plugin installation and update handling\n\n\nif __name__ == \"__main__\":\n # Scan for plugins\n found = plugin_loader.scan()\n if found:\n print(f\"[Plugins loaded: {', '.join(found)}]\")\n for server_name, config in plugin_loader.get_mcp_servers().items():\n mcp_client = 
MCPClient(server_name, config.get(\"command\", \"\"), config.get(\"args\", []))\n if mcp_client.connect():\n mcp_client.list_tools()\n mcp_router.register_client(mcp_client)\n print(f\"[MCP] Connected to {server_name}\")\n\n tool_count = len(build_tool_pool())\n mcp_count = len(mcp_router.get_all_tools())\n print(f\"[Tool pool: {tool_count} tools ({mcp_count} from MCP)]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms19 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n if query.strip() == \"/tools\":\n for tool in build_tool_pool():\n prefix = \"[MCP] \" if tool[\"name\"].startswith(\"mcp__\") else \" \"\n print(f\" {prefix}{tool['name']}: {tool.get('description', '')[:60]}\")\n continue\n\n if query.strip() == \"/mcp\":\n if mcp_router.clients:\n for name, c in mcp_router.clients.items():\n tools = c.get_agent_tools()\n print(f\" {name}: {len(tools)} tools\")\n else:\n print(\" (no MCP servers connected)\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n\n # Cleanup MCP connections\n for c in mcp_router.clients.values():\n c.disconnect()\n",
+ "zh": "#!/usr/bin/env python3\n# Harness(执行框架): integration(集成)——工具不只存在于本地代码里。\n\"\"\"\ns19_mcp_plugin.py - MCP & Plugin System(插件与外部能力)\n\n本章聚焦最小可用观点:\n外部进程可暴露工具,经过少量规范化后,\n智能体即可像使用本地工具一样使用它们。\n\n最小路径(Minimal path):\n 1. 启动一个 MCP server(服务器)进程\n 2. 查询它暴露了哪些 tools(工具)\n 3. 为这些工具加前缀并注册\n 4. 将匹配调用路由到该服务器\n\nPlugins(插件)会再增加一层 discovery(发现)。\n一个小型 manifest(清单)即可告诉智能体应启动哪个外部服务器。\n\n关键洞察:\n\"外部工具应进入同一工具管线,而不是形成平行世界。\"\n实践上这意味着共享权限检查与统一 tool_result 载荷。\n\n建议阅读顺序:\n1. CapabilityPermissionGate:外部工具仍走同一控制闸门。\n2. MCPClient:单个服务器连接如何暴露 tool spec(工具规格)与工具调用。\n3. PluginLoader:manifest 如何声明外部服务器。\n4. MCPToolRouter / build_tool_pool:本地与外部工具如何合并成同一工具池。\n\n最常见混淆点:\n- plugin manifest(插件清单)不是 MCP server(服务器)\n- MCP server 不是单个 MCP tool(工具)\n- 外部能力不会绕过本地权限路径\n\n教学边界:\n本文件只讲最小可用的 stdio MCP 路径。\nMarketplace(市场)细节、auth(鉴权)流程、重连逻辑以及非工具能力层,\n刻意留给桥接文档与后续扩展。\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nfrom pathlib import Path\n\ntry:\n from agents.llm_client import create_client\nexcept ModuleNotFoundError:\n from llm_client import create_client\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\n\nWORKDIR = Path.cwd()\nclient = create_client()\nMODEL = os.environ[\"MODEL_ID\"]\nPERMISSION_MODES = (\"default\", \"auto\")\n\n\nclass CapabilityPermissionGate:\n \"\"\"\n 本地工具与外部能力共用的权限门控。\n\n 教学目标很简单:MCP 不绕过控制平面。\n 本地工具与 MCP 工具都先规范化为 capability intent,\n 再走同一 allow/ask 策略。\n \"\"\"\n\n READ_PREFIXES = (\"read\", \"list\", \"get\", \"show\", \"search\", \"query\", \"inspect\")\n HIGH_RISK_PREFIXES = (\"delete\", \"remove\", \"drop\", \"shutdown\")\n\n def __init__(self, mode: str = \"default\"):\n self.mode = mode if mode in PERMISSION_MODES else \"default\"\n\n def normalize(self, tool_name: str, tool_input: dict) -> dict:\n if tool_name.startswith(\"mcp__\"):\n _, server_name, actual_tool = tool_name.split(\"__\", 2)\n source = \"mcp\"\n else:\n server_name = None\n actual_tool = tool_name\n source = \"native\"\n\n lowered = actual_tool.lower()\n if actual_tool == \"read_file\" or lowered.startswith(self.READ_PREFIXES):\n risk = \"read\"\n elif actual_tool == \"bash\":\n command = tool_input.get(\"command\", \"\")\n risk = \"high\" if any(\n token in command for token in (\"rm -rf\", \"sudo\", \"shutdown\", \"reboot\")\n ) else \"write\"\n elif lowered.startswith(self.HIGH_RISK_PREFIXES):\n risk = \"high\"\n else:\n risk = \"write\"\n\n return {\n \"source\": source,\n \"server\": server_name,\n \"tool\": actual_tool,\n \"risk\": risk,\n }\n\n def check(self, tool_name: str, tool_input: dict) -> dict:\n intent = self.normalize(tool_name, tool_input)\n\n if intent[\"risk\"] == \"read\":\n return {\"behavior\": \"allow\", \"reason\": \"只读能力\", \"intent\": intent}\n\n if self.mode == \"auto\" and intent[\"risk\"] != \"high\":\n return {\n \"behavior\": \"allow\",\n \"reason\": \"auto 模式下的非高风险能力\",\n \"intent\": intent,\n }\n\n if intent[\"risk\"] == \"high\":\n return {\n \"behavior\": \"ask\",\n \"reason\": \"高风险能力需要确认\",\n \"intent\": intent,\n }\n\n return {\n \"behavior\": \"ask\",\n \"reason\": \"会改变状态的能力需要确认\",\n \"intent\": intent,\n }\n\n def ask_user(self, intent: dict, tool_input: dict) -> bool:\n preview = json.dumps(tool_input, ensure_ascii=False)[:200]\n source = (\n f\"{intent['source']}:{intent['server']}/{intent['tool']}\"\n if intent.get(\"server\")\n else f\"{intent['source']}:{intent['tool']}\"\n )\n print(f\"\\n [Permission] {source} risk={intent['risk']}: {preview}\")\n try:\n answer = input(\" 是否允许?(y/n): \").strip().lower()\n except (EOFError, 
KeyboardInterrupt):\n return False\n return answer in (\"y\", \"yes\")\n\n\npermission_gate = CapabilityPermissionGate()\n\n\nclass MCPClient:\n \"\"\"\n 基于 stdio 的最小 MCP 客户端。\n\n 该实现足以讲清核心架构,\n 无需提前卷入传输层、鉴权流和市场细节。\n \"\"\"\n\n def __init__(self, server_name: str, command: str, args: list = None, env: dict = None):\n self.server_name = server_name\n self.command = command\n self.args = args or []\n self.env = {**os.environ, **(env or {})}\n self.process = None\n self._request_id = 0\n self._tools = [] # 已缓存工具列表\n\n def connect(self):\n \"\"\"启动 MCP server 进程。\"\"\"\n try:\n self.process = subprocess.Popen(\n [self.command] + self.args,\n stdin=subprocess.PIPE,\n stdout=subprocess.PIPE,\n stderr=subprocess.PIPE,\n env=self.env,\n text=True,\n )\n # 发送 initialize 请求\n self._send({\"method\": \"initialize\", \"params\": {\n \"protocolVersion\": \"2024-11-05\",\n \"capabilities\": {},\n \"clientInfo\": {\"name\": \"teaching-agent\", \"version\": \"1.0\"},\n }})\n response = self._recv()\n if response and \"result\" in response:\n # 发送 initialized 通知\n self._send({\"method\": \"notifications/initialized\"})\n return True\n except FileNotFoundError:\n print(f\"[MCP] 未找到服务器命令:{self.command}\")\n except Exception as e:\n print(f\"[MCP] Connection failed: {e}\")\n return False\n\n def list_tools(self) -> list:\n \"\"\"从 server 拉取可用工具列表。\"\"\"\n self._send({\"method\": \"tools/list\", \"params\": {}})\n response = self._recv()\n if response and \"result\" in response:\n self._tools = response[\"result\"].get(\"tools\", [])\n return self._tools\n\n def call_tool(self, tool_name: str, arguments: dict) -> str:\n \"\"\"在 server 端执行工具。\"\"\"\n self._send({\"method\": \"tools/call\", \"params\": {\n \"name\": tool_name,\n \"arguments\": arguments,\n }})\n response = self._recv()\n if response and \"result\" in response:\n content = response[\"result\"].get(\"content\", [])\n return \"\\n\".join(c.get(\"text\", str(c)) for c in content)\n if response and \"error\" in response:\n return f\"MCP Error: {response['error'].get('message', 'unknown')}\"\n return \"MCP Error: no response\"\n\n def get_agent_tools(self) -> list:\n \"\"\"\n 将 MCP 工具转换为 agent 工具格式。\n\n 教学版沿用简单前缀方案:\n mcp__{server_name}__{tool_name}\n \"\"\"\n agent_tools = []\n for tool in self._tools:\n prefixed_name = f\"mcp__{self.server_name}__{tool['name']}\"\n agent_tools.append({\n \"name\": prefixed_name,\n \"description\": tool.get(\"description\", \"\"),\n \"input_schema\": tool.get(\"inputSchema\", {\"type\": \"object\", \"properties\": {}}),\n \"_mcp_server\": self.server_name,\n \"_mcp_tool\": tool[\"name\"],\n })\n return agent_tools\n\n def disconnect(self):\n \"\"\"关闭 server 进程。\"\"\"\n if self.process:\n try:\n self._send({\"method\": \"shutdown\"})\n self.process.terminate()\n self.process.wait(timeout=5)\n except Exception:\n self.process.kill()\n self.process = None\n\n def _send(self, message: dict):\n if not self.process or self.process.poll() is not None:\n return\n self._request_id += 1\n envelope = {\"jsonrpc\": \"2.0\", \"id\": self._request_id, **message}\n line = json.dumps(envelope) + \"\\n\"\n try:\n self.process.stdin.write(line)\n self.process.stdin.flush()\n except (BrokenPipeError, OSError):\n pass\n\n def _recv(self) -> dict | None:\n if not self.process or self.process.poll() is not None:\n return None\n try:\n line = self.process.stdout.readline()\n if line:\n return json.loads(line)\n except (json.JSONDecodeError, OSError):\n pass\n return None\n\n\nclass PluginLoader:\n \"\"\"\n 从 `.claude-plugin/` 目录加载插件。\n\n 
教学版仅实现最小插件流程:\n 读取 manifest,发现 MCP server 配置并注册。\n \"\"\"\n\n def __init__(self, search_dirs: list = None):\n self.search_dirs = search_dirs or [WORKDIR]\n self.plugins = {} # name -> manifest(清单)\n\n def scan(self) -> list:\n \"\"\"扫描目录,查找 `.claude-plugin/plugin.json` 清单文件。\"\"\"\n found = []\n for search_dir in self.search_dirs:\n plugin_dir = Path(search_dir) / \".claude-plugin\"\n manifest_path = plugin_dir / \"plugin.json\"\n if manifest_path.exists():\n try:\n manifest = json.loads(manifest_path.read_text())\n name = manifest.get(\"name\", plugin_dir.parent.name)\n self.plugins[name] = manifest\n found.append(name)\n except (json.JSONDecodeError, OSError) as e:\n print(f\"[Plugin] Failed to load {manifest_path}: {e}\")\n return found\n\n def get_mcp_servers(self) -> dict:\n \"\"\"\n 从已加载插件中提取 MCP server 配置。\n 返回 {server_name: {command, args, env}}。\n \"\"\"\n servers = {}\n for plugin_name, manifest in self.plugins.items():\n for server_name, config in manifest.get(\"mcpServers\", {}).items():\n servers[f\"{plugin_name}__{server_name}\"] = config\n return servers\n\n\nclass MCPToolRouter:\n \"\"\"\n 将工具调用路由到正确的 MCP server。\n\n MCP 工具命名为 `mcp__{server}__{tool}`,\n 与本地工具共存于同一工具池。\n Router 负责拆前缀并分发到目标 MCPClient。\n \"\"\"\n\n def __init__(self):\n self.clients = {} # server_name(服务名)-> MCPClient\n\n def register_client(self, client: MCPClient):\n self.clients[client.server_name] = client\n\n def is_mcp_tool(self, tool_name: str) -> bool:\n return tool_name.startswith(\"mcp__\")\n\n def call(self, tool_name: str, arguments: dict) -> str:\n \"\"\"将 MCP 工具调用路由到正确 server。\"\"\"\n parts = tool_name.split(\"__\", 2)\n if len(parts) != 3:\n return f\"Error: 非法的 MCP 工具名:{tool_name}\"\n _, server_name, actual_tool = parts\n client = self.clients.get(server_name)\n if not client:\n return f\"Error: 未找到 MCP server:{server_name}\"\n return client.call_tool(actual_tool, arguments)\n\n def get_all_tools(self) -> list:\n \"\"\"汇总所有已连接 MCP server 的工具。\"\"\"\n tools = []\n for client in self.clients.values():\n tools.extend(client.get_agent_tools())\n return tools\n\n\n# -- 原生工具实现(与 s02 保持一致) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: 危险命令已拦截\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str) -> str:\n try:\n return safe_path(path).read_text()[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nNATIVE_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": 
lambda **kw: run_read(kw[\"path\"]),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nNATIVE_TOOLS = [\n {\"name\": \"bash\", \"description\": \"执行 shell 命令。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"读取文件内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"向文件写入内容。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"在文件中替换精确文本。\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\n# -- MCP 工具路由器(全局实例) --\nmcp_router = MCPToolRouter()\nplugin_loader = PluginLoader()\n\n\ndef build_tool_pool() -> list:\n \"\"\"\n 组装完整工具池:native(原生)+ MCP 工具。\n\n 当名称冲突时,native 工具优先,确保引入外部工具后\n 本地核心能力仍保持可预测。\n \"\"\"\n all_tools = list(NATIVE_TOOLS)\n mcp_tools = mcp_router.get_all_tools()\n\n native_names = {t[\"name\"] for t in all_tools}\n for tool in mcp_tools:\n if tool[\"name\"] not in native_names:\n all_tools.append(tool)\n\n return all_tools\n\n\ndef handle_tool_call(tool_name: str, tool_input: dict) -> str:\n \"\"\"分发到 native 处理器或 MCP 路由器。\"\"\"\n if mcp_router.is_mcp_tool(tool_name):\n return mcp_router.call(tool_name, tool_input)\n handler = NATIVE_HANDLERS.get(tool_name)\n if handler:\n return handler(**tool_input)\n return f\"Unknown tool: {tool_name}\"\n\n\ndef normalize_tool_result(tool_name: str, output: str, intent: dict | None = None) -> str:\n intent = intent or permission_gate.normalize(tool_name, {})\n status = \"error\" if \"Error:\" in output or \"MCP Error:\" in output else \"ok\"\n payload = {\n \"source\": intent[\"source\"],\n \"server\": intent.get(\"server\"),\n \"tool\": intent[\"tool\"],\n \"risk\": intent[\"risk\"],\n \"status\": status,\n \"preview\": output[:500],\n }\n return json.dumps(payload, indent=2, ensure_ascii=False)\n\n\ndef agent_loop(messages: list):\n \"\"\"统一使用 native + MCP 工具池的智能体主循环。\"\"\"\n tools = build_tool_pool()\n\n while True:\n system = (\n f\"你是位于 {WORKDIR} 的 coding agent(编码智能体),请使用工具解决任务。\\n\"\n \"你可同时使用 native tools(本地工具)与 MCP tools(外部工具)。\\n\"\n \"MCP 工具采用前缀 mcp__{server}__{tool}。\\n\"\n \"所有能力在执行前都必须经过同一权限门控。\"\n )\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=tools, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n decision = permission_gate.check(block.name, block.input or {})\n try:\n if decision[\"behavior\"] == \"deny\":\n output = f\"Permission denied: {decision['reason']}\"\n elif decision[\"behavior\"] == \"ask\" and not permission_gate.ask_user(\n decision[\"intent\"], block.input or {}\n ):\n output = f\"用户拒绝执行:{decision['reason']}\"\n else:\n output = handle_tool_call(block.name, block.input or {})\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: 
{str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": normalize_tool_result(\n block.name,\n str(output),\n decision.get(\"intent\"),\n ),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\n# 你可在后续继续扩展:\n# - 更多 transport(传输层)类型\n# - auth / approval(鉴权与审批)流程\n# - server 重连与生命周期管理\n# - 模型可见前的外部工具过滤\n# - 更丰富的插件安装与更新处理\n\n\nif __name__ == \"__main__\":\n# 扫描并加载插件\n found = plugin_loader.scan()\n if found:\n print(f\"[Plugins loaded: {', '.join(found)}]\")\n for server_name, config in plugin_loader.get_mcp_servers().items():\n mcp_client = MCPClient(server_name, config.get(\"command\", \"\"), config.get(\"args\", []))\n if mcp_client.connect():\n mcp_client.list_tools()\n mcp_router.register_client(mcp_client)\n print(f\"[MCP] Connected to {server_name}\")\n\n tool_count = len(build_tool_pool())\n mcp_count = len(mcp_router.get_all_tools())\n print(f\"[Tool pool: {tool_count} tools ({mcp_count} from MCP)]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms19 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n if query.strip() == \"/tools\":\n for tool in build_tool_pool():\n prefix = \"[MCP] \" if tool[\"name\"].startswith(\"mcp__\") else \" \"\n print(f\" {prefix}{tool['name']}: {tool.get('description', '')[:60]}\")\n continue\n\n if query.strip() == \"/mcp\":\n if mcp_router.clients:\n for name, c in mcp_router.clients.items():\n tools = c.get_agent_tools()\n print(f\" {name}: {len(tools)} tools\")\n else:\n print(\" (未连接任何 MCP 服务器)\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n\n# 清理 MCP 连接\n for c in mcp_router.clients.values():\n c.disconnect()\n",
+ "ja": "#!/usr/bin/env python3\n# Harness: integration -- tools aren't just in your code.\n\"\"\"\ns19_mcp_plugin.py - MCP & Plugin System\n\nThis teaching chapter focuses on the smallest useful idea:\nexternal processes can expose tools, and your agent can treat them like\nnormal tools after a small amount of normalization.\n\nMinimal path:\n 1. start an MCP server process\n 2. ask it which tools it has\n 3. prefix and register those tools\n 4. route matching calls to that server\n\nPlugins add one more layer: discovery. A tiny manifest tells the agent which\nexternal server to start.\n\nKey insight: \"External tools should enter the same tool pipeline, not form a\ncompletely separate world.\" In practice that means shared permission checks\nand normalized tool_result payloads.\n\nRead this file in this order:\n1. CapabilityPermissionGate: external tools still go through the same control gate.\n2. MCPClient: how one server connection exposes tool specs and tool calls.\n3. PluginLoader: how manifests declare external servers.\n4. MCPToolRouter / build_tool_pool: how native and external tools merge into one pool.\n\nMost common confusion:\n- a plugin manifest is not an MCP server\n- an MCP server is not a single MCP tool\n- external capability does not bypass the native permission path\n\nTeaching boundary:\nthis file teaches the smallest useful stdio MCP path.\nMarketplace details, auth flows, reconnect logic, and non-tool capability layers\nare intentionally left to bridge docs and later extensions.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nPERMISSION_MODES = (\"default\", \"auto\")\n\n\nclass CapabilityPermissionGate:\n \"\"\"\n Shared permission gate for native tools and external capabilities.\n\n The teaching goal is simple: MCP does not bypass the control plane.\n Native tools and MCP tools both become normalized capability intents first,\n then pass through the same allow / ask policy.\n \"\"\"\n\n READ_PREFIXES = (\"read\", \"list\", \"get\", \"show\", \"search\", \"query\", \"inspect\")\n HIGH_RISK_PREFIXES = (\"delete\", \"remove\", \"drop\", \"shutdown\")\n\n def __init__(self, mode: str = \"default\"):\n self.mode = mode if mode in PERMISSION_MODES else \"default\"\n\n def normalize(self, tool_name: str, tool_input: dict) -> dict:\n if tool_name.startswith(\"mcp__\"):\n _, server_name, actual_tool = tool_name.split(\"__\", 2)\n source = \"mcp\"\n else:\n server_name = None\n actual_tool = tool_name\n source = \"native\"\n\n lowered = actual_tool.lower()\n if actual_tool == \"read_file\" or lowered.startswith(self.READ_PREFIXES):\n risk = \"read\"\n elif actual_tool == \"bash\":\n command = tool_input.get(\"command\", \"\")\n risk = \"high\" if any(\n token in command for token in (\"rm -rf\", \"sudo\", \"shutdown\", \"reboot\")\n ) else \"write\"\n elif lowered.startswith(self.HIGH_RISK_PREFIXES):\n risk = \"high\"\n else:\n risk = \"write\"\n\n return {\n \"source\": source,\n \"server\": server_name,\n \"tool\": actual_tool,\n \"risk\": risk,\n }\n\n def check(self, tool_name: str, tool_input: dict) -> dict:\n intent = self.normalize(tool_name, tool_input)\n\n if intent[\"risk\"] == \"read\":\n return {\"behavior\": 
\"allow\", \"reason\": \"Read capability\", \"intent\": intent}\n\n if self.mode == \"auto\" and intent[\"risk\"] != \"high\":\n return {\n \"behavior\": \"allow\",\n \"reason\": \"Auto mode for non-high-risk capability\",\n \"intent\": intent,\n }\n\n if intent[\"risk\"] == \"high\":\n return {\n \"behavior\": \"ask\",\n \"reason\": \"High-risk capability requires confirmation\",\n \"intent\": intent,\n }\n\n return {\n \"behavior\": \"ask\",\n \"reason\": \"State-changing capability requires confirmation\",\n \"intent\": intent,\n }\n\n def ask_user(self, intent: dict, tool_input: dict) -> bool:\n preview = json.dumps(tool_input, ensure_ascii=False)[:200]\n source = (\n f\"{intent['source']}:{intent['server']}/{intent['tool']}\"\n if intent.get(\"server\")\n else f\"{intent['source']}:{intent['tool']}\"\n )\n print(f\"\\n [Permission] {source} risk={intent['risk']}: {preview}\")\n try:\n answer = input(\" Allow? (y/n): \").strip().lower()\n except (EOFError, KeyboardInterrupt):\n return False\n return answer in (\"y\", \"yes\")\n\n\npermission_gate = CapabilityPermissionGate()\n\n\nclass MCPClient:\n \"\"\"\n Minimal MCP client over stdio.\n\n This is enough to teach the core architecture without dragging readers\n through every transport, auth flow, or marketplace detail up front.\n \"\"\"\n\n def __init__(self, server_name: str, command: str, args: list = None, env: dict = None):\n self.server_name = server_name\n self.command = command\n self.args = args or []\n self.env = {**os.environ, **(env or {})}\n self.process = None\n self._request_id = 0\n self._tools = [] # cached tool list\n\n def connect(self):\n \"\"\"Start the MCP server process.\"\"\"\n try:\n self.process = subprocess.Popen(\n [self.command] + self.args,\n stdin=subprocess.PIPE,\n stdout=subprocess.PIPE,\n stderr=subprocess.PIPE,\n env=self.env,\n text=True,\n )\n # Send initialize request\n self._send({\"method\": \"initialize\", \"params\": {\n \"protocolVersion\": \"2024-11-05\",\n \"capabilities\": {},\n \"clientInfo\": {\"name\": \"teaching-agent\", \"version\": \"1.0\"},\n }})\n response = self._recv()\n if response and \"result\" in response:\n # Send initialized notification\n self._send({\"method\": \"notifications/initialized\"})\n return True\n except FileNotFoundError:\n print(f\"[MCP] Server command not found: {self.command}\")\n except Exception as e:\n print(f\"[MCP] Connection failed: {e}\")\n return False\n\n def list_tools(self) -> list:\n \"\"\"Fetch available tools from the server.\"\"\"\n self._send({\"method\": \"tools/list\", \"params\": {}})\n response = self._recv()\n if response and \"result\" in response:\n self._tools = response[\"result\"].get(\"tools\", [])\n return self._tools\n\n def call_tool(self, tool_name: str, arguments: dict) -> str:\n \"\"\"Execute a tool on the server.\"\"\"\n self._send({\"method\": \"tools/call\", \"params\": {\n \"name\": tool_name,\n \"arguments\": arguments,\n }})\n response = self._recv()\n if response and \"result\" in response:\n content = response[\"result\"].get(\"content\", [])\n return \"\\n\".join(c.get(\"text\", str(c)) for c in content)\n if response and \"error\" in response:\n return f\"MCP Error: {response['error'].get('message', 'unknown')}\"\n return \"MCP Error: no response\"\n\n def get_agent_tools(self) -> list:\n \"\"\"\n Convert MCP tools to agent tool format.\n\n Teaching version uses the same simple prefix idea:\n mcp__{server_name}__{tool_name}\n \"\"\"\n agent_tools = []\n for tool in self._tools:\n prefixed_name = 
f\"mcp__{self.server_name}__{tool['name']}\"\n agent_tools.append({\n \"name\": prefixed_name,\n \"description\": tool.get(\"description\", \"\"),\n \"input_schema\": tool.get(\"inputSchema\", {\"type\": \"object\", \"properties\": {}}),\n \"_mcp_server\": self.server_name,\n \"_mcp_tool\": tool[\"name\"],\n })\n return agent_tools\n\n def disconnect(self):\n \"\"\"Shut down the server process.\"\"\"\n if self.process:\n try:\n self._send({\"method\": \"shutdown\"})\n self.process.terminate()\n self.process.wait(timeout=5)\n except Exception:\n self.process.kill()\n self.process = None\n\n def _send(self, message: dict):\n if not self.process or self.process.poll() is not None:\n return\n self._request_id += 1\n envelope = {\"jsonrpc\": \"2.0\", \"id\": self._request_id, **message}\n line = json.dumps(envelope) + \"\\n\"\n try:\n self.process.stdin.write(line)\n self.process.stdin.flush()\n except (BrokenPipeError, OSError):\n pass\n\n def _recv(self) -> dict | None:\n if not self.process or self.process.poll() is not None:\n return None\n try:\n line = self.process.stdout.readline()\n if line:\n return json.loads(line)\n except (json.JSONDecodeError, OSError):\n pass\n return None\n\n\nclass PluginLoader:\n \"\"\"\n Load plugins from .claude-plugin/ directories.\n\n Teaching version implements the smallest useful plugin flow:\n read a manifest, discover MCP server configs, and register them.\n \"\"\"\n\n def __init__(self, search_dirs: list = None):\n self.search_dirs = search_dirs or [WORKDIR]\n self.plugins = {} # name -> manifest\n\n def scan(self) -> list:\n \"\"\"Scan directories for .claude-plugin/plugin.json manifests.\"\"\"\n found = []\n for search_dir in self.search_dirs:\n plugin_dir = Path(search_dir) / \".claude-plugin\"\n manifest_path = plugin_dir / \"plugin.json\"\n if manifest_path.exists():\n try:\n manifest = json.loads(manifest_path.read_text())\n name = manifest.get(\"name\", plugin_dir.parent.name)\n self.plugins[name] = manifest\n found.append(name)\n except (json.JSONDecodeError, OSError) as e:\n print(f\"[Plugin] Failed to load {manifest_path}: {e}\")\n return found\n\n def get_mcp_servers(self) -> dict:\n \"\"\"\n Extract MCP server configs from loaded plugins.\n Returns {server_name: {command, args, env}}.\n \"\"\"\n servers = {}\n for plugin_name, manifest in self.plugins.items():\n for server_name, config in manifest.get(\"mcpServers\", {}).items():\n servers[f\"{plugin_name}__{server_name}\"] = config\n return servers\n\n\nclass MCPToolRouter:\n \"\"\"\n Routes tool calls to the correct MCP server.\n\n MCP tools are prefixed mcp__{server}__{tool} and live alongside\n native tools in the same tool pool. 
The router strips the prefix\n and dispatches to the right MCPClient.\n \"\"\"\n\n def __init__(self):\n self.clients = {} # server_name -> MCPClient\n\n def register_client(self, client: MCPClient):\n self.clients[client.server_name] = client\n\n def is_mcp_tool(self, tool_name: str) -> bool:\n return tool_name.startswith(\"mcp__\")\n\n def call(self, tool_name: str, arguments: dict) -> str:\n \"\"\"Route an MCP tool call to the correct server.\"\"\"\n parts = tool_name.split(\"__\", 2)\n if len(parts) != 3:\n return f\"Error: Invalid MCP tool name: {tool_name}\"\n _, server_name, actual_tool = parts\n client = self.clients.get(server_name)\n if not client:\n return f\"Error: MCP server not found: {server_name}\"\n return client.call_tool(actual_tool, arguments)\n\n def get_all_tools(self) -> list:\n \"\"\"Collect tools from all connected MCP servers.\"\"\"\n tools = []\n for client in self.clients.values():\n tools.extend(client.get_agent_tools())\n return tools\n\n\n# -- Native tool implementations (same as s02) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str) -> str:\n try:\n return safe_path(path).read_text()[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nNATIVE_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"]),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nNATIVE_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, 
\"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\n# -- MCP Tool Router (global) --\nmcp_router = MCPToolRouter()\nplugin_loader = PluginLoader()\n\n\ndef build_tool_pool() -> list:\n \"\"\"\n Assemble the complete tool pool: native + MCP tools.\n\n Native tools take precedence on name conflicts so the local core remains\n predictable even after external tools are added.\n \"\"\"\n all_tools = list(NATIVE_TOOLS)\n mcp_tools = mcp_router.get_all_tools()\n\n native_names = {t[\"name\"] for t in all_tools}\n for tool in mcp_tools:\n if tool[\"name\"] not in native_names:\n all_tools.append(tool)\n\n return all_tools\n\n\ndef handle_tool_call(tool_name: str, tool_input: dict) -> str:\n \"\"\"Dispatch to native handler or MCP router.\"\"\"\n if mcp_router.is_mcp_tool(tool_name):\n return mcp_router.call(tool_name, tool_input)\n handler = NATIVE_HANDLERS.get(tool_name)\n if handler:\n return handler(**tool_input)\n return f\"Unknown tool: {tool_name}\"\n\n\ndef normalize_tool_result(tool_name: str, output: str, intent: dict | None = None) -> str:\n intent = intent or permission_gate.normalize(tool_name, {})\n status = \"error\" if \"Error:\" in output or \"MCP Error:\" in output else \"ok\"\n payload = {\n \"source\": intent[\"source\"],\n \"server\": intent.get(\"server\"),\n \"tool\": intent[\"tool\"],\n \"risk\": intent[\"risk\"],\n \"status\": status,\n \"preview\": output[:500],\n }\n return json.dumps(payload, indent=2, ensure_ascii=False)\n\n\ndef agent_loop(messages: list):\n \"\"\"Agent loop with unified native + MCP tool pool.\"\"\"\n tools = build_tool_pool()\n\n while True:\n system = (\n f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\\n\"\n \"You have both native tools and MCP tools available.\\n\"\n \"MCP tools are prefixed with mcp__{server}__{tool}.\\n\"\n \"All capabilities pass through the same permission gate before execution.\"\n )\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=tools, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n decision = permission_gate.check(block.name, block.input or {})\n try:\n if decision[\"behavior\"] == \"deny\":\n output = f\"Permission denied: {decision['reason']}\"\n elif decision[\"behavior\"] == \"ask\" and not permission_gate.ask_user(\n decision[\"intent\"], block.input or {}\n ):\n output = f\"Permission denied by user: {decision['reason']}\"\n else:\n output = handle_tool_call(block.name, block.input or {})\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": normalize_tool_result(\n block.name,\n str(output),\n decision.get(\"intent\"),\n ),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\n# Further upgrades you can add later:\n# - more transports\n# - auth / approval flows\n# - server reconnect and lifecycle management\n# - filtering external tools before they reach the model\n# - richer plugin installation and update handling\n\n\nif __name__ == \"__main__\":\n # Scan for plugins\n found = plugin_loader.scan()\n if found:\n print(f\"[Plugins loaded: {', '.join(found)}]\")\n for server_name, config in plugin_loader.get_mcp_servers().items():\n mcp_client = 
MCPClient(server_name, config.get(\"command\", \"\"), config.get(\"args\", []))\n if mcp_client.connect():\n mcp_client.list_tools()\n mcp_router.register_client(mcp_client)\n print(f\"[MCP] Connected to {server_name}\")\n\n tool_count = len(build_tool_pool())\n mcp_count = len(mcp_router.get_all_tools())\n print(f\"[Tool pool: {tool_count} tools ({mcp_count} from MCP)]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms19 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n if query.strip() == \"/tools\":\n for tool in build_tool_pool():\n prefix = \"[MCP] \" if tool[\"name\"].startswith(\"mcp__\") else \" \"\n print(f\" {prefix}{tool['name']}: {tool.get('description', '')[:60]}\")\n continue\n\n if query.strip() == \"/mcp\":\n if mcp_router.clients:\n for name, c in mcp_router.clients.items():\n tools = c.get_agent_tools()\n print(f\" {name}: {len(tools)} tools\")\n else:\n print(\" (no MCP servers connected)\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n\n # Cleanup MCP connections\n for c in mcp_router.clients.values():\n c.disconnect()\n"
+ },
+ "source": "#!/usr/bin/env python3\n# Harness: integration -- tools aren't just in your code.\n\"\"\"\ns19_mcp_plugin.py - MCP & Plugin System\n\nThis teaching chapter focuses on the smallest useful idea:\nexternal processes can expose tools, and your agent can treat them like\nnormal tools after a small amount of normalization.\n\nMinimal path:\n 1. start an MCP server process\n 2. ask it which tools it has\n 3. prefix and register those tools\n 4. route matching calls to that server\n\nPlugins add one more layer: discovery. A tiny manifest tells the agent which\nexternal server to start.\n\nKey insight: \"External tools should enter the same tool pipeline, not form a\ncompletely separate world.\" In practice that means shared permission checks\nand normalized tool_result payloads.\n\nRead this file in this order:\n1. CapabilityPermissionGate: external tools still go through the same control gate.\n2. MCPClient: how one server connection exposes tool specs and tool calls.\n3. PluginLoader: how manifests declare external servers.\n4. MCPToolRouter / build_tool_pool: how native and external tools merge into one pool.\n\nMost common confusion:\n- a plugin manifest is not an MCP server\n- an MCP server is not a single MCP tool\n- external capability does not bypass the native permission path\n\nTeaching boundary:\nthis file teaches the smallest useful stdio MCP path.\nMarketplace details, auth flows, reconnect logic, and non-tool capability layers\nare intentionally left to bridge docs and later extensions.\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nPERMISSION_MODES = (\"default\", \"auto\")\n\n\nclass CapabilityPermissionGate:\n \"\"\"\n Shared permission gate for native tools and external capabilities.\n\n The teaching goal is simple: MCP does not bypass the control plane.\n Native tools and MCP tools both become normalized capability intents first,\n then pass through the same allow / ask policy.\n \"\"\"\n\n READ_PREFIXES = (\"read\", \"list\", \"get\", \"show\", \"search\", \"query\", \"inspect\")\n HIGH_RISK_PREFIXES = (\"delete\", \"remove\", \"drop\", \"shutdown\")\n\n def __init__(self, mode: str = \"default\"):\n self.mode = mode if mode in PERMISSION_MODES else \"default\"\n\n def normalize(self, tool_name: str, tool_input: dict) -> dict:\n if tool_name.startswith(\"mcp__\"):\n _, server_name, actual_tool = tool_name.split(\"__\", 2)\n source = \"mcp\"\n else:\n server_name = None\n actual_tool = tool_name\n source = \"native\"\n\n lowered = actual_tool.lower()\n if actual_tool == \"read_file\" or lowered.startswith(self.READ_PREFIXES):\n risk = \"read\"\n elif actual_tool == \"bash\":\n command = tool_input.get(\"command\", \"\")\n risk = \"high\" if any(\n token in command for token in (\"rm -rf\", \"sudo\", \"shutdown\", \"reboot\")\n ) else \"write\"\n elif lowered.startswith(self.HIGH_RISK_PREFIXES):\n risk = \"high\"\n else:\n risk = \"write\"\n\n return {\n \"source\": source,\n \"server\": server_name,\n \"tool\": actual_tool,\n \"risk\": risk,\n }\n\n def check(self, tool_name: str, tool_input: dict) -> dict:\n intent = self.normalize(tool_name, tool_input)\n\n if intent[\"risk\"] == \"read\":\n return 
{\"behavior\": \"allow\", \"reason\": \"Read capability\", \"intent\": intent}\n\n if self.mode == \"auto\" and intent[\"risk\"] != \"high\":\n return {\n \"behavior\": \"allow\",\n \"reason\": \"Auto mode for non-high-risk capability\",\n \"intent\": intent,\n }\n\n if intent[\"risk\"] == \"high\":\n return {\n \"behavior\": \"ask\",\n \"reason\": \"High-risk capability requires confirmation\",\n \"intent\": intent,\n }\n\n return {\n \"behavior\": \"ask\",\n \"reason\": \"State-changing capability requires confirmation\",\n \"intent\": intent,\n }\n\n def ask_user(self, intent: dict, tool_input: dict) -> bool:\n preview = json.dumps(tool_input, ensure_ascii=False)[:200]\n source = (\n f\"{intent['source']}:{intent['server']}/{intent['tool']}\"\n if intent.get(\"server\")\n else f\"{intent['source']}:{intent['tool']}\"\n )\n print(f\"\\n [Permission] {source} risk={intent['risk']}: {preview}\")\n try:\n answer = input(\" Allow? (y/n): \").strip().lower()\n except (EOFError, KeyboardInterrupt):\n return False\n return answer in (\"y\", \"yes\")\n\n\npermission_gate = CapabilityPermissionGate()\n\n\nclass MCPClient:\n \"\"\"\n Minimal MCP client over stdio.\n\n This is enough to teach the core architecture without dragging readers\n through every transport, auth flow, or marketplace detail up front.\n \"\"\"\n\n def __init__(self, server_name: str, command: str, args: list = None, env: dict = None):\n self.server_name = server_name\n self.command = command\n self.args = args or []\n self.env = {**os.environ, **(env or {})}\n self.process = None\n self._request_id = 0\n self._tools = [] # cached tool list\n\n def connect(self):\n \"\"\"Start the MCP server process.\"\"\"\n try:\n self.process = subprocess.Popen(\n [self.command] + self.args,\n stdin=subprocess.PIPE,\n stdout=subprocess.PIPE,\n stderr=subprocess.PIPE,\n env=self.env,\n text=True,\n )\n # Send initialize request\n self._send({\"method\": \"initialize\", \"params\": {\n \"protocolVersion\": \"2024-11-05\",\n \"capabilities\": {},\n \"clientInfo\": {\"name\": \"teaching-agent\", \"version\": \"1.0\"},\n }})\n response = self._recv()\n if response and \"result\" in response:\n # Send initialized notification\n self._send({\"method\": \"notifications/initialized\"})\n return True\n except FileNotFoundError:\n print(f\"[MCP] Server command not found: {self.command}\")\n except Exception as e:\n print(f\"[MCP] Connection failed: {e}\")\n return False\n\n def list_tools(self) -> list:\n \"\"\"Fetch available tools from the server.\"\"\"\n self._send({\"method\": \"tools/list\", \"params\": {}})\n response = self._recv()\n if response and \"result\" in response:\n self._tools = response[\"result\"].get(\"tools\", [])\n return self._tools\n\n def call_tool(self, tool_name: str, arguments: dict) -> str:\n \"\"\"Execute a tool on the server.\"\"\"\n self._send({\"method\": \"tools/call\", \"params\": {\n \"name\": tool_name,\n \"arguments\": arguments,\n }})\n response = self._recv()\n if response and \"result\" in response:\n content = response[\"result\"].get(\"content\", [])\n return \"\\n\".join(c.get(\"text\", str(c)) for c in content)\n if response and \"error\" in response:\n return f\"MCP Error: {response['error'].get('message', 'unknown')}\"\n return \"MCP Error: no response\"\n\n def get_agent_tools(self) -> list:\n \"\"\"\n Convert MCP tools to agent tool format.\n\n Teaching version uses the same simple prefix idea:\n mcp__{server_name}__{tool_name}\n \"\"\"\n agent_tools = []\n for tool in self._tools:\n prefixed_name = 
f\"mcp__{self.server_name}__{tool['name']}\"\n agent_tools.append({\n \"name\": prefixed_name,\n \"description\": tool.get(\"description\", \"\"),\n \"input_schema\": tool.get(\"inputSchema\", {\"type\": \"object\", \"properties\": {}}),\n \"_mcp_server\": self.server_name,\n \"_mcp_tool\": tool[\"name\"],\n })\n return agent_tools\n\n def disconnect(self):\n \"\"\"Shut down the server process.\"\"\"\n if self.process:\n try:\n self._send({\"method\": \"shutdown\"})\n self.process.terminate()\n self.process.wait(timeout=5)\n except Exception:\n self.process.kill()\n self.process = None\n\n def _send(self, message: dict):\n if not self.process or self.process.poll() is not None:\n return\n self._request_id += 1\n envelope = {\"jsonrpc\": \"2.0\", \"id\": self._request_id, **message}\n line = json.dumps(envelope) + \"\\n\"\n try:\n self.process.stdin.write(line)\n self.process.stdin.flush()\n except (BrokenPipeError, OSError):\n pass\n\n def _recv(self) -> dict | None:\n if not self.process or self.process.poll() is not None:\n return None\n try:\n line = self.process.stdout.readline()\n if line:\n return json.loads(line)\n except (json.JSONDecodeError, OSError):\n pass\n return None\n\n\nclass PluginLoader:\n \"\"\"\n Load plugins from .claude-plugin/ directories.\n\n Teaching version implements the smallest useful plugin flow:\n read a manifest, discover MCP server configs, and register them.\n \"\"\"\n\n def __init__(self, search_dirs: list = None):\n self.search_dirs = search_dirs or [WORKDIR]\n self.plugins = {} # name -> manifest\n\n def scan(self) -> list:\n \"\"\"Scan directories for .claude-plugin/plugin.json manifests.\"\"\"\n found = []\n for search_dir in self.search_dirs:\n plugin_dir = Path(search_dir) / \".claude-plugin\"\n manifest_path = plugin_dir / \"plugin.json\"\n if manifest_path.exists():\n try:\n manifest = json.loads(manifest_path.read_text())\n name = manifest.get(\"name\", plugin_dir.parent.name)\n self.plugins[name] = manifest\n found.append(name)\n except (json.JSONDecodeError, OSError) as e:\n print(f\"[Plugin] Failed to load {manifest_path}: {e}\")\n return found\n\n def get_mcp_servers(self) -> dict:\n \"\"\"\n Extract MCP server configs from loaded plugins.\n Returns {server_name: {command, args, env}}.\n \"\"\"\n servers = {}\n for plugin_name, manifest in self.plugins.items():\n for server_name, config in manifest.get(\"mcpServers\", {}).items():\n servers[f\"{plugin_name}__{server_name}\"] = config\n return servers\n\n\nclass MCPToolRouter:\n \"\"\"\n Routes tool calls to the correct MCP server.\n\n MCP tools are prefixed mcp__{server}__{tool} and live alongside\n native tools in the same tool pool. 
The router strips the prefix\n and dispatches to the right MCPClient.\n \"\"\"\n\n def __init__(self):\n self.clients = {} # server_name -> MCPClient\n\n def register_client(self, client: MCPClient):\n self.clients[client.server_name] = client\n\n def is_mcp_tool(self, tool_name: str) -> bool:\n return tool_name.startswith(\"mcp__\")\n\n def call(self, tool_name: str, arguments: dict) -> str:\n \"\"\"Route an MCP tool call to the correct server.\"\"\"\n parts = tool_name.split(\"__\", 2)\n if len(parts) != 3:\n return f\"Error: Invalid MCP tool name: {tool_name}\"\n _, server_name, actual_tool = parts\n client = self.clients.get(server_name)\n if not client:\n return f\"Error: MCP server not found: {server_name}\"\n return client.call_tool(actual_tool, arguments)\n\n def get_all_tools(self) -> list:\n \"\"\"Collect tools from all connected MCP servers.\"\"\"\n tools = []\n for client in self.clients.values():\n tools.extend(client.get_agent_tools())\n return tools\n\n\n# -- Native tool implementations (same as s02) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str) -> str:\n try:\n return safe_path(path).read_text()[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nNATIVE_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"]),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nNATIVE_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, 
\"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\n# -- MCP Tool Router (global) --\nmcp_router = MCPToolRouter()\nplugin_loader = PluginLoader()\n\n\ndef build_tool_pool() -> list:\n \"\"\"\n Assemble the complete tool pool: native + MCP tools.\n\n Native tools take precedence on name conflicts so the local core remains\n predictable even after external tools are added.\n \"\"\"\n all_tools = list(NATIVE_TOOLS)\n mcp_tools = mcp_router.get_all_tools()\n\n native_names = {t[\"name\"] for t in all_tools}\n for tool in mcp_tools:\n if tool[\"name\"] not in native_names:\n all_tools.append(tool)\n\n return all_tools\n\n\ndef handle_tool_call(tool_name: str, tool_input: dict) -> str:\n \"\"\"Dispatch to native handler or MCP router.\"\"\"\n if mcp_router.is_mcp_tool(tool_name):\n return mcp_router.call(tool_name, tool_input)\n handler = NATIVE_HANDLERS.get(tool_name)\n if handler:\n return handler(**tool_input)\n return f\"Unknown tool: {tool_name}\"\n\n\ndef normalize_tool_result(tool_name: str, output: str, intent: dict | None = None) -> str:\n intent = intent or permission_gate.normalize(tool_name, {})\n status = \"error\" if \"Error:\" in output or \"MCP Error:\" in output else \"ok\"\n payload = {\n \"source\": intent[\"source\"],\n \"server\": intent.get(\"server\"),\n \"tool\": intent[\"tool\"],\n \"risk\": intent[\"risk\"],\n \"status\": status,\n \"preview\": output[:500],\n }\n return json.dumps(payload, indent=2, ensure_ascii=False)\n\n\ndef agent_loop(messages: list):\n \"\"\"Agent loop with unified native + MCP tool pool.\"\"\"\n tools = build_tool_pool()\n\n while True:\n system = (\n f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks.\\n\"\n \"You have both native tools and MCP tools available.\\n\"\n \"MCP tools are prefixed with mcp__{server}__{tool}.\\n\"\n \"All capabilities pass through the same permission gate before execution.\"\n )\n response = client.messages.create(\n model=MODEL, system=system, messages=messages,\n tools=tools, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type != \"tool_use\":\n continue\n decision = permission_gate.check(block.name, block.input or {})\n try:\n if decision[\"behavior\"] == \"deny\":\n output = f\"Permission denied: {decision['reason']}\"\n elif decision[\"behavior\"] == \"ask\" and not permission_gate.ask_user(\n decision[\"intent\"], block.input or {}\n ):\n output = f\"Permission denied by user: {decision['reason']}\"\n else:\n output = handle_tool_call(block.name, block.input or {})\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": normalize_tool_result(\n block.name,\n str(output),\n decision.get(\"intent\"),\n ),\n })\n\n messages.append({\"role\": \"user\", \"content\": results})\n\n\n# Further upgrades you can add later:\n# - more transports\n# - auth / approval flows\n# - server reconnect and lifecycle management\n# - filtering external tools before they reach the model\n# - richer plugin installation and update handling\n\n\nif __name__ == \"__main__\":\n # Scan for plugins\n found = plugin_loader.scan()\n if found:\n print(f\"[Plugins loaded: {', '.join(found)}]\")\n for server_name, config in plugin_loader.get_mcp_servers().items():\n mcp_client = 
MCPClient(server_name, config.get(\"command\", \"\"), config.get(\"args\", []))\n if mcp_client.connect():\n mcp_client.list_tools()\n mcp_router.register_client(mcp_client)\n print(f\"[MCP] Connected to {server_name}\")\n\n tool_count = len(build_tool_pool())\n mcp_count = len(mcp_router.get_all_tools())\n print(f\"[Tool pool: {tool_count} tools ({mcp_count} from MCP)]\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms19 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n\n if query.strip() == \"/tools\":\n for tool in build_tool_pool():\n prefix = \"[MCP] \" if tool[\"name\"].startswith(\"mcp__\") else \" \"\n print(f\" {prefix}{tool['name']}: {tool.get('description', '')[:60]}\")\n continue\n\n if query.strip() == \"/mcp\":\n if mcp_router.clients:\n for name, c in mcp_router.clients.items():\n tools = c.get_agent_tools()\n print(f\" {name}: {len(tools)} tools\")\n else:\n print(\" (no MCP servers connected)\")\n continue\n\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n\n # Cleanup MCP connections\n for c in mcp_router.clients.values():\n c.disconnect()\n"
}
],
"diffs": [
@@ -837,68 +1562,150 @@
"safe_path",
"run_read",
"run_write",
- "run_edit"
+ "run_edit",
+ "normalize_messages"
],
"newTools": [
"read_file",
"write_file",
"edit_file"
],
- "locDelta": 36
+ "locDelta": 39
},
{
"from": "s02",
"to": "s03",
"newClasses": [
+ "PlanItem",
+ "PlanningState",
"TodoManager"
],
- "newFunctions": [],
+ "newFunctions": [
+ "extract_text"
+ ],
"newTools": [
"todo"
],
- "locDelta": 56
+ "locDelta": 110
},
{
"from": "s03",
"to": "s04",
- "newClasses": [],
+ "newClasses": [
+ "AgentTemplate"
+ ],
"newFunctions": [
"run_subagent"
],
"newTools": [
"task"
],
- "locDelta": -25
+ "locDelta": -79
},
{
"from": "s04",
"to": "s05",
"newClasses": [
- "SkillLoader"
+ "SkillManifest",
+ "SkillDocument",
+ "SkillRegistry"
+ ],
+ "newFunctions": [
+ "extract_text"
],
- "newFunctions": [],
"newTools": [
"load_skill"
],
- "locDelta": 36
+ "locDelta": 44
},
{
"from": "s05",
"to": "s06",
- "newClasses": [],
+ "newClasses": [
+ "CompactState"
+ ],
"newFunctions": [
- "estimate_tokens",
+ "estimate_context_size",
+ "track_recent_file",
+ "persist_large_output",
+ "collect_tool_result_blocks",
"micro_compact",
- "auto_compact"
+ "write_transcript",
+ "summarize_history",
+ "compact_history",
+ "execute_tool"
],
"newTools": [
"compact"
],
- "locDelta": 18
+ "locDelta": 64
},
{
"from": "s06",
"to": "s07",
+ "newClasses": [
+ "BashSecurityValidator",
+ "PermissionManager"
+ ],
+ "newFunctions": [
+ "is_workspace_trusted"
+ ],
+ "newTools": [],
+ "locDelta": 0
+ },
+ {
+ "from": "s07",
+ "to": "s08",
+ "newClasses": [
+ "HookManager"
+ ],
+ "newFunctions": [],
+ "newTools": [],
+ "locDelta": -56
+ },
+ {
+ "from": "s08",
+ "to": "s09",
+ "newClasses": [
+ "MemoryManager",
+ "DreamConsolidator"
+ ],
+ "newFunctions": [
+ "run_save_memory",
+ "build_system_prompt"
+ ],
+ "newTools": [
+ "save_memory"
+ ],
+ "locDelta": 162
+ },
+ {
+ "from": "s09",
+ "to": "s10",
+ "newClasses": [
+ "SystemPromptBuilder"
+ ],
+ "newFunctions": [
+ "build_system_reminder"
+ ],
+ "newTools": [],
+ "locDelta": -109
+ },
+ {
+ "from": "s10",
+ "to": "s11",
+ "newClasses": [],
+ "newFunctions": [
+ "estimate_tokens",
+ "auto_compact",
+ "backoff_delay"
+ ],
+ "newTools": [],
+ "locDelta": -56
+ },
+ {
+ "from": "s11",
+ "to": "s12",
"newClasses": [
"TaskManager"
],
@@ -909,12 +1716,13 @@
"task_list",
"task_get"
],
- "locDelta": 2
+ "locDelta": -22
},
{
- "from": "s07",
- "to": "s08",
+ "from": "s12",
+ "to": "s13",
"newClasses": [
+ "NotificationQueue",
"BackgroundManager"
],
"newFunctions": [],
@@ -922,11 +1730,29 @@
"background_run",
"check_background"
],
- "locDelta": -9
+ "locDelta": 60
},
{
- "from": "s08",
- "to": "s09",
+ "from": "s13",
+ "to": "s14",
+ "newClasses": [
+ "CronLock",
+ "CronScheduler"
+ ],
+ "newFunctions": [
+ "cron_matches",
+ "_field_matches"
+ ],
+ "newTools": [
+ "cron_create",
+ "cron_delete",
+ "cron_list"
+ ],
+ "locDelta": 165
+ },
+ {
+ "from": "s14",
+ "to": "s15",
"newClasses": [
"MessageBus",
"TeammateManager"
@@ -946,12 +1772,14 @@
"list_teammates",
"broadcast"
],
- "locDelta": 150
+ "locDelta": -102
},
{
- "from": "s09",
- "to": "s10",
- "newClasses": [],
+ "from": "s15",
+ "to": "s16",
+ "newClasses": [
+ "RequestStore"
+ ],
"newFunctions": [
"handle_shutdown_request",
"handle_plan_review",
@@ -962,26 +1790,29 @@
"plan_approval",
"shutdown_request"
],
- "locDelta": 71
+ "locDelta": 132
},
{
- "from": "s10",
- "to": "s11",
+ "from": "s16",
+ "to": "s17",
"newClasses": [],
"newFunctions": [
+ "_append_claim_event",
+ "_task_allows_role",
+ "is_claimable_task",
"scan_unclaimed_tasks",
- "claim_task",
- "make_identity_block"
+ "make_identity_block",
+ "ensure_identity_context"
],
"newTools": [
"idle",
"claim_task"
],
- "locDelta": 80
+ "locDelta": 121
},
{
- "from": "s11",
- "to": "s12",
+ "from": "s17",
+ "to": "s18",
"newClasses": [
"EventBus",
"TaskManager",
@@ -1003,13 +1834,32 @@
"task_bind_worktree",
"worktree_create",
"worktree_list",
+ "worktree_enter",
"worktree_status",
"worktree_run",
+ "worktree_closeout",
"worktree_remove",
"worktree_keep",
"worktree_events"
],
- "locDelta": 195
+ "locDelta": -39
+ },
+ {
+ "from": "s18",
+ "to": "s19",
+ "newClasses": [
+ "CapabilityPermissionGate",
+ "MCPClient",
+ "PluginLoader",
+ "MCPToolRouter"
+ ],
+ "newFunctions": [
+ "build_tool_pool",
+ "handle_tool_call",
+ "normalize_tool_result"
+ ],
+ "newTools": [],
+ "locDelta": -101
}
]
}
\ No newline at end of file
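One thing the s19 source embedded above references but never shows is what a plugin manifest actually looks like on disk. The sketch below writes a `.claude-plugin/plugin.json` in the shape that `PluginLoader.scan()` and `get_mcp_servers()` expect; the plugin name `files-plugin`, the server key `files`, and the command `mcp-server-files` are hypothetical placeholders, not values taken from this diff.

# Minimal sketch of a plugin manifest for s19's PluginLoader (hypothetical values).
import json
from pathlib import Path

manifest = {
    "name": "files-plugin",                    # scan() falls back to the parent dir name if omitted
    "mcpServers": {
        "files": {
            "command": "mcp-server-files",     # hypothetical stdio MCP server executable
            "args": ["--root", "."],
            "env": {},
        }
    },
}

plugin_dir = Path(".claude-plugin")
plugin_dir.mkdir(exist_ok=True)
(plugin_dir / "plugin.json").write_text(json.dumps(manifest, indent=2))

# With this file present, PluginLoader.scan() would report ["files-plugin"], and
# get_mcp_servers() would return
# {"files-plugin__files": {"command": "mcp-server-files", "args": ["--root", "."], "env": {}}}.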
diff --git a/web/src/data/scenarios/zh/s01.json b/web/src/data/scenarios/zh/s01.json
new file mode 100644
index 000000000..06325d0b9
--- /dev/null
+++ b/web/src/data/scenarios/zh/s01.json
@@ -0,0 +1,51 @@
+{
+ "version": "s01",
+ "title": "Agent 循环",
+ "description": "一个仅使用 bash(命令行)即可完成任务的最小 Agent",
+ "steps": [
+ {
+ "type": "user_message",
+ "content": "创建一个名为 hello.py 的文件,输出 \"Hello, World!\"",
+ "annotation": "用户向 Agent 提交任务"
+ },
+ {
+ "type": "assistant_text",
+ "content": "我会用 bash 帮你创建这个文件。",
+ "annotation": "模型决定使用唯一工具:bash"
+ },
+ {
+ "type": "tool_call",
+ "content": "echo 'print(\"Hello, World!\")' > hello.py",
+ "toolName": "bash",
+ "annotation": "工具调用:模型生成 bash 命令"
+ },
+ {
+ "type": "tool_result",
+ "content": "",
+ "toolName": "bash",
+ "annotation": "bash 返回空输出(表示成功)"
+ },
+ {
+ "type": "assistant_text",
+ "content": "我再验证一下文件是否写对了。",
+ "annotation": "循环继续:模型看到结果后决定下一步动作"
+ },
+ {
+ "type": "tool_call",
+ "content": "cat hello.py",
+ "toolName": "bash",
+ "annotation": "再次调用 bash 做结果校验"
+ },
+ {
+ "type": "tool_result",
+ "content": "print(\"Hello, World!\")",
+ "toolName": "bash",
+ "annotation": "文件内容确认无误"
+ },
+ {
+ "type": "assistant_text",
+ "content": "完成了!我已经创建了 hello.py。你可以运行 `python hello.py` 进行验证。",
+ "annotation": "stop_reason != tool_use(工具调用)-> 循环退出并回复用户"
+ }
+ ]
+}
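The s01 scenario above is easier to follow next to the loop it visualizes. The sketch below is a compressed version of that loop, assuming the same Anthropic client and MODEL_ID environment variable used by the s19 source earlier in this diff; command guard rails, output truncation, and error handling are omitted.

# Minimal sketch of the s01 loop: one bash tool, one loop, exit when the model stops calling tools.
import os
import subprocess
from anthropic import Anthropic

client = Anthropic()
MODEL = os.environ["MODEL_ID"]

BASH_TOOL = [{
    "name": "bash", "description": "Run a shell command.",
    "input_schema": {"type": "object",
                     "properties": {"command": {"type": "string"}},
                     "required": ["command"]},
}]

def agent_loop(messages: list):
    while True:
        response = client.messages.create(
            model=MODEL, system="Solve the task with bash.",
            messages=messages, tools=BASH_TOOL, max_tokens=4000,
        )
        messages.append({"role": "assistant", "content": response.content})
        if response.stop_reason != "tool_use":        # no more tool calls -> reply to user
            return
        results = []
        for block in response.content:
            if block.type != "tool_use":
                continue
            r = subprocess.run(block.input["command"], shell=True,
                               capture_output=True, text=True, timeout=120)
            results.append({"type": "tool_result", "tool_use_id": block.id,
                            "content": (r.stdout + r.stderr) or "(no output)"})
        messages.append({"role": "user", "content": results})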
diff --git a/web/src/data/scenarios/zh/s02.json b/web/src/data/scenarios/zh/s02.json
new file mode 100644
index 000000000..7505fbb41
--- /dev/null
+++ b/web/src/data/scenarios/zh/s02.json
@@ -0,0 +1,46 @@
+{
+ "version": "s02",
+ "title": "工具系统",
+ "description": "具备 read/write/edit/bash 四类工具的 Agent",
+ "steps": [
+ {
+ "type": "user_message",
+ "content": "给 hello.py 增加一个 greet(name) 函数",
+ "annotation": "用户请求修改代码"
+ },
+ {
+ "type": "assistant_text",
+ "content": "我先读取当前文件内容。",
+ "annotation": "模型优先选择 read_file(读文件),而不是 bash cat"
+ },
+ {
+ "type": "tool_call",
+ "content": "hello.py",
+ "toolName": "read_file",
+ "annotation": "专用读取工具可以更稳定地访问文件"
+ },
+ {
+ "type": "tool_result",
+ "content": "print(\"Hello, World!\")",
+ "toolName": "read_file",
+ "annotation": "文件内容回传给模型"
+ },
+ {
+ "type": "tool_call",
+ "content": "hello.py\n---\ndef greet(name):\n print(f\"Hello, {name}!\")\n\ngreet(\"World\")",
+ "toolName": "write_file",
+ "annotation": "write_file(写文件)直接替换整个文件内容"
+ },
+ {
+ "type": "tool_result",
+ "content": "文件写入成功",
+ "toolName": "write_file",
+ "annotation": "写入已确认"
+ },
+ {
+ "type": "assistant_text",
+ "content": "已更新 hello.py,新增了可接收参数的 greet(name) 函数。",
+ "annotation": "4 个专用工具比 1 个通用工具更清晰:每个工具职责明确"
+ }
+ ]
+}
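The point of s02, four specialized tools instead of one generic bash, reduces to a schema list plus a dispatch table. The sketch below shows only the dispatch step; it mirrors the NATIVE_HANDLERS pattern in the s19 source above with a trimmed handler set, so treat it as an illustration rather than the exact s02 code.

# Dispatch from a model tool_use block (name + JSON input) to a Python handler.
from pathlib import Path

def run_read(path: str) -> str:
    return Path(path).read_text()[:50000]

def run_write(path: str, content: str) -> str:
    Path(path).write_text(content)
    return f"Wrote {len(content)} bytes"

HANDLERS = {
    "read_file": lambda **kw: run_read(kw["path"]),
    "write_file": lambda **kw: run_write(kw["path"], kw["content"]),
}

def handle_tool_call(tool_name: str, tool_input: dict) -> str:
    handler = HANDLERS.get(tool_name)
    return handler(**tool_input) if handler else f"Unknown tool: {tool_name}"

# handle_tool_call("read_file", {"path": "hello.py"}) -> 'print("Hello, World!")'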
diff --git a/web/src/data/scenarios/zh/s03.json b/web/src/data/scenarios/zh/s03.json
new file mode 100644
index 000000000..87b150291
--- /dev/null
+++ b/web/src/data/scenarios/zh/s03.json
@@ -0,0 +1,53 @@
+{
+ "version": "s03",
+ "title": "TodoWrite(待办写入)",
+ "description": "Agent 在执行前先生成可见计划",
+ "steps": [
+ {
+ "type": "user_message",
+ "content": "构建一个 CLI 计算器,支持加减乘除",
+ "annotation": "这是一个多步骤任务,先规划再执行更稳定"
+ },
+ {
+ "type": "tool_call",
+ "content": "[\n { \"id\": 1, \"task\": \"创建 calc.py 并解析参数\", \"status\": \"in_progress\" },\n { \"id\": 2, \"task\": \"实现 4 种运算\", \"status\": \"pending\" },\n { \"id\": 3, \"task\": \"补充错误处理\", \"status\": \"pending\" },\n { \"id\": 4, \"task\": \"测试全部运算\", \"status\": \"pending\" }\n]",
+ "toolName": "todo_write",
+ "annotation": "TodoManager 在写代码前先产出可见计划"
+ },
+ {
+ "type": "tool_result",
+ "content": "已创建 4 项待办计划",
+ "toolName": "todo_write",
+ "annotation": "计划现在可追踪、可更新"
+ },
+ {
+ "type": "tool_call",
+ "content": "calc.py\n---\nimport sys\n\ndef calculate(op, a, b):\n ops = {'+': a+b, '-': a-b, '*': a*b, '/': a/b}\n return ops.get(op, 'Unknown operator')\n\nif __name__ == '__main__':\n a, op, b = float(sys.argv[1]), sys.argv[2], float(sys.argv[3])\n print(calculate(op, a, b))",
+ "toolName": "write_file",
+ "annotation": "执行第 1 和第 2 项:创建文件并实现运算逻辑"
+ },
+ {
+ "type": "tool_result",
+ "content": "文件写入成功",
+ "toolName": "write_file",
+ "annotation": "代码已落盘"
+ },
+ {
+ "type": "tool_call",
+ "content": "python calc.py 10 + 5",
+ "toolName": "bash",
+ "annotation": "执行第 4 项:运行测试"
+ },
+ {
+ "type": "tool_result",
+ "content": "15.0",
+ "toolName": "bash",
+ "annotation": "测试通过"
+ },
+ {
+ "type": "assistant_text",
+ "content": "计算器已构建并验证,四则运算均可正常工作。",
+ "annotation": "可见计划让执行进度可追踪,从而提高完成率"
+ }
+ ]
+}
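The s03 scenario shows the todo plan only as tool calls. A minimal sketch of the state behind it is below; the class name TodoManager appears in the version metadata earlier in this diff, but the method names `write` and `render` are assumptions for illustration.

# Minimal sketch of a todo_write backend: a list of {"id", "task", "status"} items.
import json

class TodoManager:
    def __init__(self):
        self.items = []            # [{"id": int, "task": str, "status": str}]

    def write(self, items_json: str) -> str:
        self.items = json.loads(items_json)
        return f"Created {len(self.items)} todo items"   # the zh scenario shows the localized equivalent

    def render(self) -> str:
        marks = {"completed": "x", "in_progress": ">", "pending": " "}
        return "\n".join(f"[{marks.get(i['status'], ' ')}] {i['id']}. {i['task']}"
                         for i in self.items)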
diff --git a/web/src/data/scenarios/zh/s04.json b/web/src/data/scenarios/zh/s04.json
new file mode 100644
index 000000000..332812976
--- /dev/null
+++ b/web/src/data/scenarios/zh/s04.json
@@ -0,0 +1,51 @@
+{
+ "version": "s04",
+ "title": "子代理",
+ "description": "Agent 将子任务委托给 subagent(子代理)并隔离上下文执行",
+ "steps": [
+ {
+ "type": "user_message",
+ "content": "重构计算器:拆分模块并补上测试",
+ "annotation": "这是一个适合委托的复杂任务"
+ },
+ {
+ "type": "assistant_text",
+ "content": "我会把“编写测试”委托给子代理,我来处理重构主线。",
+ "annotation": "Agent 决定通过 Task 工具拆分工作"
+ },
+ {
+ "type": "tool_call",
+ "content": "为 calc.py 编写单元测试,覆盖 4 种运算和除零等边界情况",
+ "toolName": "task",
+ "annotation": "Task(任务委托)工具会启动一个拥有独立上下文的子代理"
+ },
+ {
+ "type": "system_event",
+ "content": "子代理已创建:fresh messages[]、共享工具集、scoped prompt(范围化提示)",
+ "annotation": "子代理拿到干净上下文,避免被父流程历史干扰"
+ },
+ {
+ "type": "tool_result",
+ "content": "已创建 test_calc.py,包含 8 个测试用例,全部通过。",
+ "toolName": "task",
+ "annotation": "子代理完成后向父代理返回摘要结果"
+ },
+ {
+ "type": "tool_call",
+ "content": "calc.py -> calc/operations.py, calc/cli.py",
+ "toolName": "write_file",
+ "annotation": "父代理继续推进自己的主线任务:代码重构"
+ },
+ {
+ "type": "tool_result",
+ "content": "文件写入成功",
+ "toolName": "write_file",
+ "annotation": "模块拆分完成"
+ },
+ {
+ "type": "assistant_text",
+ "content": "已重构为 calc/operations.py 与 calc/cli.py;测试由子代理完成并全部通过。",
+ "annotation": "上下文隔离可避免不同子任务之间的语义串扰"
+ }
+ ]
+}
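The s04 scenario describes delegation as "fresh messages[], shared tools, scoped prompt". A sketch of what the Task tool boils down to is below, assuming the `agent_loop` from the s01 sketch above; `run_subagent` is named in the version metadata, but this body is an illustration, not the repo's exact implementation.

# Minimal sketch of Task delegation: run another loop on a fresh context, return only text.
def run_subagent(task_description: str) -> str:
    messages = [{"role": "user", "content": task_description}]   # fresh context, no parent history
    agent_loop(messages)                                          # shared tools, isolated conversation
    last = messages[-1]["content"]
    # Only a text summary flows back to the parent; the child's full history is discarded.
    if isinstance(last, list):
        return "\n".join(getattr(b, "text", "") for b in last if hasattr(b, "text"))
    return str(last)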
diff --git a/web/src/data/scenarios/zh/s05.json b/web/src/data/scenarios/zh/s05.json
new file mode 100644
index 000000000..50b73868b
--- /dev/null
+++ b/web/src/data/scenarios/zh/s05.json
@@ -0,0 +1,44 @@
+{
+ "version": "s05",
+ "title": "技能系统",
+ "description": "Agent 从 SKILL.md 文件按需加载外部知识",
+ "steps": [
+ {
+ "type": "user_message",
+ "content": "帮我初始化一个带 TypeScript 的 Next.js 项目",
+ "annotation": "该任务需要框架专项知识"
+ },
+ {
+ "type": "system_event",
+ "content": "[SkillLoader] 扫描 SKILL.md 文件... 已发现:nextjs.SKILL.md",
+ "annotation": "在主循环执行前,SkillLoader 先发现匹配技能"
+ },
+ {
+ "type": "system_event",
+ "content": "[Skill 注入] nextjs.SKILL.md -> 已追加到 tool_result 消息",
+ "annotation": "技能内容以 tool_result(工具结果)形式注入,而不是塞进 system prompt"
+ },
+ {
+ "type": "assistant_text",
+ "content": "我已获得 Next.js 初始化知识,现在开始创建项目结构。",
+ "annotation": "模型已具备来自技能文件的框架上下文"
+ },
+ {
+ "type": "tool_call",
+ "content": "npx create-next-app@latest my-app --typescript --tailwind --eslint --app --src-dir",
+ "toolName": "bash",
+ "annotation": "按技能知识提供的正确参数执行命令"
+ },
+ {
+ "type": "tool_result",
+ "content": "成功!已在 /workspace/my-app 创建项目",
+ "toolName": "bash",
+ "annotation": "项目已按目标配置创建完成"
+ },
+ {
+ "type": "assistant_text",
+ "content": "Next.js 项目已创建,包含 TypeScript、Tailwind CSS、ESLint、App Router 和 src 目录。",
+ "annotation": "技能通过 tool_result 注入,避免 system prompt 过重"
+ }
+ ]
+}
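s05's point is that skill content is injected as a tool_result message instead of being preloaded into the system prompt. A minimal sketch of that injection step follows; the `Message` shape and `injectSkill` are hypothetical names, not the repo's SkillLoader API:

```ts
// Illustrative sketch of on-demand skill injection; names are hypothetical.
type Message =
  | { role: "system" | "user" | "assistant"; content: string }
  | { role: "tool"; toolName: string; content: string };

// Discovery stays cheap (scan for *.SKILL.md filenames); the full body is
// appended only when a matching skill is actually needed, as a tool_result.
function injectSkill(messages: Message[], skillName: string, skillBody: string): Message[] {
  return [
    ...messages,
    { role: "tool", toolName: "skill_loader", content: `[Skill: ${skillName}]\n${skillBody}` },
  ];
}
```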
diff --git a/web/src/data/scenarios/zh/s06.json b/web/src/data/scenarios/zh/s06.json
new file mode 100644
index 000000000..5b2fa55ba
--- /dev/null
+++ b/web/src/data/scenarios/zh/s06.json
@@ -0,0 +1,51 @@
+{
+ "version": "s06",
+ "title": "上下文压缩",
+ "description": "当 token 超阈值时,Agent 自动压缩上下文继续工作",
+ "steps": [
+ {
+ "type": "user_message",
+ "content": "分析这个项目里所有 Python 文件,并分别给出摘要",
+ "annotation": "这是会快速膨胀上下文的大任务"
+ },
+ {
+ "type": "tool_call",
+ "content": "find . -name '*.py' -type f",
+ "toolName": "bash",
+ "annotation": "先找出全部待分析文件"
+ },
+ {
+ "type": "tool_result",
+ "content": "./src/main.py\n./src/utils.py\n./src/models.py\n./tests/test_main.py\n... (另外 28 个文件)",
+ "toolName": "bash",
+ "annotation": "共发现 32 个文件;全部读取会产生大量上下文"
+ },
+ {
+ "type": "assistant_text",
+ "content": "我会按顺序逐个读取并分析。",
+ "annotation": "Agent 开始逐文件推进"
+ },
+ {
+ "type": "tool_call",
+ "content": "./src/main.py",
+ "toolName": "read_file",
+ "annotation": "读取第 1/32 个文件..."
+ },
+ {
+ "type": "tool_result",
+ "content": "[... 450 行代码 ...]",
+ "toolName": "read_file",
+ "annotation": "大型文件内容被追加到 messages[]"
+ },
+ {
+ "type": "system_event",
+ "content": "[ContextManager] Token 计数:148,231 / 阈值 150,000\n触发三层压缩:\n 1. 旧 tool_result 内容 > 1000 字符时替换为摘要\n 2. 合并连续 assistant 轮次\n 3. 汇总最早 50% 的会话\n结果:148,231 -> 42,087 tokens",
+ "annotation": "接近上限时自动触发压缩,防止上下文爆炸"
+ },
+ {
+ "type": "assistant_text",
+ "content": "我会在压缩后的上下文上继续分析。前 1-18 个文件已摘要,接下来处理剩余文件。",
+ "annotation": "通过遗忘旧细节并保留结构化摘要,Agent 可持续处理长任务"
+ }
+ ]
+}
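The ContextManager event in s06 describes a threshold check followed by a three-layer compaction pass. The sketch below covers only the trigger and the first layer (replacing old, oversized tool_results with placeholders); the token estimate and the 150,000 threshold are illustrative values, not the repo's implementation:

```ts
// Rough sketch of threshold-triggered compaction, layer 1 only. Illustrative.
type Message = { role: string; content: string; toolName?: string };

const TOKEN_THRESHOLD = 150_000;

// Crude length-based estimate; a real implementation would use the model's tokenizer.
const estimateTokens = (messages: Message[]) =>
  messages.reduce((sum, m) => sum + Math.ceil(m.content.length / 4), 0);

function maybeCompact(messages: Message[]): Message[] {
  if (estimateTokens(messages) < TOKEN_THRESHOLD) return messages;
  // Layer 1: old tool_results longer than 1000 chars become short placeholders,
  // while the last few turns stay intact so recent detail is preserved.
  return messages.map((m, i) =>
    m.toolName && m.content.length > 1000 && i < messages.length - 4
      ? { ...m, content: `[compacted tool_result: ${m.content.length} chars omitted]` }
      : m,
  );
}
```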
diff --git a/web/src/i18n/messages/en.json b/web/src/i18n/messages/en.json
index 6dcb3effb..9fd25bcfd 100644
--- a/web/src/i18n/messages/en.json
+++ b/web/src/i18n/messages/en.json
@@ -1,24 +1,155 @@
{
- "meta": { "title": "Learn Claude Code", "description": "Build a nano Claude Code-like agent from 0 to 1, one mechanism at a time" },
- "nav": { "home": "Home", "timeline": "Timeline", "compare": "Compare", "layers": "Layers", "github": "GitHub" },
- "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "Build a nano Claude Code-like agent from 0 to 1, one mechanism at a time", "start": "Start Learning", "core_pattern": "The Core Pattern", "core_pattern_desc": "Every AI coding agent shares the same loop: call the model, execute tools, feed results back. Production systems add policy, permissions, and lifecycle layers on top.", "learning_path": "Learning Path", "learning_path_desc": "12 progressive sessions, from a simple loop to isolated autonomous execution", "layers_title": "Architectural Layers", "layers_desc": "Five orthogonal concerns that compose into a complete agent", "loc": "LOC", "learn_more": "Learn More", "versions_in_layer": "versions", "message_flow": "Message Growth", "message_flow_desc": "Watch the messages array grow as the agent loop executes" },
- "version": { "loc": "lines of code", "tools": "tools", "new": "New", "prev": "Previous", "next": "Next", "view_source": "View Source", "view_diff": "View Diff", "design_decisions": "Design Decisions", "whats_new": "What's New", "tutorial": "Tutorial", "simulator": "Agent Loop Simulator", "execution_flow": "Execution Flow", "architecture": "Architecture", "concept_viz": "Concept Visualization", "alternatives": "Alternatives Considered", "tab_learn": "Learn", "tab_simulate": "Simulate", "tab_code": "Code", "tab_deep_dive": "Deep Dive" },
- "sim": { "play": "Play", "pause": "Pause", "step": "Step", "reset": "Reset", "speed": "Speed", "step_of": "of" },
- "timeline": { "title": "Learning Path", "subtitle": "s01 to s12: Progressive Agent Design", "layer_legend": "Layer Legend", "loc_growth": "LOC Growth", "learn_more": "Learn More" },
+ "meta": {
+ "title": "Learn Claude Code",
+ "description": "19 chapters, 4 stages, from zero to a high-completion Claude Code-like agent"
+ },
+ "nav": {
+ "home": "Home",
+ "reference": "Reference",
+ "compare": "Compare",
+ "github": "GitHub"
+ },
+ "reference": {
+ "title": "Reference",
+ "subtitle": "Glossary, architecture maps, and deep dive companion docs.",
+ "foundation_title": "Foundation Documents",
+ "deep_dive_title": "Deep Dive Documents"
+ },
+ "home": {
+ "hero_title": "Learn Claude Code",
+ "hero_subtitle": "19 chapters across 4 stages, from the minimal loop to a multi-agent platform and external capability bus",
+ "start": "Start Learning",
+ "entry_title": "Four Strong Entry Points",
+ "entry_desc": "If this is your first time here, do not jump into random chapters. Decide whether you want the mainline, the stage map, or a chapter jump comparison first.",
+ "entry_start_title": "Start From the Minimal Loop",
+ "entry_start_desc": "Best for first-time readers. Get the smallest agent loop working before you add tools, planning, and context management.",
+ "entry_start_action": "Open s01",
+ "entry_timeline_title": "Follow the Full Mainline",
+ "entry_timeline_desc": "Best if you want the full teaching sequence. You will see how the system grows chapter by chapter.",
+ "entry_timeline_action": "Open Timeline",
+ "entry_layers_title": "Understand the Four Stages First",
+ "entry_layers_desc": "Best if you want to understand why the repository is split into stages before diving into chapter detail.",
+ "entry_layers_action": "Open Stages",
+ "entry_compare_title": "Compare Two Steps When You Get Stuck",
+ "entry_compare_desc": "Best if you are already mid-way through the course and want to see exactly what capability one chapter adds over another.",
+ "entry_compare_action": "Open Compare",
+ "core_pattern": "The Core Pattern",
+ "core_pattern_desc": "Every AI coding agent grows from the same loop: call the model, execute tools, append results. Permissions, memory, tasks, teams, and plugins all extend that same loop rather than replacing it.",
+ "learning_path": "Learning Path",
+ "learning_path_desc": "19 progressive chapters grouped into core loop, system hardening, task runtime, and multi-agent platform stages",
+ "layers_title": "Four Stages",
+ "layers_desc": "The goal is not to dump details, but to teach the system in the order a developer can actually absorb and rebuild it.",
+ "guide_label": "Reading Guide",
+ "guide_start_title": "First Read: Lock In The Core Loop",
+ "guide_start_desc": "If you have not really internalized s01-s06 yet, do not rush into multi-agent or MCP. Build the single-agent loop first.",
+ "guide_middle_title": "Middle Read: Separate The State Layers",
+ "guide_middle_desc": "When task, runtime task, teammate, and worktree begin to blur together, step back and re-check the phase boundaries.",
+ "guide_finish_title": "Late Read: Watch The Platform Boundary",
+ "guide_finish_desc": "From s15 onward, the main question is not just more features. It is how the system boundary expands into multiple workers and external capabilities.",
+ "loc": "LOC",
+ "learn_more": "Learn More",
+ "versions_in_layer": "chapters",
+ "message_flow": "Message Growth",
+ "message_flow_desc": "Watch how messages[] grows during a real agent loop with tool calls and results"
+ },
+ "version": {
+ "loc": "lines of code",
+ "tools": "tools",
+ "new": "New",
+ "prev": "Previous",
+ "next": "Next",
+ "view_source": "View Source",
+ "view_diff": "View Diff",
+ "design_decisions": "Design Decisions",
+ "whats_new": "What's New",
+ "tutorial": "Tutorial",
+ "simulator": "Chapter Simulator",
+ "execution_flow": "Execution Flow",
+ "architecture": "Architecture",
+ "concept_viz": "Concept Visualization",
+ "alternatives": "Alternatives Considered",
+ "tab_learn": "Learn",
+ "tab_simulate": "Simulate",
+ "tab_code": "Code",
+ "tab_deep_dive": "Deep Dive",
+ "guide_label": "Chapter Guide",
+ "guide_addition_title": "Core Structure Added In This Chapter",
+ "guide_addition_empty": "This chapter mainly connects and hardens existing structures rather than introducing one brand-new major module.",
+ "guide_focus_title": "What To Focus On First",
+ "guide_focus_fallback": "Start with the core input, state, and output relationship before you dive into implementation detail.",
+ "guide_confusion_title": "What People Usually Mix Up",
+ "guide_confusion_fallback": "If concepts start to blur, return to the question this mechanism solves and the extra capability it adds over the previous chapter.",
+ "guide_goal_title": "What You Should Be Able To Build Afterward",
+ "guide_goal_fallback": "After this chapter, you should be able to reconnect this mechanism into your own agent system.",
+ "bridge_docs_label": "Bridge Reading",
+ "bridge_docs_title": "Deep Dives -- Optional Reading",
+ "bridge_docs_intro": "These companion pages clarify the concepts most likely to blur in this chapter. Read them when you feel confused, not as prerequisites.",
+ "bridge_docs_open": "Open Note",
+ "bridge_docs_kind_map": "Map",
+ "bridge_docs_kind_mechanism": "Mechanism",
+ "bridge_docs_fallback": "Fallback",
+ "bridge_docs_back": "Back To Learning Path",
+ "bridge_docs_standalone": "Deep Dive",
+ "bridge_docs_fallback_note": "This document is not available in the current locale. Showing fallback:"
+ },
+ "sim": {
+ "play": "Play",
+ "pause": "Pause",
+ "step": "Step",
+ "previous_step": "Previous step",
+ "next_step": "Next step",
+ "autoplay": "Auto-play",
+ "reset": "Reset",
+ "speed": "Speed",
+ "step_of": "of"
+ },
+ "timeline": {
+ "title": "Learning Path",
+ "subtitle": "s01 to s19: build the agent system progressively across 4 stages",
+ "layer_legend": "Stage Legend",
+ "loc_growth": "LOC Growth",
+ "learn_more": "Learn More"
+ },
"layers": {
- "title": "Architectural Layers",
- "subtitle": "Five orthogonal concerns that compose into a complete agent",
- "tools": "What the agent CAN do. The foundation: tools give the model capabilities to interact with the world.",
- "planning": "How work is organized. From simple todo lists to dependency-aware task boards shared across agents.",
- "memory": "Keeping context within limits. Compression strategies that let agents work infinitely without losing coherence.",
- "concurrency": "Non-blocking execution. Background threads and notification buses for parallel work.",
- "collaboration": "Multi-agent coordination. Teams, messaging, and autonomous teammates that think for themselves."
+ "title": "Architecture Stages",
+ "subtitle": "Start from the smallest working loop, then add control, durability, background execution, multi-agent coordination, and external capability routing.",
+ "guide_label": "How To Read",
+ "guide_start_title": "If This Is Your First Pass",
+ "guide_start_desc": "Start at the first stage. Do not skip the core loop, because every later capability wraps around it.",
+ "guide_middle_title": "If The Middle Starts To Blur",
+ "guide_middle_desc": "Check which stage a chapter belongs to first. Then ask whether it is adding safety, state, runtime behavior, or platform surface.",
+ "guide_finish_title": "If You Want To Rebuild It Yourself",
+ "guide_finish_desc": "After each stage, you should end up with one real system slice you could implement, not just more vocabulary.",
+ "core": "Core Loop: build the smallest useful single-agent system first, including tools, planning, delegation, skills, and context compaction.",
+ "core_outcome": "After this stage, you should be able to build a working single-agent harness on your own.",
+ "hardening": "System Hardening: move from 'it runs' to 'it runs safely and predictably' with permissions, hooks, memory, prompt assembly, and error recovery.",
+ "hardening_outcome": "After this stage, you should know how to make the agent safer, steadier, and easier to extend.",
+ "runtime": "Task Runtime: turn work from session-local planning into durable, background, and scheduled execution.",
+ "runtime_outcome": "After this stage, you should be able to lift chat-level steps into a durable runtime task system.",
+ "platform": "Multi-Agent Platform: add persistent teammates, protocols, autonomy, isolated execution lanes, and MCP / plugin capability routing.",
+ "platform_outcome": "After this stage, you should be able to grow a single agent into a collaborative platform."
},
"compare": {
- "title": "Compare Versions",
- "subtitle": "See what changed between any two versions",
- "select_a": "Version A",
- "select_b": "Version B",
+ "title": "Learning Path Compare",
+ "subtitle": "Compare what capability is introduced between two chapters, why it appears there, and what you should focus on first.",
+ "learning_jump": "Learning Jump",
+ "selector_title": "Choose the step you want to compare",
+ "selector_note": "This page is designed to explain capability shifts before it throws you into code-level detail.",
+ "select_a": "Chapter A",
+ "select_b": "Chapter B",
+ "select_placeholder": "-- select --",
+ "carry_from_a": "Carry From A",
+ "new_in_b": "New In B",
+ "progression": "Progression",
+ "progression_same_chapter": "You selected the same chapter twice. Useful for rereading one chapter, not for studying a capability jump.",
+ "progression_reverse": "This is a backward-looking comparison. Use it to see which ideas in a later system actually come from earlier chapters.",
+ "progression_direct": "This is the next natural step in the path. It is the best way to study how the system grows chapter by chapter.",
+ "progression_same_layer": "Both chapters stay in the same capability layer, so the focus is on making one idea deeper and more complete.",
+ "progression_cross_layer": "This comparison crosses into a new capability layer, so the important question is how the system boundary changes.",
+ "chapter_distance": "Chapter Distance",
+ "shared_tools_count": "Shared Tools",
+ "new_surface": "New Surface Area",
+ "empty_lead": "No short chapter thesis was extracted for this chapter yet.",
"loc_delta": "LOC Delta",
"lines": "lines",
"new_tools_in_b": "New Tools in B",
@@ -28,49 +159,66 @@
"only_in": "Only in",
"shared": "Shared",
"none": "None",
- "source_diff": "Source Code Diff",
- "empty_hint": "Select two versions above to compare them.",
- "architecture": "Architecture"
+ "source_diff": "Source Diff (Optional)",
+ "source_diff_note": "If you care about implementation detail, read the diff next. If you only care about the mechanism, the learning cards above should be enough.",
+ "empty_hint": "Choose two chapters first, then inspect what capability the upgrade actually adds.",
+ "architecture": "Architecture",
+ "architecture_note": "Read module boundaries and collaboration first, then drop into implementation detail only if you need it."
},
"diff": {
"new_classes": "New Classes",
"new_tools": "New Tools",
"new_functions": "New Functions",
- "loc_delta": "LOC Delta"
+ "loc_delta": "LOC Delta",
+ "view_unified": "Unified",
+ "view_split": "Split"
},
"sessions": {
"s01": "The Agent Loop",
- "s02": "Tools",
+ "s02": "Tool Use",
"s03": "TodoWrite",
- "s04": "Subagents",
+ "s04": "Subagent",
"s05": "Skills",
- "s06": "Compact",
- "s07": "Tasks",
- "s08": "Background Tasks",
- "s09": "Agent Teams",
- "s10": "Team Protocols",
- "s11": "Autonomous Agents",
- "s12": "Worktree + Task Isolation"
+ "s06": "Context Compact",
+ "s07": "Permission System",
+ "s08": "Hook System",
+ "s09": "Memory System",
+ "s10": "System Prompt",
+ "s11": "Error Recovery",
+ "s12": "Task System",
+ "s13": "Background Tasks",
+ "s14": "Cron Scheduler",
+ "s15": "Agent Teams",
+ "s16": "Team Protocols",
+ "s17": "Autonomous Agents",
+ "s18": "Worktree Isolation",
+ "s19": "MCP & Plugin"
},
"layer_labels": {
- "tools": "Tools & Execution",
- "planning": "Planning & Coordination",
- "memory": "Memory Management",
- "concurrency": "Concurrency",
- "collaboration": "Collaboration"
+ "core": "Core Loop",
+ "hardening": "System Hardening",
+ "runtime": "Task Runtime",
+ "platform": "Multi-Agent Platform"
},
"viz": {
"s01": "The Agent While-Loop",
"s02": "Tool Dispatch Map",
- "s03": "TodoWrite Nag System",
+ "s03": "TodoWrite Reminder Loop",
"s04": "Subagent Context Isolation",
"s05": "On-Demand Skill Loading",
- "s06": "Three-Layer Context Compression",
- "s07": "Task Dependency Graph",
- "s08": "Background Task Lanes",
- "s09": "Agent Team Mailboxes",
- "s10": "FSM Team Protocols",
- "s11": "Autonomous Agent Cycle",
- "s12": "Worktree Task Isolation"
+ "s06": "Three-Layer Context Compaction",
+ "s07": "Permission Gate",
+ "s08": "Lifecycle Hook Surface",
+ "s09": "Long-Term vs Short-Term Memory",
+ "s10": "Prompt Assembly Pipeline",
+ "s11": "Recovery Branch State Machine",
+ "s12": "Task Dependency Graph",
+ "s13": "Background Task Lanes",
+ "s14": "Cron Trigger Pipeline",
+ "s15": "Agent Team Mailboxes",
+ "s16": "Protocol Sequence Diagram",
+ "s17": "Autonomous Agent Cycle",
+ "s18": "Worktree Task Isolation",
+ "s19": "External Capability Bus"
}
}
diff --git a/web/src/i18n/messages/ja.json b/web/src/i18n/messages/ja.json
index 25192d20d..8992c43f2 100644
--- a/web/src/i18n/messages/ja.json
+++ b/web/src/i18n/messages/ja.json
@@ -1,76 +1,224 @@
{
- "meta": { "title": "Learn Claude Code", "description": "0 から 1 へ nano Claude Code-like agent を構築し、毎回 1 つの仕組みを追加" },
- "nav": { "home": "ホーム", "timeline": "学習パス", "compare": "バージョン比較", "layers": "アーキテクチャ層", "github": "GitHub" },
- "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "0 から 1 へ nano Claude Code-like agent を構築し、毎回 1 つの仕組みを追加", "start": "学習を始める", "core_pattern": "コアパターン", "core_pattern_desc": "すべての AI コーディングエージェントは同じループを共有する:モデルを呼び出し、ツールを実行し、結果を返す。実運用ではこの上にポリシー、権限、ライフサイクル層が重なる。", "learning_path": "学習パス", "learning_path_desc": "12の段階的セッション、シンプルなループから分離された自律実行まで", "layers_title": "アーキテクチャ層", "layers_desc": "5つの直交する関心事が完全なエージェントを構成", "loc": "行", "learn_more": "詳細を見る", "versions_in_layer": "バージョン", "message_flow": "メッセージの増加", "message_flow_desc": "エージェントループ実行時のメッセージ配列の成長を観察" },
- "version": { "loc": "行のコード", "tools": "ツール", "new": "新規", "prev": "前のバージョン", "next": "次のバージョン", "view_source": "ソースを見る", "view_diff": "差分を見る", "design_decisions": "設計判断", "whats_new": "新機能", "tutorial": "チュートリアル", "simulator": "エージェントループシミュレーター", "execution_flow": "実行フロー", "architecture": "アーキテクチャ", "concept_viz": "コンセプト可視化", "alternatives": "検討された代替案", "tab_learn": "学習", "tab_simulate": "シミュレーション", "tab_code": "ソースコード", "tab_deep_dive": "詳細分析" },
- "sim": { "play": "再生", "pause": "一時停止", "step": "ステップ", "reset": "リセット", "speed": "速度", "step_of": "/" },
- "timeline": { "title": "学習パス", "subtitle": "s01からs12へ:段階的エージェント設計", "layer_legend": "レイヤー凡例", "loc_growth": "コード量の推移", "learn_more": "詳細を見る" },
+ "meta": {
+ "title": "Learn Claude Code",
+ "description": "19章・4段階で、0から高完成度の Claude Code-like agent を組み立てる"
+ },
+ "nav": {
+ "home": "ホーム",
+ "reference": "リファレンス",
+ "compare": "比較",
+ "github": "GitHub"
+ },
+ "reference": {
+ "title": "リファレンス",
+ "subtitle": "用語集、アーキテクチャ地図、深掘り補助ドキュメント。",
+ "foundation_title": "基礎ドキュメント",
+ "deep_dive_title": "深掘りドキュメント"
+ },
+ "home": {
+ "hero_title": "Learn Claude Code",
+ "hero_subtitle": "最小ループからマルチエージェント基盤と外部 capability bus まで、19章を4段階で学ぶ",
+ "start": "学習を始める",
+ "entry_title": "最初に選びやすい4つの入口",
+ "entry_desc": "初めて来たなら、いきなり章をばらばらに開かない方がよいです。順番に進むのか、段階で見るのか、差分で見るのかを先に決めます。",
+ "entry_start_title": "最小ループから始める",
+ "entry_start_desc": "初学者向けの最良の入口です。まず最小の agent loop を動かし、その後に tool、planning、context 管理を足します。",
+ "entry_start_action": "s01 を開く",
+ "entry_timeline_title": "主線を順番にたどる",
+ "entry_timeline_desc": "教材の流れを最初から最後まで追いたい人向けです。システムが章ごとにどう育つかを見られます。",
+ "entry_timeline_action": "タイムラインへ",
+ "entry_layers_title": "先に4段階をつかむ",
+ "entry_layers_desc": "章に入る前に、なぜこの教材が段階に分かれているのかを先につかみたい人向けです。",
+ "entry_layers_action": "段階ページへ",
+ "entry_compare_title": "詰まったら2章を比較する",
+ "entry_compare_desc": "学習途中で章の境界がぼやけたときに向いています。ある章が前の章に対して何を増やしたのかを見やすくします。",
+ "entry_compare_action": "比較ページへ",
+ "core_pattern": "コアパターン",
+ "core_pattern_desc": "すべての AI コーディングエージェントは同じループから成長する。モデルを呼び、ツールを実行し、結果を戻す。権限、記憶、タスク、チーム、プラグインはこのループを置き換えるのではなく拡張する。",
+ "learning_path": "学習パス",
+ "learning_path_desc": "19の段階的な章を、コアループ・システム強化・タスクランタイム・マルチエージェント基盤の4段階で構成",
+ "layers_title": "4つの段階",
+ "layers_desc": "細部を一気に詰め込むのではなく、開発者が無理なく再実装できる順序でシステムを学ぶための分解です。",
+ "guide_label": "読み方",
+ "guide_start_title": "最初はコアループを固める",
+ "guide_start_desc": "s01-s06 がまだ身体に入っていないなら、マルチエージェントや MCP へ急がず、まず単一 agent の主ループを固めます。",
+ "guide_middle_title": "中盤では状態の層を分ける",
+ "guide_middle_desc": "task、runtime task、teammate、worktree が混ざり始めたら、段階境界とデータ構造の地図へ一度戻るべきです。",
+ "guide_finish_title": "後半では基盤境界を見る",
+ "guide_finish_desc": "s15 以降は、機能が増えること自体よりも、単一実行者から複数実行レーンと外部能力へどう境界が広がるかが大事です。",
+ "loc": "行",
+ "learn_more": "詳しく見る",
+ "versions_in_layer": "章",
+ "message_flow": "メッセージの増加",
+ "message_flow_desc": "実際の agent ループで messages[] がどのように伸びていくかを観察する"
+ },
+ "version": {
+ "loc": "行のコード",
+ "tools": "ツール",
+ "new": "新規",
+ "prev": "前の章",
+ "next": "次の章",
+ "view_source": "ソースを見る",
+ "view_diff": "差分を見る",
+ "design_decisions": "設計判断",
+ "whats_new": "新しく加わるもの",
+ "tutorial": "チュートリアル",
+ "simulator": "章シミュレーター",
+ "execution_flow": "実行フロー",
+ "architecture": "アーキテクチャ",
+ "concept_viz": "概念可視化",
+ "alternatives": "検討した代替案",
+ "tab_learn": "学習",
+ "tab_simulate": "シミュレーション",
+ "tab_code": "コード",
+ "tab_deep_dive": "深掘り",
+ "guide_label": "章ガイド",
+ "guide_addition_title": "この章で増える中核構造",
+ "guide_addition_empty": "この章は、まったく新しい大きなモジュールを足すというより、既存の構造をつなぎ直して強める章です。",
+ "guide_focus_title": "最初に注目する点",
+ "guide_focus_fallback": "実装の細部へ入る前に、まず入力・状態・出力の関係を見ます。",
+ "guide_confusion_title": "混同しやすい点",
+ "guide_confusion_fallback": "概念がぼやけたら、この仕組みが何を解決し、前章より何の能力を増やしたのかへ戻ります。",
+ "guide_goal_title": "学習後にできるべきこと",
+ "guide_goal_fallback": "この章の後には、この仕組みを自分の agent system へつなぎ戻せる状態を目指します。",
+ "bridge_docs_label": "補助資料",
+ "bridge_docs_title": "深く入る前に見ておく地図",
+ "bridge_docs_intro": "この章で混同しやすい境界や仕組みを補うための橋渡し資料です。",
+ "bridge_docs_open": "資料を開く",
+ "bridge_docs_kind_map": "地図",
+ "bridge_docs_kind_mechanism": "仕組み",
+ "bridge_docs_fallback": "フォールバック",
+ "bridge_docs_back": "学習パスへ戻る",
+ "bridge_docs_standalone": "補助ドキュメント",
+ "bridge_docs_fallback_note": "この言語では未提供のため、次の言語へフォールバックしています:"
+ },
+ "sim": {
+ "play": "再生",
+ "pause": "一時停止",
+ "step": "ステップ",
+ "previous_step": "前のステップ",
+ "next_step": "次のステップ",
+ "autoplay": "自動再生",
+ "reset": "リセット",
+ "speed": "速度",
+ "step_of": "/"
+ },
+ "timeline": {
+ "title": "学習パス",
+ "subtitle": "s01 から s19 まで、4段階で agent システムを積み上げる",
+ "layer_legend": "段階の凡例",
+ "loc_growth": "コード量の成長",
+ "learn_more": "詳しく見る"
+ },
"layers": {
- "title": "アーキテクチャ層",
- "subtitle": "5つの直交する関心事が完全なエージェントを構成",
- "tools": "エージェントができること。基盤:ツールがモデルに外部世界と対話する能力を与える。",
- "planning": "作業の組織化。シンプルなToDoリストからエージェント間で共有される依存関係対応タスクボードまで。",
- "memory": "コンテキスト制限内での記憶保持。圧縮戦略によりエージェントが一貫性を失わずに無限に作業可能。",
- "concurrency": "ノンブロッキング実行。バックグラウンドスレッドと通知バスによる並列作業。",
- "collaboration": "マルチエージェント連携。チーム、メッセージング、自律的に考えるチームメイト。"
+ "title": "アーキテクチャ段階",
+ "subtitle": "最小の動くループから始めて、安全性、永続状態、バックグラウンド実行、マルチエージェント協調、外部 capability 連携へと進む。",
+ "guide_label": "読み方",
+ "guide_start_title": "最初に読むなら",
+ "guide_start_desc": "第1段階から始めます。後のすべての能力はコアループの外側に積み上がるため、そこを飛ばさない方がよいです。",
+ "guide_middle_title": "途中でぼやけたら",
+ "guide_middle_desc": "まず今の章がどの段階に属するかを確認し、それが安全性・状態・ランタイム・基盤のどれを足しているのかを見ます。",
+ "guide_finish_title": "自作したいなら",
+ "guide_finish_desc": "各段階の終わりごとに、1つの実装可能なシステムの塊が手元に残るべきで、単なる用語集になってはいけません。",
+ "core": "コアループ: ツール、計画、委譲、スキル、コンテキスト圧縮を含む最小の単一エージェントを作る段階。",
+ "core_outcome": "この段階を終えたら、動く単一 agent harness を自力で書けるはずです。",
+ "hardening": "システム強化: 権限、Hook、記憶、Prompt 組み立て、エラー回復によって「動く」から「安全に予測可能に動く」へ進める段階。",
+ "hardening_outcome": "この段階を終えたら、agent をより安全に、安定的に、拡張しやすくできるはずです。",
+ "runtime": "タスクランタイム: 作業をセッション内の一時的な計画から、永続・バックグラウンド・時間起点の実行へ変える段階。",
+ "runtime_outcome": "この段階を終えたら、会話レベルの手順を永続的なタスク実行系へ持ち上げられるはずです。",
+ "platform": "マルチエージェント基盤: 永続チームメイト、プロトコル、自律動作、分離実行レーン、MCP / Plugin capability routing を加える段階。",
+ "platform_outcome": "この段階を終えたら、単一 agent を協調基盤へ育てられるはずです。"
},
"compare": {
- "title": "バージョン比較",
- "subtitle": "任意の2つのバージョン間の変更を確認",
- "select_a": "バージョンA",
- "select_b": "バージョンB",
- "loc_delta": "コード量の差分",
+ "title": "学習パス比較",
+ "subtitle": "2つの章のあいだで何の能力が増えるのか、なぜそこで導入されるのか、学習時にどこへ注目すべきかを比べる。",
+ "learning_jump": "学習ジャンプ",
+ "selector_title": "まず比べたい一歩を選ぶ",
+ "selector_note": "このページは、先に能力境界の変化を理解させ、そのあとで必要なら実装詳細へ入る構成です。",
+ "select_a": "章 A",
+ "select_b": "章 B",
+ "select_placeholder": "-- 選択してください --",
+ "carry_from_a": "A から持ち帰るもの",
+ "new_in_b": "B で増えるもの",
+ "progression": "進み方",
+ "progression_same_chapter": "同じ章を選んでいます。章単体の読み直しには向きますが、能力のジャンプを見る比較ではありません。",
+ "progression_reverse": "これは後ろ向きの比較です。後の章の能力が、実はどの早い章から来ているかを見るのに向いています。",
+ "progression_direct": "これは隣り合う一歩です。システムが章ごとに自然に育つ流れを学ぶのに最も向いています。",
+ "progression_same_layer": "両方とも同じ能力レイヤーにあるため、主眼は新概念よりも同じ概念を厚く完成させる点にあります。",
+ "progression_cross_layer": "この比較は新しい能力レイヤーへまたがるため、重要なのはシステム境界がどう変わるかです。",
+ "chapter_distance": "章の距離",
+ "shared_tools_count": "共通ツール",
+ "new_surface": "新しい実装面",
+ "empty_lead": "この章の短い要旨はまだ抽出されていません。",
+ "loc_delta": "コード量差分",
"lines": "行",
- "new_tools_in_b": "Bの新規ツール",
- "new_classes_in_b": "Bの新規クラス",
- "new_functions_in_b": "Bの新規関数",
+ "new_tools_in_b": "Bで増えるツール",
+ "new_classes_in_b": "Bで増えるクラス",
+ "new_functions_in_b": "Bで増える関数",
"tool_comparison": "ツール比較",
"only_in": "のみ",
"shared": "共通",
"none": "なし",
- "source_diff": "ソースコード差分",
- "empty_hint": "上で2つのバージョンを選択して比較してください。",
- "architecture": "アーキテクチャ"
+ "source_diff": "ソース差分(任意)",
+ "source_diff_note": "実装の展開まで追いたい場合だけ diff を見れば十分です。機構だけ知りたいなら、その前の学習カードで足ります。",
+ "empty_hint": "先に2つの章を選び、そのアップグレードが実際に何を増やすのか見てください。",
+ "architecture": "アーキテクチャ",
+ "architecture_note": "まずモジュール境界と協調関係を見て、そのあと必要なら実装詳細へ入ります。"
},
"diff": {
"new_classes": "新規クラス",
"new_tools": "新規ツール",
"new_functions": "新規関数",
- "loc_delta": "コード量の差分"
+ "loc_delta": "コード量差分",
+ "view_unified": "統合表示",
+ "view_split": "分割表示"
},
"sessions": {
"s01": "エージェントループ",
- "s02": "ツール",
- "s03": "TodoWrite",
+ "s02": "ツール使用",
+ "s03": "Todo 書き込み",
"s04": "サブエージェント",
"s05": "スキル",
"s06": "コンテキスト圧縮",
- "s07": "タスクシステム",
- "s08": "バックグラウンドタスク",
- "s09": "エージェントチーム",
- "s10": "チームプロトコル",
- "s11": "自律エージェント",
- "s12": "Worktree + タスク分離"
+ "s07": "権限システム",
+ "s08": "Hook システム",
+ "s09": "記憶システム",
+ "s10": "システムプロンプト",
+ "s11": "エラー回復",
+ "s12": "タスクシステム",
+ "s13": "バックグラウンドタスク",
+ "s14": "Cron スケジューラ",
+ "s15": "エージェントチーム",
+ "s16": "チームプロトコル",
+ "s17": "自律エージェント",
+ "s18": "Worktree 分離",
+ "s19": "MCP とプラグイン"
},
"layer_labels": {
- "tools": "ツールと実行",
- "planning": "計画と調整",
- "memory": "メモリ管理",
- "concurrency": "並行処理",
- "collaboration": "コラボレーション"
+ "core": "コアループ",
+ "hardening": "システム強化",
+ "runtime": "タスクランタイム",
+ "platform": "マルチエージェント基盤"
},
"viz": {
- "s01": "エージェント Whileループ",
+ "s01": "Agent 主ループ",
"s02": "ツールディスパッチマップ",
- "s03": "TodoWrite リマインドシステム",
- "s04": "サブエージェント コンテキスト分離",
- "s05": "オンデマンド スキルローディング",
+ "s03": "Todo 書き込みリマインダーループ",
+ "s04": "サブエージェントのコンテキスト分離",
+ "s05": "オンデマンドスキル読み込み",
"s06": "3層コンテキスト圧縮",
- "s07": "タスク依存関係グラフ",
- "s08": "バックグラウンドタスクレーン",
- "s09": "エージェントチーム メールボックス",
- "s10": "FSM チームプロトコル",
- "s11": "自律エージェントサイクル",
- "s12": "Worktree タスク分離"
+ "s07": "権限ゲート",
+ "s08": "ライフサイクル Hook 面",
+ "s09": "長期記憶と短期記憶の分離",
+ "s10": "Prompt 組み立てパイプライン",
+ "s11": "回復分岐ステートマシン",
+ "s12": "タスク依存グラフ",
+ "s13": "バックグラウンドタスクレーン",
+ "s14": "Cron トリガーパイプライン",
+ "s15": "エージェントチームのメールボックス",
+ "s16": "プロトコル時系列図",
+ "s17": "自律エージェントサイクル",
+ "s18": "Worktree タスク分離",
+ "s19": "外部 capability bus"
}
}
diff --git a/web/src/i18n/messages/zh.json b/web/src/i18n/messages/zh.json
index a8d9f3651..9cf41e827 100644
--- a/web/src/i18n/messages/zh.json
+++ b/web/src/i18n/messages/zh.json
@@ -1,24 +1,155 @@
{
- "meta": { "title": "Learn Claude Code", "description": "从 0 到 1 构建 nano Claude Code-like agent,每次只加一个机制" },
- "nav": { "home": "首页", "timeline": "学习路径", "compare": "版本对比", "layers": "架构层", "github": "GitHub" },
- "home": { "hero_title": "Learn Claude Code", "hero_subtitle": "从 0 到 1 构建 nano Claude Code-like agent,每次只加一个机制", "start": "开始学习", "core_pattern": "核心模式", "core_pattern_desc": "所有 AI 编程 Agent 共享同一个循环:调用模型、执行工具、回传结果。生产级系统会在其上叠加策略、权限和生命周期层。", "learning_path": "学习路径", "learning_path_desc": "12 个渐进式课程,从简单循环到隔离化自治执行", "layers_title": "架构层次", "layers_desc": "五个正交关注点组合成完整的 Agent", "loc": "行", "learn_more": "了解更多", "versions_in_layer": "个版本", "message_flow": "消息增长", "message_flow_desc": "观察 Agent 循环执行时消息数组的增长" },
- "version": { "loc": "行代码", "tools": "个工具", "new": "新增", "prev": "上一版", "next": "下一版", "view_source": "查看源码", "view_diff": "查看变更", "design_decisions": "设计决策", "whats_new": "新增内容", "tutorial": "教程", "simulator": "Agent 循环模拟器", "execution_flow": "执行流程", "architecture": "架构", "concept_viz": "概念可视化", "alternatives": "替代方案", "tab_learn": "学习", "tab_simulate": "模拟", "tab_code": "源码", "tab_deep_dive": "深入探索" },
- "sim": { "play": "播放", "pause": "暂停", "step": "单步", "reset": "重置", "speed": "速度", "step_of": "/" },
- "timeline": { "title": "学习路径", "subtitle": "s01 到 s12:渐进式 Agent 设计", "layer_legend": "层次图例", "loc_growth": "代码量增长", "learn_more": "了解更多" },
+ "meta": {
+ "title": "Learn Claude Code",
+ "description": "19 章节、4 个阶段,带你从 0 到 1 手搓一个结构完整的 Claude Code-like Agent"
+ },
+ "nav": {
+ "home": "首页",
+ "reference": "参考资料",
+ "compare": "版本对比",
+ "github": "GitHub"
+ },
+ "reference": {
+ "title": "参考资料",
+ "subtitle": "术语表、架构地图与深入阅读补充文档。",
+ "foundation_title": "基础文档",
+ "deep_dive_title": "深入阅读"
+ },
+ "home": {
+ "hero_title": "Learn Claude Code",
+ "hero_subtitle": "19 章节、4 个阶段,从最小闭环一路搭到多 Agent 平台与外部能力总线",
+ "start": "开始学习",
+ "entry_title": "四个最推荐的学习入口",
+ "entry_desc": "如果你是第一次进站,不要急着随机点章节。先决定你要按顺序学、按阶段学,还是只看两步之间的能力跃迁。",
+ "entry_start_title": "直接从最小闭环开始",
+ "entry_start_desc": "适合第一次接触这套仓库的读者。先把最小 agent loop 跑通,再往后加工具、规划和上下文管理。",
+ "entry_start_action": "打开 s01",
+ "entry_timeline_title": "按主线顺序一路学",
+ "entry_timeline_desc": "适合想完整跟一遍教学节奏的人。你会按章节顺序看到系统是怎么自然长出来的。",
+ "entry_timeline_action": "打开时间线",
+ "entry_layers_title": "先按四阶段理解全局",
+ "entry_layers_desc": "适合先想弄明白“为什么要这样分阶段”的读者。你会先看到单 agent、加固、任务运行时、多 agent 平台这四层。",
+ "entry_layers_action": "打开阶段页",
+ "entry_compare_title": "卡住时看两章之间差了什么",
+ "entry_compare_desc": "适合已经读到一半,但开始混淆章节边界的读者。它会帮助你看清这一章到底比上一章多了什么能力。",
+ "entry_compare_action": "打开对比页",
+ "core_pattern": "核心模式",
+ "core_pattern_desc": "所有 AI 编程 Agent 的底层都围绕同一个闭环:调用模型、执行工具、回传结果。后面的权限、记忆、任务、团队与插件能力,都是围绕这个闭环继续搭起来的控制层。",
+ "learning_path": "学习路径",
+ "learning_path_desc": "19 个递进章节,按照核心闭环、系统加固、任务运行时、多 Agent 平台四个阶段展开",
+ "layers_title": "四个阶段",
+ "layers_desc": "不是把细节堆满,而是按开发者心智把一个完整 Agent 系统拆成四个逐步成立的阶段",
+ "guide_label": "阅读提示",
+ "guide_start_title": "第一次读:先抓主闭环",
+ "guide_start_desc": "如果你还没真的把 s01-s06 跑通,不要急着钻多 agent 或 MCP。先把单 agent 主闭环建立起来。",
+ "guide_middle_title": "读到中段:先分清状态层",
+ "guide_middle_desc": "当你开始分不清 task、runtime task、teammate、worktree 这些词时,说明你该回头看阶段边界与数据结构地图。",
+ "guide_finish_title": "读到后段:盯住平台边界",
+ "guide_finish_desc": "进入 s15-s19 后,重点不再只是多几个功能,而是系统边界如何从单执行者升级成多执行通道和外部能力总线。",
+ "loc": "行",
+ "learn_more": "了解更多",
+ "versions_in_layer": "个章节",
+ "message_flow": "消息增长",
+ "message_flow_desc": "观察一次真实 Agent 闭环中,messages[] 如何随着工具调用不断增长"
+ },
+ "version": {
+ "loc": "行代码",
+ "tools": "个工具",
+ "new": "新增",
+ "prev": "上一章",
+ "next": "下一章",
+ "view_source": "查看源码",
+ "view_diff": "查看变更",
+ "design_decisions": "设计决策",
+ "whats_new": "新增内容",
+ "tutorial": "教程",
+ "simulator": "章节模拟器",
+ "execution_flow": "执行流程",
+ "architecture": "架构",
+ "concept_viz": "概念可视化",
+ "alternatives": "替代方案",
+ "tab_learn": "学习",
+ "tab_simulate": "模拟",
+ "tab_code": "源码",
+ "tab_deep_dive": "深入探索",
+ "guide_label": "本章导读",
+ "guide_addition_title": "这章新增的核心结构",
+ "guide_addition_empty": "这一章主要是在前一章之上把已有结构串起来,而不是单独引入一个全新的大模块。",
+ "guide_focus_title": "先盯住什么",
+ "guide_focus_fallback": "先把这一章最关键的输入、状态和输出关系看清,不要一开始就钻到实现细枝末节里。",
+ "guide_confusion_title": "最容易混淆什么",
+ "guide_confusion_fallback": "如果概念开始混在一起,就回到“这个机制到底解决什么问题、它和前一章相比多了哪一层能力”。",
+ "guide_goal_title": "学完应该会什么",
+ "guide_goal_fallback": "学完这一章后,你应该能把这里新增的机制独立接回自己的 agent 主系统。",
+ "bridge_docs_label": "桥接资料",
+ "bridge_docs_title": "继续往下前,先补这几张地图",
+ "bridge_docs_intro": "这些资料不是旁枝细节,而是专门用来补当前章节最容易混淆的结构边界和主线机制。",
+ "bridge_docs_open": "打开补充页",
+ "bridge_docs_kind_map": "结构地图",
+ "bridge_docs_kind_mechanism": "机制展开",
+ "bridge_docs_fallback": "内容回退",
+ "bridge_docs_back": "回到学习主线",
+ "bridge_docs_standalone": "桥接文档",
+ "bridge_docs_fallback_note": "当前语言暂无该文档,已自动回退到:"
+ },
+ "sim": {
+ "play": "播放",
+ "pause": "暂停",
+ "step": "单步",
+ "previous_step": "上一步",
+ "next_step": "下一步",
+ "autoplay": "自动播放",
+ "reset": "重置",
+ "speed": "速度",
+ "step_of": "/"
+ },
+ "timeline": {
+ "title": "学习路径",
+ "subtitle": "s01 到 s19:按 4 个阶段渐进搭建一个结构完整、接近真实主脉络的 Agent 系统",
+ "layer_legend": "阶段图例",
+ "loc_growth": "代码量增长",
+ "learn_more": "了解更多"
+ },
"layers": {
- "title": "架构层次",
- "subtitle": "五个正交关注点组合成完整的 Agent",
- "tools": "Agent 能做什么。基础层:工具赋予模型与外部世界交互的能力。",
- "planning": "如何组织工作。从简单的待办列表到跨 Agent 共享的依赖感知任务板。",
- "memory": "在上下文限制内保持记忆。压缩策略让 Agent 可以无限工作而不失去连贯性。",
- "concurrency": "非阻塞执行。后台线程和通知总线实现并行工作。",
- "collaboration": "多 Agent 协作。团队、消息传递和能独立思考的自主队友。"
+ "title": "架构阶段",
+ "subtitle": "从最小可运行闭环开始,逐步加上安全、持久状态、后台运行、多 Agent 协作与外部能力接入",
+ "guide_label": "怎么读",
+ "guide_start_title": "如果你是第一次学",
+ "guide_start_desc": "从第一阶段开始,不要跳过核心闭环。后面所有能力都是包在这条主循环外面的。",
+ "guide_middle_title": "如果你读到中途开始混",
+ "guide_middle_desc": "先看当前章节属于哪一阶段,再判断它是在补安全、补状态、补运行时,还是在扩平台边界。",
+ "guide_finish_title": "如果你准备自己实现",
+ "guide_finish_desc": "每学完一个阶段,都应该手里多出一块真正成立的系统,而不是只记住一些名词。",
+ "core": "核心闭环:先把单 Agent 最基本的工作回路搭起来,包括工具、计划、委托、技能加载和上下文压缩。",
+ "core_outcome": "学完本阶段,你应该能独立写出一个真正能工作的单 Agent harness。",
+ "hardening": "系统加固:把“能跑”推进到“稳定可控地跑”,重点是权限、Hook、长期记忆、Prompt 装配与错误恢复。",
+ "hardening_outcome": "学完本阶段,你应该知道怎样让 agent 不只是能跑,而是更稳、更安全、更容易扩展。",
+ "runtime": "任务运行时:把工作从一次会话内的临时计划,提升成可持久、可后台、可定时触发的任务执行系统。",
+ "runtime_outcome": "学完本阶段,你应该能把聊天内的步骤,升级成真正可持久推进的任务系统。",
+ "platform": "多 Agent 平台:引入长期存在的队友、协议、自治行为、隔离执行工作区,以及 MCP / Plugin 外部能力总线。",
+ "platform_outcome": "学完本阶段,你应该能把单 agent 升级成多执行者协作的平台。"
},
"compare": {
- "title": "版本对比",
- "subtitle": "查看任意两个版本之间的变化",
- "select_a": "版本 A",
- "select_b": "版本 B",
+ "title": "学习路径对比",
+ "subtitle": "比较两个章节之间新增了什么能力、为什么在这里引入,以及学习时该先盯住哪条主线。",
+ "learning_jump": "学习跃迁",
+ "selector_title": "先决定你要比较哪一步升级",
+ "selector_note": "这页优先帮助你理解能力边界的变化,而不是先把你拖进源码细节里。",
+ "select_a": "章节 A",
+ "select_b": "章节 B",
+ "select_placeholder": "-- 请选择 --",
+ "carry_from_a": "从 A 带走",
+ "new_in_b": "B 新引入",
+ "progression": "推进关系",
+ "progression_same_chapter": "你选中了同一章。适合回看这一章本身的架构与源码,不适合看能力跃迁。",
+ "progression_reverse": "这是一次回看式对比,适合观察哪些能力其实来自更早的章节。",
+ "progression_direct": "这是紧邻的一步升级,最适合按教程顺序学习系统是如何自然长出来的。",
+ "progression_same_layer": "两章仍在同一能力阶段内,重点是看机制如何从“能跑”变成“更稳、更完整”。",
+ "progression_cross_layer": "这次比较跨越了能力阶段,重点是看系统边界怎样被重新定义。",
+ "chapter_distance": "相隔章节",
+ "shared_tools_count": "共有工具",
+ "new_surface": "新增实现面",
+ "empty_lead": "该章节的核心一句话摘要暂未提取出来。",
"loc_delta": "代码量差异",
"lines": "行",
"new_tools_in_b": "B 中新增工具",
@@ -28,49 +159,66 @@
"only_in": "仅在",
"shared": "共有",
"none": "无",
- "source_diff": "源码差异",
- "empty_hint": "请在上方选择两个版本进行对比。",
- "architecture": "架构"
+ "source_diff": "源码差异(选看)",
+ "source_diff_note": "如果你在意实现展开,可以再看源码 diff;如果你只关心机制,前面的学习卡片已经足够。",
+ "empty_hint": "先选两个章节,再看这次升级到底新增了什么能力。",
+ "architecture": "架构视图",
+ "architecture_note": "先看模块边界和协作关系,再决定要不要往下钻实现细节。"
},
"diff": {
"new_classes": "新增类",
"new_tools": "新增工具",
"new_functions": "新增函数",
- "loc_delta": "代码量差异"
+ "loc_delta": "代码量差异",
+ "view_unified": "统一视图",
+ "view_split": "分栏视图"
},
"sessions": {
- "s01": "Agent Loop",
- "s02": "Tool Use",
- "s03": "TodoWrite",
- "s04": "Subagent",
- "s05": "Skills",
- "s06": "Context Compact",
- "s07": "Task System",
- "s08": "Background Tasks",
- "s09": "Agent Teams",
- "s10": "Team Protocols",
- "s11": "Autonomous Agents",
- "s12": "Worktree + Task Isolation"
+ "s01": "Agent 循环",
+ "s02": "工具使用",
+ "s03": "待办写入",
+ "s04": "子代理",
+ "s05": "技能系统",
+ "s06": "上下文压缩",
+ "s07": "权限系统",
+ "s08": "Hook 系统",
+ "s09": "记忆系统",
+ "s10": "系统提示词",
+ "s11": "错误恢复",
+ "s12": "任务系统",
+ "s13": "后台任务",
+ "s14": "定时调度",
+ "s15": "Agent 团队",
+ "s16": "团队协议",
+ "s17": "自主代理",
+ "s18": "Worktree 隔离",
+ "s19": "MCP 与插件"
},
"layer_labels": {
- "tools": "工具与执行",
- "planning": "规划与协调",
- "memory": "记忆管理",
- "concurrency": "并发",
- "collaboration": "协作"
+ "core": "核心闭环",
+ "hardening": "系统加固",
+ "runtime": "任务运行时",
+ "platform": "多 Agent 平台"
},
"viz": {
- "s01": "Agent While-Loop",
- "s02": "Tool Dispatch Map",
- "s03": "TodoWrite Nag System",
- "s04": "Subagent Context Isolation",
- "s05": "On-Demand Skill Loading",
- "s06": "Three-Layer Context Compact",
- "s07": "Task Dependency Graph",
- "s08": "Background Task Lanes",
- "s09": "Agent Team Mailboxes",
- "s10": "FSM Team Protocols",
- "s11": "Autonomous Agent Cycle",
- "s12": "Worktree Task Isolation"
+ "s01": "Agent 主循环",
+ "s02": "工具分发映射",
+ "s03": "待办写入提醒系统",
+ "s04": "子代理上下文隔离",
+ "s05": "按需技能加载",
+ "s06": "三层上下文压缩",
+ "s07": "权限判定闸门",
+ "s08": "生命周期 Hook 面板",
+ "s09": "长短期记忆分层",
+ "s10": "Prompt 装配流水线",
+ "s11": "恢复分支状态机",
+ "s12": "任务依赖图",
+ "s13": "后台任务通道",
+ "s14": "定时触发流水线",
+ "s15": "Agent 团队邮箱",
+ "s16": "协议时序图",
+ "s17": "自主代理循环",
+ "s18": "Worktree 任务隔离",
+ "s19": "外部能力总线"
}
}
diff --git a/web/src/lib/bridge-docs.ts b/web/src/lib/bridge-docs.ts
new file mode 100644
index 000000000..130385a2b
--- /dev/null
+++ b/web/src/lib/bridge-docs.ts
@@ -0,0 +1,344 @@
+import { VERSION_ORDER, type VersionId } from "@/lib/constants";
+
+type SupportedLocale = "zh" | "en" | "ja";
+type BridgeKind = "map" | "mechanism";
+
+export interface BridgeDocDescriptor {
+ slug: string;
+ kind: BridgeKind;
+ title: Record<SupportedLocale, string>;
+ summary: Record<SupportedLocale, string>;
+}
+
+export const BRIDGE_DOCS: Record<string, BridgeDocDescriptor> = {
+ "s00-architecture-overview": {
+ slug: "s00-architecture-overview",
+ kind: "map",
+ title: {
+ zh: "系统全景总览",
+ en: "Architecture Overview",
+ ja: "アーキテクチャ全体図",
+ },
+ summary: {
+ zh: "先看系统全貌,再回到当前章节,能更快分清这一层到底属于哪里。",
+ en: "The big-picture map. Come back here whenever you feel lost about where a chapter fits.",
+ ja: "全体像を先に見てから現在の章へ戻るための俯瞰図です。",
+ },
+ },
+ "s00a-query-control-plane": {
+ slug: "s00a-query-control-plane",
+ kind: "mechanism",
+ title: {
+ zh: "查询控制平面",
+ en: "Query Control Plane",
+ ja: "クエリ制御プレーン",
+ },
+ summary: {
+ zh: "把一次请求如何穿过控制平面讲完整,适合权限、Prompt、MCP 这些章节前后补看。",
+ en: "Why the simple loop needs a coordination layer as the system grows. Best read after Stage 1.",
+ ja: "1つの要求が control plane をどう通るかを通しで補う資料です。",
+ },
+ },
+ "s00b-one-request-lifecycle": {
+ slug: "s00b-one-request-lifecycle",
+ kind: "mechanism",
+ title: {
+ zh: "一次请求生命周期",
+ en: "One Request Lifecycle",
+ ja: "1 リクエストのライフサイクル",
+ },
+ summary: {
+ zh: "把一次请求从进入、执行到回写走完一遍,适合主线开始混时回头校正心智。",
+ en: "Traces one request from entry to write-back. Best read after Stage 2 when pieces need connecting.",
+ ja: "1回の要求を入口から write-back まで通して確認する補助資料です。",
+ },
+ },
+ "s00c-query-transition-model": {
+ slug: "s00c-query-transition-model",
+ kind: "mechanism",
+ title: {
+ zh: "Query 续行模型",
+ en: "Query Transition Model",
+ ja: "クエリ遷移モデル",
+ },
+ summary: {
+ zh: "专门讲一条 query 为什么继续下一轮,适合恢复、压缩、预算、hook 开始缠在一起时回看。",
+ en: "Why each continuation needs an explicit reason. Best read alongside s11 (Error Recovery).",
+ ja: "エラー回復・文脈圧縮・予算制御・hook が重なり始めたときに、query がなぜ次のターンへ続くのかを補う資料です。",
+ },
+ },
+ "s00d-chapter-order-rationale": {
+ slug: "s00d-chapter-order-rationale",
+ kind: "map",
+ title: {
+ zh: "为什么这样安排章节顺序",
+ en: "Why This Chapter Order",
+ ja: "なぜこの章順なのか",
+ },
+ summary: {
+ zh: "专门解释为什么课程要按现在这个顺序展开,适合读者刚进入主线或准备自己重排章节时回看。",
+ en: "Explains why the curriculum is ordered this way and what breaks when the sequence is rearranged.",
+ ja: "なぜこの順序で学ぶのか、順番を崩すと何が混乱するのかを整理する資料です。",
+ },
+ },
+ "s00f-code-reading-order": {
+ slug: "s00f-code-reading-order",
+ kind: "map",
+ title: {
+ zh: "本仓库代码阅读顺序",
+ en: "Code Reading Order",
+ ja: "コード読解順",
+ },
+ summary: {
+ zh: "专门告诉你本地 `agents/*.py` 该按什么顺序打开、每章先盯住哪类状态和函数,避免重新乱翻源码。",
+ en: "Shows which local `agents/*.py` files to open first and what state or functions to inspect before the code turns into noise.",
+ ja: "ローカルの `agents/*.py` をどの順で開き、各章でまずどの状態や関数を見るべきかを整理した読解ガイドです。",
+ },
+ },
+ "s00e-reference-module-map": {
+ slug: "s00e-reference-module-map",
+ kind: "map",
+ title: {
+ zh: "参考仓库模块映射图",
+ en: "Reference Module Map",
+ ja: "参照モジュール対応表",
+ },
+ summary: {
+ zh: "把参考仓库里真正重要的模块簇,和当前课程章节一一对齐,专门用来验证章节顺序是否合理。",
+ en: "Maps the reference repo's real module clusters onto the current curriculum to validate the chapter order.",
+ ja: "参照リポジトリの高信号モジュール群と現在の教材章を対応付け、章順の妥当性を確認する地図です。",
+ },
+ },
+ "s02a-tool-control-plane": {
+ slug: "s02a-tool-control-plane",
+ kind: "mechanism",
+ title: {
+ zh: "工具控制平面",
+ en: "Tool Control Plane",
+ ja: "ツール制御プレーン",
+ },
+ summary: {
+ zh: "专门补工具调用怎样进入统一执行面,适合权限、Hook、MCP 等章节一起看。",
+ en: "Why tools become a coordination layer, not just a lookup table. Best read after s02.",
+ ja: "ツール呼び出しが共通の実行面に入る流れを補う資料です。",
+ },
+ },
+ "s02b-tool-execution-runtime": {
+ slug: "s02b-tool-execution-runtime",
+ kind: "mechanism",
+ title: {
+ zh: "工具执行运行时",
+ en: "Tool Execution Runtime",
+ ja: "ツール実行ランタイム",
+ },
+ summary: {
+ zh: "把工具并发、串行、进度消息、结果顺序和 context 合并这层运行时讲清楚。",
+ en: "How multiple tool calls in one turn get executed safely. Best read after s02.",
+ ja: "tool の並列実行と直列実行、progress 更新、結果順序、context 統合をまとめて補う資料です。",
+ },
+ },
+ glossary: {
+ slug: "glossary",
+ kind: "map",
+ title: {
+ zh: "术语表",
+ en: "Glossary",
+ ja: "用語集",
+ },
+ summary: {
+ zh: "术语一多就先回这里,统一名词边界,避免 task、runtime task、teammate 混在一起。",
+ en: "Bookmark this. Come back whenever you hit an unfamiliar term.",
+ ja: "用語が増えて混ざり始めたときに戻る境界整理用の用語集です。",
+ },
+ },
+ "entity-map": {
+ slug: "entity-map",
+ kind: "map",
+ title: {
+ zh: "对象与模块关系图",
+ en: "Entity Map",
+ ja: "エンティティ地図",
+ },
+ summary: {
+ zh: "按对象和模块关系看系统,适合读到中后段时重新校准模块边界。",
+ en: "Use this when concepts start to blur. It tells you which layer each thing belongs to.",
+ ja: "オブジェクトとモジュール関係から全体を再確認する地図です。",
+ },
+ },
+ "data-structures": {
+ slug: "data-structures",
+ kind: "map",
+ title: {
+ zh: "关键数据结构地图",
+ en: "Data Structure Map",
+ ja: "主要データ構造マップ",
+ },
+ summary: {
+ zh: "把核心记录结构放在一起看,适合任务、运行时、多 Agent 章节反复对照。",
+ en: "Every important record in one place. Use when you lose track of where state lives.",
+ ja: "主要な record 構造を横断的に見直すための資料です。",
+ },
+ },
+ "s10a-message-prompt-pipeline": {
+ slug: "s10a-message-prompt-pipeline",
+ kind: "mechanism",
+ title: {
+ zh: "消息与 Prompt 装配流水线",
+ en: "Message-Prompt Pipeline",
+ ja: "メッセージと Prompt の組み立てパイプライン",
+ },
+ summary: {
+ zh: "专门补消息、Prompt 片段和装配顺序,适合 s10 前后深入看。",
+ en: "The full input pipeline beyond system prompt. Best read alongside s10.",
+ ja: "message と prompt 片をどの順に組み立てるかを補う解説です。",
+ },
+ },
+ "s13a-runtime-task-model": {
+ slug: "s13a-runtime-task-model",
+ kind: "mechanism",
+ title: {
+ zh: "运行时任务模型",
+ en: "Runtime Task Model",
+ ja: "ランタイムタスクモデル",
+ },
+ summary: {
+ zh: "把 task goal、runtime record、notification 三层边界一次讲清。",
+ en: "The most common Stage 3 confusion: two meanings of 'task'. Read between s12 and s13.",
+ ja: "作業目標・実行記録・通知の3層境界をまとめて補う資料です。",
+ },
+ },
+ "s19a-mcp-capability-layers": {
+ slug: "s19a-mcp-capability-layers",
+ kind: "mechanism",
+ title: {
+ zh: "MCP 能力层地图",
+ en: "MCP Capability Layers",
+ ja: "MCP 能力層マップ",
+ },
+ summary: {
+ zh: "把本地工具、插件、MCP server 如何接回同一 capability bus 讲完整。",
+ en: "MCP is more than external tools. This shows the full capability stack. Read alongside s19.",
+ ja: "native tool・plugin・MCP server が 1 つの capability bus へ戻る全体像を補います。",
+ },
+ },
+ "team-task-lane-model": {
+ slug: "team-task-lane-model",
+ kind: "map",
+ title: {
+ zh: "队友-任务-车道模型",
+ en: "Teammate-Task-Lane Model",
+ ja: "チームメイト・タスク・レーンモデル",
+ },
+ summary: {
+ zh: "专门拆清队友、协议请求、任务、运行时槽位和 worktree 执行通道这五层边界。",
+ en: "Five concepts that look similar but live on different layers. Keep open during s15-s18.",
+ ja: "teammate・protocol request・task・runtime slot・worktree lane の 5 層境界を整理します。",
+ },
+ },
+ "teaching-scope": {
+ slug: "teaching-scope",
+ kind: "map",
+ title: {
+ zh: "教学范围与取舍",
+ en: "Teaching Scope",
+ ja: "教材の守備範囲",
+ },
+ summary: {
+ zh: "说明这套教学仓库刻意不讲什么,帮助读者守住主线,不被低价值细节带偏。",
+ en: "What this repo teaches, what it deliberately leaves out, and why.",
+ ja: "この教材が意図的に省いている範囲を示し、主線を守るための資料です。",
+ },
+ },
+};
+
+export const FOUNDATION_DOC_SLUGS = [
+ "s00-architecture-overview",
+ "s00d-chapter-order-rationale",
+ "s00f-code-reading-order",
+ "s00e-reference-module-map",
+ "teaching-scope",
+ "glossary",
+ "data-structures",
+ "entity-map",
+] as const;
+
+export const MECHANISM_DOC_SLUGS = [
+ "s00a-query-control-plane",
+ "s00b-one-request-lifecycle",
+ "s00c-query-transition-model",
+ "s02a-tool-control-plane",
+ "s02b-tool-execution-runtime",
+ "s10a-message-prompt-pipeline",
+ "s13a-runtime-task-model",
+ "team-task-lane-model",
+ "s19a-mcp-capability-layers",
+] as const;
+
+export const RESET_DOC_SLUGS = [
+ "s00a-query-control-plane",
+ "s02b-tool-execution-runtime",
+ "s13a-runtime-task-model",
+ "team-task-lane-model",
+ "s19a-mcp-capability-layers",
+] as const;
+
+export const BRIDGE_DOC_RELATED_VERSIONS: Partial<
+ Record<string, VersionId[]>
+> = {
+ "s00-architecture-overview": ["s01", "s07", "s12", "s15"],
+ "s00d-chapter-order-rationale": ["s01", "s12", "s15"],
+ "s00f-code-reading-order": ["s01", "s07", "s12", "s15"],
+ "s00e-reference-module-map": ["s01", "s07", "s12", "s15", "s18", "s19"],
+ glossary: ["s01", "s09", "s16", "s19"],
+ "entity-map": ["s04", "s12", "s15", "s18", "s19"],
+ "data-structures": ["s03", "s09", "s12", "s13", "s18"],
+ "teaching-scope": ["s01", "s05", "s12", "s19"],
+ "s00a-query-control-plane": ["s07", "s10", "s11", "s19"],
+ "s00b-one-request-lifecycle": ["s04", "s11", "s14"],
+ "s00c-query-transition-model": ["s11", "s17"],
+ "s02a-tool-control-plane": ["s02", "s08", "s19"],
+ "s02b-tool-execution-runtime": ["s02", "s07", "s13", "s19"],
+ "s10a-message-prompt-pipeline": ["s10"],
+ "s13a-runtime-task-model": ["s12", "s13", "s14", "s17"],
+ "team-task-lane-model": ["s15", "s16", "s17", "s18"],
+ "s19a-mcp-capability-layers": ["s19"],
+};
+
+export const CHAPTER_BRIDGE_DOCS: Partial<Record<VersionId, string[]>> = {
+ s01: ["s00-architecture-overview", "s00d-chapter-order-rationale", "s00f-code-reading-order", "s00e-reference-module-map", "glossary"],
+ s02: ["s02a-tool-control-plane", "s02b-tool-execution-runtime"],
+ s03: ["data-structures", "glossary"],
+ s04: ["entity-map", "s00b-one-request-lifecycle"],
+ s05: ["glossary", "teaching-scope"],
+ s06: ["data-structures", "s00b-one-request-lifecycle"],
+ s07: ["s00f-code-reading-order", "s00a-query-control-plane", "s02b-tool-execution-runtime"],
+ s08: ["s02a-tool-control-plane", "entity-map"],
+ s09: ["data-structures", "glossary"],
+ s10: ["s10a-message-prompt-pipeline", "s00a-query-control-plane"],
+ s11: ["s00c-query-transition-model", "s00b-one-request-lifecycle"],
+ s12: ["s00f-code-reading-order", "data-structures", "entity-map"],
+ s13: ["s13a-runtime-task-model", "s02b-tool-execution-runtime"],
+ s14: ["s13a-runtime-task-model", "s00b-one-request-lifecycle"],
+ s15: ["s00f-code-reading-order", "team-task-lane-model", "entity-map"],
+ s16: ["team-task-lane-model", "glossary"],
+ s17: ["team-task-lane-model", "s13a-runtime-task-model"],
+ s18: ["team-task-lane-model", "data-structures"],
+ s19: ["s19a-mcp-capability-layers", "s02b-tool-execution-runtime"],
+};
+
+export function getBridgeDocDescriptors(version: VersionId): BridgeDocDescriptor[] {
+ return (CHAPTER_BRIDGE_DOCS[version] ?? [])
+ .map((slug) => BRIDGE_DOCS[slug])
+ .filter((doc): doc is BridgeDocDescriptor => Boolean(doc));
+}
+
+export function getChaptersForBridgeDoc(slug: string): VersionId[] {
+ const mappedVersions = BRIDGE_DOC_RELATED_VERSIONS[slug] ?? [];
+ const referencedVersions = Object.entries(CHAPTER_BRIDGE_DOCS)
+ .filter(([, slugs]) => slugs?.includes(slug))
+ .map(([version]) => version as VersionId);
+
+ const relatedVersions = new Set<VersionId>([...mappedVersions, ...referencedVersions]);
+
+ return VERSION_ORDER.filter((version) => relatedVersions.has(version));
+}
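For context, this is how the two exported helpers are meant to be consumed. The chapter id, slug, locale value, and console logging below are only a demonstration; the real pages render these descriptors through React components:

```ts
// Example usage of the helpers defined above (demonstration only).
import { getBridgeDocDescriptors, getChaptersForBridgeDoc } from "@/lib/bridge-docs";

const locale = "zh" as const;

// Which bridge docs should surface on the s13 chapter page, in a given locale.
for (const doc of getBridgeDocDescriptors("s13")) {
  console.log(`${doc.kind}: ${doc.title[locale]} -- ${doc.summary[locale]}`);
}

// Reverse lookup: every chapter that links back to the runtime task model note.
console.log(getChaptersForBridgeDoc("s13a-runtime-task-model")); // expected: ["s12", "s13", "s14", "s17"]
```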
diff --git a/web/src/lib/chapter-guides.ts b/web/src/lib/chapter-guides.ts
new file mode 100644
index 000000000..c5398f347
--- /dev/null
+++ b/web/src/lib/chapter-guides.ts
@@ -0,0 +1,344 @@
+import type { VersionId } from "@/lib/constants";
+
+type SupportedLocale = "zh" | "en" | "ja";
+
+export interface ChapterGuide {
+ focus: string;
+ confusion: string;
+ goal: string;
+}
+
+export const CHAPTER_GUIDES: Record<VersionId, Record<SupportedLocale, ChapterGuide>> = {
+ s01: {
+ zh: {
+ focus: "先盯住 `messages`、`tool_use` 和 `tool_result` 如何闭环回流。",
+ confusion: "不要把“模型会思考”和“系统能行动”混成一回事,真正让它能行动的是 loop。",
+ goal: "手写一个最小但真实可运行的 agent loop。",
+ },
+ en: {
+ focus: "Focus first on how `messages`, `tool_use`, and `tool_result` close the loop.",
+ confusion: "Do not confuse model reasoning with system action. The loop is what turns thought into work.",
+ goal: "Be able to write a minimal but real agent loop by hand.",
+ },
+ ja: {
+ focus: "まず `messages`、`tool_use`、`tool_result` がどう閉ループを作るかを見る。",
+ confusion: "モデルが考えられることと、システムが行動できることを混同しない。行動を成立させるのは loop です。",
+ goal: "最小でも実際に動く agent loop を自力で書けるようになる。",
+ },
+ },
+ s02: {
+ zh: {
+ focus: "先盯住 `ToolSpec`、`dispatch map` 和 `tool_result` 的对应关系。",
+ confusion: "工具 schema 不是执行函数本身;一个是给模型看的说明,一个是代码里的处理器。",
+ goal: "在不改主循环的前提下,自己加一个新工具。",
+ },
+ en: {
+ focus: "Focus on the relationship between `ToolSpec`, the dispatch map, and `tool_result`.",
+ confusion: "A tool schema is not the handler itself. One describes the tool to the model; the other executes it.",
+ goal: "Add a new tool without changing the main loop.",
+ },
+ ja: {
+ focus: "`ToolSpec`、dispatch map、`tool_result` の対応関係を先に見る。",
+ confusion: "schema は実行関数そのものではありません。片方はモデル向けの説明、もう片方は実装側の handler です。",
+ goal: "主ループを変えずに新しいツールを追加できるようになる。",
+ },
+ },
+ s03: {
+ zh: {
+ focus: "先盯住 `TodoItem` / `PlanState` 这类最小计划状态。",
+ confusion: "todo 只是当前会话里的步骤提醒,不是后面那种持久化任务图。",
+ goal: "让 agent 能把一个大目标拆成可跟踪的小步骤。",
+ },
+ en: {
+ focus: "Focus on the smallest planning state, such as `TodoItem` and `PlanState`.",
+ confusion: "A todo here is a session-level reminder, not the later durable task graph.",
+ goal: "Make the agent break a large goal into trackable steps.",
+ },
+ ja: {
+ focus: "`TodoItem` や `PlanState` のような最小の計画状態を見る。",
+ confusion: "ここでの todo は会話内の手順メモであり、後の永続 task graph とは別物です。",
+ goal: "大きな目標を追跡できる小さな手順へ分解できるようにする。",
+ },
+ },
+ s04: {
+ zh: {
+ focus: "先盯住父 `messages` 和子 `messages` 如何隔离。",
+ confusion: "subagent 的关键不是“又开一次模型调用”,而是“给子任务一个干净上下文”。",
+ goal: "做出一个一次性委派、返回摘要的子 agent。",
+ },
+ en: {
+ focus: "Focus on how parent `messages` and child `messages` stay isolated.",
+ confusion: "The key value of a subagent is not another model call. It is a clean context for the subtask.",
+ goal: "Build a one-shot delegated child agent that returns a summary.",
+ },
+ ja: {
+ focus: "親 `messages` と子 `messages` がどう分離されるかを見る。",
+ confusion: "subagent の本質はモデル呼び出しを増やすことではなく、子タスクへきれいな文脈を与えることです。",
+ goal: "一回限りの委譲を行い、要約を返す子 agent を作れるようになる。",
+ },
+ },
+ s05: {
+ zh: {
+ focus: "先盯住技能的“发现层”和“加载层”是怎么分开的。",
+ confusion: "skill 不是一开始全部塞进 prompt 的大说明书,而是按需加载的知识块。",
+ goal: "做出一个低成本发现、高成本按需读取的技能系统。",
+ },
+ en: {
+ focus: "Focus on how skill discovery and skill loading are kept separate.",
+ confusion: "A skill is not a giant prompt blob loaded upfront. It is knowledge loaded only when needed.",
+ goal: "Build a skill system with cheap discovery and on-demand deep loading.",
+ },
+ ja: {
+ focus: "skill の発見層と読み込み層がどう分かれているかを見る。",
+ confusion: "skill は最初から全部 prompt に入れる巨大説明ではなく、必要時だけ読む知識ブロックです。",
+ goal: "軽い発見と必要時だけの深い読み込みを持つ skill system を作る。",
+ },
+ },
+ s06: {
+ zh: {
+ focus: "先盯住 `persisted output`、`micro compact`、`summary compact` 这三层。",
+ confusion: "压缩不是为了删历史,而是把细节移出活跃上下文,同时保住主线。",
+ goal: "做出一个能长期工作、不被上下文撑爆的最小压缩系统。",
+ },
+ en: {
+ focus: "Focus on the three layers: persisted output, micro compact, and summary compact.",
+ confusion: "Compaction is not about deleting history. It is about moving detail out of the active window while keeping continuity.",
+ goal: "Build a minimal compaction system that keeps long sessions usable.",
+ },
+ ja: {
+ focus: "persisted output、micro compact、summary compact の3層を見る。",
+ confusion: "compact は履歴削除ではなく、細部をアクティブ文脈の外へ移しながら主線を保つことです。",
+ goal: "長い作業でも文脈が破綻しない最小 compact system を作る。",
+ },
+ },
+ s07: {
+ zh: {
+ focus: "先盯住 `PermissionRule`、`PermissionDecision` 和整条 allow / ask / deny 管道。",
+ confusion: "权限系统不是单个 if 判断,而是一条在执行前拦截意图的决策链。",
+ goal: "让危险动作先经过清晰的权限决策,再决定是否执行。",
+ },
+ en: {
+ focus: "Focus on `PermissionRule`, `PermissionDecision`, and the full allow / ask / deny pipeline.",
+ confusion: "A permission system is not one `if` statement. It is a decision chain that intercepts intent before execution.",
+ goal: "Put risky actions behind a clear permission pipeline.",
+ },
+ ja: {
+ focus: "`PermissionRule`、`PermissionDecision`、allow / ask / deny の流れを見る。",
+ confusion: "permission system は単一の if ではなく、実行前に意図を止める判断パイプラインです。",
+ goal: "危険な操作を明確な permission pipeline の後ろに置けるようにする。",
+ },
+ },
+ s08: {
+ zh: {
+ focus: "先盯住 `HookEvent`、`HookResult` 和固定触发时机。",
+ confusion: "hook 不是把逻辑塞回主循环,而是让主循环在固定时机对外发出插口。",
+ goal: "不重写主循环,也能在关键时机扩展行为。",
+ },
+ en: {
+ focus: "Focus on `HookEvent`, `HookResult`, and the fixed trigger points.",
+ confusion: "A hook is not random logic stuffed back into the loop. It is an extension point exposed at a fixed moment.",
+ goal: "Extend behavior at key moments without rewriting the loop.",
+ },
+ ja: {
+ focus: "`HookEvent`、`HookResult`、固定の発火タイミングを見る。",
+ confusion: "hook は主ループへ場当たり的にロジックを戻すことではなく、固定時点の拡張口です。",
+ goal: "主ループを書き換えずに重要なタイミングへ拡張を差し込めるようにする。",
+ },
+ },
+ s09: {
+ zh: {
+ focus: "先盯住 `MemoryEntry` 到底保存哪类信息、为什么不是所有上下文都进 memory。",
+ confusion: "memory 不是万能笔记本,它只保存跨会话仍然有价值、又不容易重新推导的信息。",
+ goal: "做出一个小而准的长期记忆层,而不是把上下文原样倾倒进去。",
+ },
+ en: {
+ focus: "Focus on what belongs in `MemoryEntry`, and why not all context should become memory.",
+ confusion: "Memory is not a universal notebook. It only stores knowledge that still matters across sessions and is not cheap to re-derive.",
+ goal: "Build a small, precise long-term memory layer instead of dumping raw context into storage.",
+ },
+ ja: {
+ focus: "`MemoryEntry` に何を入れるべきか、なぜ全部の文脈を memory にしないのかを見る。",
+ confusion: "memory は万能ノートではなく、会話をまたいで意味があり再導出しにくい情報だけを残します。",
+ goal: "文脈を丸ごと捨て込まない、小さく正確な長期記憶層を作る。",
+ },
+ },
+ s10: {
+ zh: {
+ focus: "先盯住 `PromptParts` 和输入组装顺序,而不是只盯一段大 prompt 字符串。",
+ confusion: "模型真正看到的是一条输入管道,不是单个神秘 system prompt 大文本。",
+ goal: "把系统规则、工具说明、动态上下文拆成可管理的输入片段。",
+ },
+ en: {
+ focus: "Focus on `PromptParts` and assembly order rather than one giant prompt string.",
+ confusion: "The model really sees an input pipeline, not one magical system prompt blob.",
+ goal: "Split system rules, tool descriptions, and dynamic context into manageable input parts.",
+ },
+ ja: {
+ focus: "`PromptParts` と組み立て順を見る。巨大な prompt 文字列だけを見ない。",
+ confusion: "モデルが実際に見るのは入力パイプラインであり、魔法の system prompt 1本ではありません。",
+ goal: "ルール、ツール説明、動的文脈を管理しやすい入力片へ分解する。",
+ },
+ },
+ s11: {
+ zh: {
+ focus: "先盯住 `RecoveryState` 和 `TransitionReason`,搞清“为什么继续”。",
+ confusion: "错误恢复不只是 try/except,而是系统知道自己该重试、压缩后重来,还是结束。",
+ goal: "让 agent 在可恢复错误后还能有条理地继续前进。",
+ },
+ en: {
+ focus: "Focus on `RecoveryState` and `TransitionReason`, especially why the system is continuing.",
+ confusion: "Recovery is not just `try/except`. The system must know whether to retry, compact and retry, or stop.",
+ goal: "Make the agent continue coherently after recoverable failures.",
+ },
+ ja: {
+ focus: "`RecoveryState` と `TransitionReason`、特に「なぜ続行するのか」を見る。",
+ confusion: "error recovery は単なる try/except ではなく、再試行・compact 後再試行・終了を区別することです。",
+ goal: "回復可能な失敗の後でも、agent が筋道立てて進めるようにする。",
+ },
+ },
+ s12: {
+ zh: {
+ focus: "先盯住 `TaskRecord`、`blockedBy`、`blocks` 这几项关系字段。",
+ confusion: "task 不再是会话里的步骤提醒,而是一张持久化工作图上的节点。",
+ goal: "做出一个会解锁后续任务的最小任务系统。",
+ },
+ en: {
+ focus: "Focus on `TaskRecord`, `blockedBy`, and `blocks`.",
+ confusion: "A task here is no longer a session reminder. It is a durable node in a work graph.",
+ goal: "Build a minimal task system that can unlock downstream work.",
+ },
+ ja: {
+ focus: "`TaskRecord`、`blockedBy`、`blocks` の関係を見る。",
+ confusion: "ここでの task は会話内メモではなく、永続 work graph のノードです。",
+ goal: "後続タスクを解放できる最小 task system を作る。",
+ },
+ },
+ s13: {
+ zh: {
+ focus: "先盯住 `RuntimeTaskState` 和 `Notification` 的分工。",
+ confusion: "后台任务不是任务板节点,而是当前正在跑的一条执行槽位。",
+ goal: "让慢命令后台运行,并在下一轮把结果带回模型。",
+ },
+ en: {
+ focus: "Focus on the split between `RuntimeTaskState` and `Notification`.",
+ confusion: "A background task is not a task-board node. It is a running execution slot.",
+ goal: "Run slow work in the background and bring the result back on a later turn.",
+ },
+ ja: {
+ focus: "`RuntimeTaskState` と `Notification` が何を分担しているかを見る。",
+ confusion: "バックグラウンドタスクはタスクボード上のノードではなく、いま走っている実行スロットです。",
+ goal: "遅い処理をバックグラウンドへ逃がし、次のターンで結果を主ループへ戻せるようにする。",
+ },
+ },
+ s14: {
+ zh: {
+ focus: "先盯住 `ScheduleRecord`、触发条件和实际执行任务之间的关系。",
+ confusion: "cron 不是任务本身,它只是“何时启动一份工作”的规则。",
+ goal: "让系统在未来某个时间自动触发工作,而不是只能等当前用户发话。",
+ },
+ en: {
+ focus: "Focus on the relationship between `ScheduleRecord`, trigger conditions, and the work that is actually launched.",
+ confusion: "Cron is not the task itself. It is a rule about when work should start.",
+ goal: "Trigger work at future times instead of waiting for the current user turn.",
+ },
+ ja: {
+ focus: "`ScheduleRecord`、発火条件、実際に起動される仕事の関係を見る。",
+ confusion: "cron は task そのものではなく、いつ仕事を始めるかのルールです。",
+ goal: "現在のユーザー発話だけでなく、将来時刻で自動的に仕事を起動できるようにする。",
+ },
+ },
+ s15: {
+ zh: {
+ focus: "先盯住 `TeamMember`、`MessageEnvelope` 和独立 inbox。",
+ confusion: "teammate 不是换了名字的 subagent,关键区别在“是否长期存在、能反复接活”。",
+ goal: "做出一个长期存在、能通过邮箱协作的多 agent 团队雏形。",
+ },
+ en: {
+ focus: "Focus on `TeamMember`, `MessageEnvelope`, and independent inboxes.",
+ confusion: "A teammate is not a renamed subagent. The difference is long-lived identity and repeatable responsibility.",
+ goal: "Build the first version of a long-lived multi-agent team that collaborates through mailboxes.",
+ },
+ ja: {
+ focus: "`TeamMember`、`MessageEnvelope`、独立 inbox を見る。",
+ confusion: "teammate は名前を変えた subagent ではなく、長寿命で繰り返し責務を持つ存在です。",
+ goal: "メールボックス経由で協力する長寿命マルチエージェントチームの雛形を作る。",
+ },
+ },
+ s16: {
+ zh: {
+ focus: "先盯住 `ProtocolEnvelope`、`request_id` 和 `RequestRecord`。",
+ confusion: "协议消息不是普通聊天消息,它必须能被系统追踪和更新状态。",
+ goal: "让团队协作从自由聊天升级成可批准、可拒绝、可跟踪的流程。",
+ },
+ en: {
+ focus: "Focus on `ProtocolEnvelope`, `request_id`, and `RequestRecord`.",
+ confusion: "A protocol message is not ordinary chat. The system must be able to track it and update its state.",
+ goal: "Turn team coordination from free-form chat into an approvable, rejectable, trackable flow.",
+ },
+ ja: {
+ focus: "`ProtocolEnvelope`、`request_id`、`RequestRecord` を見る。",
+ confusion: "protocol message は普通の会話ではなく、システムが追跡して状態更新できる必要があります。",
+ goal: "チーム協調を自由会話から、承認・拒否・追跡可能なフローへ上げる。",
+ },
+ },
+ s17: {
+ zh: {
+ focus: "先盯住 idle 恢复顺序、角色化 claim policy、claim event 和身份重注入这四件事。",
+ confusion: "自治的关键不是“它会不会自己想”,而是系统有没有定义清楚:空闲时先看谁、能认领什么、恢复时补回哪些上下文。",
+ goal: "让长期队友在不靠持续点名的情况下,也能按规则自己接住下一份工作。",
+ },
+ en: {
+ focus: "Focus on idle resume order, role-aware claim policy, claim events, and identity re-injection.",
+ confusion: "Autonomy is not the agent 'thinking on its own'. It is a defined rule for what an idle worker checks, what it may claim, and how it resumes safely.",
+ goal: "Let a long-lived teammate pick up the next piece of work without constant manual delegation.",
+ },
+ ja: {
+ focus: "特定製品らしさより、idle 復帰順序・役割付き claim policy・claim event・identity 再注入を見る。",
+ confusion: "自律性の核心は魔法の知能ではなく、空いた worker が何を先に確認し、何を claim でき、どう安全に再開するかの規則です。",
+ goal: "継続的に指名されなくても、長寿命 teammate が次の仕事を拾えるようにする。",
+ },
+ },
+ s18: {
+ zh: {
+ focus: "先盯住 `worktree_state`、`last_worktree`、`closeout`,再看 `worktree_enter` 和统一 closeout。",
+ confusion: "worktree 不是任务目标,也不是后台任务;它只是任务的独立执行通道,而且通道状态和任务状态不是一回事。",
+ goal: "让多个执行者并行改代码时,任务目标、执行目录和收尾动作都能被显式记录。",
+ },
+ en: {
+ focus: "Focus on `worktree_state`, `last_worktree`, `closeout`, then on explicit `worktree_enter` and unified closeout.",
+ confusion: "A worktree is neither the task goal nor the runtime task. It is the isolated execution lane, and lane state is not the same as task state.",
+ goal: "Make task goals, execution directories, and closeout decisions explicit when multiple workers edit in parallel.",
+ },
+ ja: {
+ focus: "`worktree_state`、`last_worktree`、`closeout` を先に見て、その後 `worktree_enter` と統一 closeout を見る。",
+ confusion: "worktree は task 目標でも runtime task でもなく、独立した実行レーンです。レーン状態と task 状態は別物です。",
+ goal: "複数 worker が並列でコードを触るとき、task 目標・実行ディレクトリ・収束動作を明示的に記録できるようにする。",
+ },
+ },
+ s19: {
+ zh: {
+ focus: "先盯住外部能力如何重新接回统一 router,而不是先掉进 transport 或认证细节。",
+ confusion: "MCP 不只是外部工具目录,但主线入口仍然应该先从 tools-first 去理解。",
+ goal: "把外部能力接进主系统,同时保持权限、路由和结果回流的一致性。",
+ },
+ en: {
+ focus: "Focus on how external capabilities rejoin the same router before diving into transport or auth detail.",
+ confusion: "MCP is more than an external tool catalog, but the cleanest mainline still starts with tools first.",
+ goal: "Connect external capabilities to the main system while keeping routing, permissions, and result flow consistent.",
+ },
+ ja: {
+ focus: "transport や auth の前に、外部 capability が同じ router へどう戻るかを見る。",
+ confusion: "MCP は単なる外部 tool 一覧ではないが、主線理解の入口は tools-first のままでよいです。",
+ goal: "外部 capability を主システムへ接続しつつ、routing・permission・結果回流の一貫性を保つ。",
+ },
+ },
+};
+
+export function getChapterGuide(version: string, locale: string): ChapterGuide | null {
+ const versionGuide = CHAPTER_GUIDES[version as VersionId];
+ if (!versionGuide) return null;
+ if (locale === "zh" || locale === "en" || locale === "ja") {
+ return versionGuide[locale];
+ }
+ return versionGuide.en;
+}
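+
+// Illustrative usage (a sketch; these hypothetical calls are not added anywhere in this change):
+//   getChapterGuide("s09", "ja")  // -> the Japanese guide for s09
+//   getChapterGuide("s09", "fr")  // -> unknown locale, falls back to the English guide
+//   getChapterGuide("s99", "en")  // -> null (unknown version id)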
diff --git a/web/src/lib/constants.ts b/web/src/lib/constants.ts
index 0f1fdf7a8..0f039940e 100644
--- a/web/src/lib/constants.ts
+++ b/web/src/lib/constants.ts
@@ -1,37 +1,215 @@
export const VERSION_ORDER = [
- "s01", "s02", "s03", "s04", "s05", "s06", "s07", "s08", "s09", "s10", "s11", "s12"
+ "s01",
+ "s02",
+ "s03",
+ "s04",
+ "s05",
+ "s06",
+ "s07",
+ "s08",
+ "s09",
+ "s10",
+ "s11",
+ "s12",
+ "s13",
+ "s14",
+ "s15",
+ "s16",
+ "s17",
+ "s18",
+ "s19",
] as const;
export const LEARNING_PATH = VERSION_ORDER;
export type VersionId = typeof LEARNING_PATH[number];
+export type LearningLayer = "core" | "hardening" | "runtime" | "platform";
export const VERSION_META: Record = {
- s01: { title: "The Agent Loop", subtitle: "Bash is All You Need", coreAddition: "Single-tool agent loop", keyInsight: "The minimal agent kernel is a while loop + one tool", layer: "tools", prevVersion: null },
- s02: { title: "Tools", subtitle: "One Handler Per Tool", coreAddition: "Tool dispatch map", keyInsight: "The loop stays the same; new tools register into the dispatch map", layer: "tools", prevVersion: "s01" },
- s03: { title: "TodoWrite", subtitle: "Plan Before You Act", coreAddition: "TodoManager + nag reminder", keyInsight: "An agent without a plan drifts; list the steps first, then execute", layer: "planning", prevVersion: "s02" },
- s04: { title: "Subagents", subtitle: "Clean Context Per Subtask", coreAddition: "Subagent spawn with isolated messages[]", keyInsight: "Subagents use independent messages[], keeping the main conversation clean", layer: "planning", prevVersion: "s03" },
- s05: { title: "Skills", subtitle: "Load on Demand", coreAddition: "SkillLoader + two-layer injection", keyInsight: "Inject knowledge via tool_result when needed, not upfront in the system prompt", layer: "planning", prevVersion: "s04" },
- s06: { title: "Compact", subtitle: "Three-Layer Compression", coreAddition: "micro-compact + auto-compact + archival", keyInsight: "Context will fill up; three-layer compression strategy enables infinite sessions", layer: "memory", prevVersion: "s05" },
- s07: { title: "Tasks", subtitle: "Task Graph + Dependencies", coreAddition: "TaskManager with file-based state + dependency graph", keyInsight: "A file-based task graph with ordering, parallelism, and dependencies -- the coordination backbone for multi-agent work", layer: "planning", prevVersion: "s06" },
- s08: { title: "Background Tasks", subtitle: "Background Threads + Notifications", coreAddition: "BackgroundManager + notification queue", keyInsight: "Run slow operations in the background; the agent keeps thinking ahead", layer: "concurrency", prevVersion: "s07" },
- s09: { title: "Agent Teams", subtitle: "Teammates + Mailboxes", coreAddition: "TeammateManager + file-based mailbox", keyInsight: "When one agent can't finish, delegate to persistent teammates via async mailboxes", layer: "collaboration", prevVersion: "s08" },
- s10: { title: "Team Protocols", subtitle: "Shared Communication Rules", coreAddition: "request_id correlation for two protocols", keyInsight: "One request-response pattern drives all team negotiation", layer: "collaboration", prevVersion: "s09" },
- s11: { title: "Autonomous Agents", subtitle: "Scan Board, Claim Tasks", coreAddition: "Task board polling + timeout-based self-governance", keyInsight: "Teammates scan the board and claim tasks themselves; no need for the lead to assign each one", layer: "collaboration", prevVersion: "s10" },
- s12: { title: "Worktree + Task Isolation", subtitle: "Isolate by Directory", coreAddition: "Composable worktree lifecycle + event stream over a shared task board", keyInsight: "Each works in its own directory; tasks manage goals, worktrees manage directories, bound by ID", layer: "collaboration", prevVersion: "s11" },
+ s01: {
+ title: "The Agent Loop",
+ subtitle: "Minimal Closed Loop",
+ coreAddition: "LoopState + tool_result feedback",
+ keyInsight: "An agent is just a loop: send messages, execute tools, feed results back, repeat.",
+ layer: "core",
+ prevVersion: null,
+ },
+ s02: {
+ title: "Tool Use",
+ subtitle: "Route Intent into Action",
+ coreAddition: "Tool specs + dispatch map",
+ keyInsight: "Adding a tool means adding one handler. The loop never changes.",
+ layer: "core",
+ prevVersion: "s01",
+ },
+ s03: {
+ title: "TodoWrite",
+ subtitle: "Session Planning",
+ coreAddition: "PlanningState + reminder loop",
+ keyInsight: "A visible plan keeps the agent on track when tasks get complex.",
+ layer: "core",
+ prevVersion: "s02",
+ },
+ s04: {
+ title: "Subagent",
+ subtitle: "Fresh Context per Subtask",
+ coreAddition: "Delegation with isolated message history",
+ keyInsight: "A subagent is mainly a context boundary, not a process trick.",
+ layer: "core",
+ prevVersion: "s03",
+ },
+ s05: {
+ title: "Skills",
+ subtitle: "Discover Cheap, Load Deep",
+ coreAddition: "Skill registry + on-demand injection",
+ keyInsight: "Discover cheaply, load deeply -- only when needed.",
+ layer: "core",
+ prevVersion: "s04",
+ },
+ s06: {
+ title: "Context Compact",
+ subtitle: "Keep the Active Context Small",
+ coreAddition: "Persist markers + micro compact + summary compact",
+ keyInsight: "Compaction isn't deleting history -- it's relocating detail so the agent can keep working.",
+ layer: "core",
+ prevVersion: "s05",
+ },
+ s07: {
+ title: "Permission System",
+ subtitle: "Intent Must Pass Safety",
+ coreAddition: "deny / mode / allow / ask pipeline",
+ keyInsight: "Safety is a pipeline, not a boolean: deny, check mode, allow, then ask.",
+ layer: "hardening",
+ prevVersion: "s06",
+ },
+ s08: {
+ title: "Hook System",
+ subtitle: "Extend Without Rewriting the Loop",
+ coreAddition: "Lifecycle events + side-effect hooks",
+ keyInsight: "The loop owns control flow; hooks only observe, block, or annotate at named moments.",
+ layer: "hardening",
+ prevVersion: "s07",
+ },
+ s09: {
+ title: "Memory System",
+ subtitle: "Keep Only What Survives Sessions",
+ coreAddition: "Typed memory records + reload path",
+ keyInsight: "Memory gives direction; current observation gives truth.",
+ layer: "hardening",
+ prevVersion: "s08",
+ },
+ s10: {
+ title: "System Prompt",
+ subtitle: "Build Inputs as a Pipeline",
+ coreAddition: "Prompt sections + dynamic assembly",
+ keyInsight: "The model sees a constructed input pipeline, not one giant static string.",
+ layer: "hardening",
+ prevVersion: "s09",
+ },
+ s11: {
+ title: "Error Recovery",
+ subtitle: "Recover, Then Continue",
+ coreAddition: "Continuation reasons + retry branches",
+ keyInsight: "Most failures aren't true task failure -- they're signals to try a different path.",
+ layer: "hardening",
+ prevVersion: "s10",
+ },
+ s12: {
+ title: "Task System",
+ subtitle: "Durable Work Graph",
+ coreAddition: "Task records + dependencies + unlock rules",
+ keyInsight: "Todo lists help a session; durable task graphs coordinate work that outlives it.",
+ layer: "runtime",
+ prevVersion: "s11",
+ },
+ s13: {
+ title: "Background Tasks",
+ subtitle: "Separate Goal from Running Work",
+ coreAddition: "RuntimeTaskState + async execution slots",
+ keyInsight: "Background execution is a runtime lane, not a second main loop.",
+ layer: "runtime",
+ prevVersion: "s12",
+ },
+ s14: {
+ title: "Cron Scheduler",
+ subtitle: "Let Time Trigger Work",
+ coreAddition: "Scheduled triggers over runtime tasks",
+ keyInsight: "Scheduling is not a separate system -- it just feeds the same agent loop from a timer.",
+ layer: "runtime",
+ prevVersion: "s13",
+ },
+ s15: {
+ title: "Agent Teams",
+ subtitle: "Persistent Specialists",
+ coreAddition: "Team roster + teammate lifecycle",
+ keyInsight: "Teammates persist beyond one prompt, have identity, and coordinate through durable channels.",
+ layer: "platform",
+ prevVersion: "s14",
+ },
+ s16: {
+ title: "Team Protocols",
+ subtitle: "Shared Request-Response Rules",
+ coreAddition: "Protocol envelopes + request correlation",
+ keyInsight: "A protocol request is a structured message with an ID; the response must reference the same ID.",
+ layer: "platform",
+ prevVersion: "s15",
+ },
+ s17: {
+ title: "Autonomous Agents",
+ subtitle: "Self-Claim and Self-Resume",
+ coreAddition: "Idle polling + role-aware self-claim + resume context",
+ keyInsight: "Autonomy is a bounded mechanism -- idle, scan, claim, resume -- not magic.",
+ layer: "platform",
+ prevVersion: "s16",
+ },
+ s18: {
+ title: "Worktree Isolation",
+ subtitle: "Separate Directory, Separate Lane",
+ coreAddition: "Task-worktree state + explicit enter/closeout lifecycle",
+ keyInsight: "Tasks answer what; worktrees answer where. Keep them separate.",
+ layer: "platform",
+ prevVersion: "s17",
+ },
+ s19: {
+ title: "MCP & Plugin",
+ subtitle: "External Capability Bus",
+ coreAddition: "Scoped servers + capability routing",
+ keyInsight: "External capabilities join the same routing, permission, and result-append path as native tools.",
+ layer: "platform",
+ prevVersion: "s18",
+ },
};
export const LAYERS = [
- { id: "tools" as const, label: "Tools & Execution", color: "#3B82F6", versions: ["s01", "s02"] },
- { id: "planning" as const, label: "Planning & Coordination", color: "#10B981", versions: ["s03", "s04", "s05", "s07"] },
- { id: "memory" as const, label: "Memory Management", color: "#8B5CF6", versions: ["s06"] },
- { id: "concurrency" as const, label: "Concurrency", color: "#F59E0B", versions: ["s08"] },
- { id: "collaboration" as const, label: "Collaboration", color: "#EF4444", versions: ["s09", "s10", "s11", "s12"] },
+ {
+ id: "core" as const,
+ label: "Core Single-Agent",
+ color: "#2563EB",
+ versions: ["s01", "s02", "s03", "s04", "s05", "s06"],
+ },
+ {
+ id: "hardening" as const,
+ label: "Production Hardening",
+ color: "#059669",
+ versions: ["s07", "s08", "s09", "s10", "s11"],
+ },
+ {
+ id: "runtime" as const,
+ label: "Task Runtime",
+ color: "#D97706",
+ versions: ["s12", "s13", "s14"],
+ },
+ {
+ id: "platform" as const,
+ label: "Multi-Agent Platform",
+ color: "#DC2626",
+ versions: ["s15", "s16", "s17", "s18", "s19"],
+ },
] as const;
diff --git a/web/src/lib/diagram-localization.ts b/web/src/lib/diagram-localization.ts
new file mode 100644
index 000000000..0fcf84838
--- /dev/null
+++ b/web/src/lib/diagram-localization.ts
@@ -0,0 +1,828 @@
+export type DiagramLocale = "zh" | "en" | "ja";
+
+type ReplacementPair = readonly [from: string, to: string];
+
+const FLOW_REPLACEMENTS: Record<Exclude<DiagramLocale, "en">, ReplacementPair[]> = {
+ zh: [
+ ["LLM Call", "模型调用"],
+ ["Model Call", "模型调用"],
+ ["User Input", "用户输入"],
+ ["Main Loop", "主循环"],
+ ["Continue Loop", "继续主循环"],
+ ["Continue or Exit", "继续或退出"],
+ ["Model Intent", "模型意图"],
+ ["Normalize", "规范化"],
+ ["Action", "动作"],
+ ["Permission", "权限"],
+ ["Policy?", "策略?"],
+ ["Ask User /", "询问用户 /"],
+ ["Return Deny", "返回拒绝"],
+ ["Append Structured", "追加结构化"],
+ ["Permission Result", "权限结果"],
+ ["Emit Lifecycle", "发出生命周期"],
+ ["Event", "事件"],
+ ["Hooks", "Hook"],
+ ["Registered?", "已注册?"],
+ ["Dispatch Hook", "分发 Hook"],
+ ["Envelope", "信封"],
+ ["Run Core Tool", "运行核心工具"],
+ ["Audit / Trace /", "审计 / 追踪 /"],
+ ["Policy Side Effects", "策略副作用"],
+ ["New Turn", "新一轮"],
+ ["Load Relevant", "加载相关"],
+ ["Assemble Prompt", "组装 Prompt"],
+ ["with Memory", "并注入记忆"],
+ ["Run Work", "执行工作"],
+ ["Extract Durable", "提炼持久"],
+ ["Facts", "事实"],
+ ["Persist Memory", "写入记忆"],
+ ["Next Session /", "下一会话 /"],
+ ["Next Turn", "下一轮"],
+ ["Stable Policy", "稳定策略"],
+ ["Runtime State", "运行时状态"],
+ ["Task Context", "任务上下文"],
+ ["Prompt Section", "Prompt 分段"],
+ ["Assembly", "装配"],
+ ["Tool Loop / Text", "工具循环 / 文本"],
+ ["Response", "响应"],
+ ["Tool Result", "工具结果"],
+ ["Error?", "出错?"],
+ ["Classify Error", "分类错误"],
+ ["Retry / Fallback /", "重试 / 回退 /"],
+ ["Ask User / Stop", "询问用户 / 停止"],
+ ["Write Continuation", "写入续行"],
+ ["Reason", "原因"],
+ ["Cron Tick", "Cron tick"],
+ ["Rule Match?", "规则命中?"],
+ ["Wait for Next", "等待下一次"],
+ ["Create Runtime", "创建运行时"],
+ ["Queue for", "排入"],
+ ["Background Runtime", "后台运行时"],
+ ["Notify Runtime /", "通知运行时 /"],
+ ["Write Schedule Event", "写入调度事件"],
+ ["Execution Continues", "执行继续"],
+ ["Elsewhere", "在其他车道"],
+ ["Capability", "能力"],
+ ["Request", "请求"],
+ ["Discover Native /", "发现原生 /"],
+ ["Plugin / MCP", "插件 / MCP"],
+ ["Route to", "路由到"],
+ ["Native Tool", "原生工具"],
+ ["Plugin or MCP", "插件或 MCP"],
+ ["Server Call", "服务调用"],
+ ["Normalize Result /", "标准化结果 /"],
+ ["Apply Policy", "应用策略"],
+ ["Append Back to", "回写到"],
+ ["Mainline", "主线"],
+ ["Create Todos", "创建待办"],
+ ["Execute Bash", "执行 Bash"],
+ ["Execute Tool", "执行工具"],
+ ["Execute Tool /", "执行工具 /"],
+ ["Protocol Action", "协议动作"],
+ ["Append Result", "追加结果"],
+ ["Append and", "追加并"],
+ ["Continue", "继续"],
+ ["Tool Dispatch", "工具分发"],
+ ["bash / read / write / edit", "bash / read / write / edit"],
+ ["Spawn Subagent", "生成子 Agent"],
+ ["fresh messages[]", "独立 messages[]"],
+ ["Subagent Loop", "子 Agent 循环"],
+ ["Read SKILL.md", "读取 SKILL.md"],
+ ["Inject via", "通过"],
+ ["tool_result", "tool_result"],
+ ["Compress Context", "压缩上下文"],
+ ["Over token", "超过 token"],
+ ["limit?", "上限?"],
+ ["Teammate", "队友"],
+ ["Spawn", "生成"],
+ ["Send Message", "发送消息"],
+ ["JSONL inbox", "JSONL 收件箱"],
+ ["Teammate Agent", "队友 Agent"],
+ ["own loop", "独立循环"],
+ ["Task Board CRUD", "任务板 CRUD"],
+ ["Unlock / Respect", "解锁 / 遵守"],
+ ["Dependencies", "依赖"],
+ ["Inbox First,", "先看收件箱,"],
+ ["Then Claimable Tasks", "再找可认领任务"],
+ ["Auto-Claim +", "自动认领 +"],
+ ["Write Claim Event", "写入认领事件"],
+ ["Enter Idle", "进入空闲"],
+ ["Phase", "阶段"],
+ ["Ensure Identity", "确保身份"],
+ ["Context", "上下文"],
+ ["Resume /", "恢复 /"],
+ ["New Work", "新工作"],
+ ["Create Durable", "创建持久"],
+ ["Request Record:", "请求记录:"],
+ ["pending -> resolved", "pending -> resolved"],
+ ["Task State:", "任务状态:"],
+ ["bind + worktree_state", "绑定 + worktree_state"],
+ ["Create / Enter", "创建 / 进入"],
+ ["Worktree Lane", "Worktree 执行通道"],
+ ["Run in", "运行于"],
+ ["Isolated Dir", "隔离目录"],
+ ["Emit enter / run /", "发出 enter / run /"],
+ ["closeout events", "closeout 事件"],
+ ["Optional Read", "可选读取"],
+ ["worktree_events", "worktree_events"],
+ ["worktree_closeout", "worktree_closeout"],
+ ["keep | remove", "保留 | 移除"],
+ ["Output", "输出"],
+ ["yes", "是"],
+ ["no", "否"],
+ ["allow", "允许"],
+ ["deny / ask", "拒绝 / 询问"],
+ ["observe", "观察"],
+ ["visible input", "可见输入"],
+ ["request", "请求"],
+ ["task", "任务"],
+ ["spawn", "生成"],
+ ["runtime", "运行时"],
+ ["sync", "同步"],
+ ["team tool?", "团队工具?"],
+ ["task tool?", "任务工具?"],
+ ["protocol tool?", "协议工具?"],
+ ["runtime tool?", "运行时工具?"],
+ ["worktree tool?", "worktree 工具?"],
+ ["load_skill?", "load_skill?"],
+ ["skill", "技能"],
+ ["other", "其他"],
+ ["claimable task", "可认领任务"],
+ ["inbox message", "收件箱消息"],
+ ["resume work", "恢复工作"],
+ ["task ops", "任务操作"],
+ ["task result", "任务结果"],
+ ["run/status", "运行/状态"],
+ ["run/status result", "运行/状态结果"],
+ ["create/enter", "创建/进入"],
+ ["create/enter result", "创建/进入结果"],
+ ["emit create/enter", "发出 create/enter"],
+ ["emit closeout", "发出 closeout"],
+ ["closeout result", "closeout 结果"],
+ ["optional query", "可选查询"],
+ ["events result", "事件结果"],
+ ["allocate lane", "分配车道"],
+ ["local", "本地"],
+ ["plugin / mcp", "插件 / mcp"],
+ ["idle", "空闲"],
+ ],
+ ja: [
+ ["LLM Call", "モデル呼び出し"],
+ ["Model Call", "モデル呼び出し"],
+ ["User Input", "ユーザー入力"],
+ ["Main Loop", "主ループ"],
+ ["Continue Loop", "ループ継続"],
+ ["Continue or Exit", "継続または終了"],
+ ["Model Intent", "モデル意図"],
+ ["Normalize", "正規化"],
+ ["Action", "操作"],
+ ["Permission", "権限"],
+ ["Policy?", "ポリシー?"],
+ ["Ask User /", "ユーザー確認 /"],
+ ["Return Deny", "拒否を返す"],
+ ["Append Structured", "構造化された"],
+ ["Permission Result", "権限結果を追加"],
+ ["Emit Lifecycle", "ライフサイクル"],
+ ["Event", "イベント"],
+ ["Hooks", "Hook"],
+ ["Registered?", "登録済み?"],
+ ["Dispatch Hook", "Hook を配信"],
+ ["Envelope", "封筒"],
+ ["Run Core Tool", "コアツール実行"],
+ ["Audit / Trace /", "監査 / 追跡 /"],
+ ["Policy Side Effects", "ポリシー副作用"],
+ ["New Turn", "新しいターン"],
+ ["Load Relevant", "関連"],
+ ["Assemble Prompt", "Prompt を組み立て"],
+ ["with Memory", "記憶を注入"],
+ ["Run Work", "作業実行"],
+ ["Extract Durable", "永続"],
+ ["Facts", "事実を抽出"],
+ ["Persist Memory", "記憶を保存"],
+ ["Next Session /", "次回セッション /"],
+ ["Next Turn", "次のターン"],
+ ["Stable Policy", "安定ポリシー"],
+ ["Runtime State", "ランタイム状態"],
+ ["Task Context", "タスク文脈"],
+ ["Prompt Section", "Prompt セクション"],
+ ["Assembly", "組み立て"],
+ ["Tool Loop / Text", "ツールループ / テキスト"],
+ ["Response", "応答"],
+ ["Tool Result", "ツール結果"],
+ ["Error?", "エラー?"],
+ ["Classify Error", "エラー分類"],
+ ["Retry / Fallback /", "再試行 / フォールバック /"],
+ ["Ask User / Stop", "ユーザー確認 / 停止"],
+ ["Write Continuation", "継続理由を"],
+ ["Reason", "記録"],
+ ["Cron Tick", "Cron tick"],
+ ["Rule Match?", "ルール一致?"],
+ ["Wait for Next", "次の"],
+ ["Create Runtime", "ランタイム"],
+ ["Queue for", "へ投入"],
+ ["Background Runtime", "バックグラウンド実行"],
+ ["Notify Runtime /", "ランタイム通知 /"],
+ ["Write Schedule Event", "スケジュールイベント記録"],
+ ["Execution Continues", "実行は"],
+ ["Elsewhere", "別レーンで継続"],
+ ["Capability", "能力"],
+ ["Request", "要求"],
+ ["Discover Native /", "ネイティブ /"],
+ ["Plugin / MCP", "プラグイン / MCP を探索"],
+ ["Route to", "へルーティング"],
+ ["Native Tool", "ネイティブツール"],
+ ["Plugin or MCP", "プラグインまたは MCP"],
+ ["Server Call", "サーバー呼び出し"],
+ ["Normalize Result /", "結果正規化 /"],
+ ["Apply Policy", "ポリシー適用"],
+ ["Append Back to", "主線へ"],
+ ["Mainline", "回写"],
+ ["Create Todos", "Todo 作成"],
+ ["Execute Bash", "Bash 実行"],
+ ["Execute Tool", "ツール実行"],
+ ["Execute Tool /", "ツール実行 /"],
+ ["Protocol Action", "プロトコル操作"],
+ ["Append Result", "結果を追加"],
+ ["Append and", "追加して"],
+ ["Continue", "継続"],
+ ["Tool Dispatch", "ツール分配"],
+ ["Spawn Subagent", "サブエージェント生成"],
+ ["fresh messages[]", "独立 messages[]"],
+ ["Subagent Loop", "サブエージェントループ"],
+ ["Read SKILL.md", "SKILL.md を読む"],
+ ["Inject via", "経由で注入"],
+ ["Compress Context", "文脈圧縮"],
+ ["Over token", "token"],
+ ["limit?", "上限超過?"],
+ ["Teammate", "チームメイト"],
+ ["Spawn", "生成"],
+ ["Send Message", "メッセージ送信"],
+ ["JSONL inbox", "JSONL inbox"],
+ ["Teammate Agent", "チームメイト Agent"],
+ ["own loop", "独自ループ"],
+ ["Task Board CRUD", "タスク板 CRUD"],
+ ["Unlock / Respect", "解除 / 尊重"],
+ ["Dependencies", "依存関係"],
+ ["Inbox First,", "まず inbox、"],
+ ["Then Claimable Tasks", "次に claimable tasks"],
+ ["Auto-Claim +", "自動 claim +"],
+ ["Write Claim Event", "claim event 記録"],
+ ["Enter Idle", "アイドルへ"],
+ ["Phase", "入る"],
+ ["Ensure Identity", "身元"],
+ ["Context", "文脈を確認"],
+ ["Resume /", "再開 /"],
+ ["New Work", "新規作業"],
+ ["Create Durable", "永続"],
+ ["Request Record:", "要求記録:"],
+ ["Task State:", "タスク状態:"],
+ ["Create / Enter", "作成 / 進入"],
+ ["Worktree Lane", "Worktree レーン"],
+ ["Run in", "で実行"],
+ ["Isolated Dir", "隔離ディレクトリ"],
+ ["Emit enter / run /", "enter / run /"],
+ ["closeout events", "closeout event 発行"],
+ ["Optional Read", "任意読み取り"],
+ ["Output", "出力"],
+ ["yes", "はい"],
+ ["no", "いいえ"],
+ ["allow", "許可"],
+ ["deny / ask", "拒否 / 確認"],
+ ["observe", "観測"],
+ ["visible input", "可視入力"],
+ ["request", "要求"],
+ ["task", "タスク"],
+ ["spawn", "生成"],
+ ["runtime", "ランタイム"],
+ ["sync", "同期"],
+ ["team tool?", "チームツール?"],
+ ["task tool?", "タスクツール?"],
+ ["protocol tool?", "プロトコルツール?"],
+ ["runtime tool?", "ランタイムツール?"],
+ ["worktree tool?", "worktree ツール?"],
+ ["skill", "スキル"],
+ ["other", "その他"],
+ ["idle", "アイドル"],
+ ],
+};
+
+const ARCHITECTURE_REPLACEMENTS: Record<Exclude<DiagramLocale, "en">, ReplacementPair[]> = {
+ zh: [
+ [
+ "Background tasks fully separate the existence of work from one live execution attempt, which is where runtime records become first-class.",
+ "后台任务把“工作目标存在”与“某次执行正在运行”彻底拆开,runtime record 因而第一次成为一等结构。",
+ ],
+ [
+ "Drains background notifications before the next model call.",
+ "下一轮调用模型前,先把后台通知排空并带回主线。",
+ ],
+ [
+ "The durable task goal still lives on the task board.",
+ "持久任务目标仍然留在任务板上。",
+ ],
+ [
+ "Describes one running or completed execution slot.",
+ "描述一条正在运行或已经完成的执行槽位。",
+ ],
+ [
+ "The full artifact goes to disk while notifications carry only a preview.",
+ "完整产物写入磁盘,通知里只带预览摘要。",
+ ],
+ [
+ "Slow commands execute on a side path while the main loop keeps moving.",
+ "慢命令在旁路车道里执行,主循环继续向前推进。",
+ ],
+ [
+ "The bridge back into the main loop.",
+ "把结果重新带回主循环的桥梁。",
+ ],
+ [
+ "The loop creates a runtime record",
+ "主循环先创建 runtime record",
+ ],
+ [
+ "A background slot runs the slow command",
+ "后台槽位执行慢命令",
+ ],
+ [
+ "notification plus output_file returns to the main system",
+ "notification 与 output_file 一起回到主系统",
+ ],
+ ["Notification Drain", "通知排空"],
+ ["Task Goal", "任务目标"],
+ ["Background Execution Slot", "后台执行线"],
+ ["Agent Loop", "Agent 循环"],
+ ["Assistant Content", "Assistant 内容"],
+ ["Dispatch Map", "分发映射"],
+ ["Dispatch Entry", "分发条目"],
+ ["Todo List", "Todo 列表"],
+ ["Parent messages", "父 messages"],
+ ["Child messages", "子 messages"],
+ ["Subtask Request", "子任务请求"],
+ ["Skill Discovery", "技能发现"],
+ ["Skill Load", "技能加载"],
+ ["Skill Registry", "技能注册表"],
+ ["Persisted Output", "持久输出"],
+ ["Summary State", "摘要状态"],
+ ["Micro Compact Record", "微压缩记录"],
+ ["Summary Compact", "摘要压缩"],
+ ["Permission Gate", "权限闸门"],
+ ["Normalized Intent", "规范化意图"],
+ ["Lifecycle Events", "生命周期事件"],
+ ["Hook Registry", "Hook 注册表"],
+ ["Audit Sink", "审计落点"],
+ ["Memory Store", "记忆存储"],
+ ["Prompt Builder", "Prompt 构建器"],
+ ["Runtime Context", "运行时上下文"],
+ ["Section Order", "段落顺序"],
+ ["Recovery Manager", "恢复管理器"],
+ ["Continuation Reason", "续行原因"],
+ ["Retry Bounds", "重试边界"],
+ ["Unlock Rules", "解锁规则"],
+ ["Task Board", "任务板"],
+ ["Dependency Edges", "依赖边"],
+ ["Notification Drain", "通知排空"],
+ ["Task Goal", "任务目标"],
+ ["Schedule Matcher", "调度匹配器"],
+ ["Lead Orchestrator", "主协调者"],
+ ["Team Roster", "团队 roster"],
+ ["Inbox", "收件箱"],
+ ["Persistent Teammate", "持久队友"],
+ ["MessageEnvelope", "消息信封"],
+ ["Protocol Envelope", "协议信封"],
+ ["Protocol State Machine", "协议状态机"],
+ ["Request Store", "请求存储"],
+ ["Idle Poll Loop", "空闲轮询循环"],
+ ["Claim Policy", "认领策略"],
+ ["Claim Events", "认领事件"],
+ ["Autonomous Worker", "自治执行者"],
+ ["Task-to-Lane Binding", "任务到车道绑定"],
+ ["Closeout Semantics", "收尾语义"],
+ ["Worktree Index", "Worktree 索引"],
+ ["Event Log", "事件日志"],
+ ["Isolated Directory Lane", "隔离目录车道"],
+ ["Closeout Record", "收尾记录"],
+ ["Capability Router", "能力路由器"],
+ ["Shared Permission Gate", "共享权限闸门"],
+ ["Result Normalizer", "结果标准化器"],
+ ["Plugin Manifest", "插件清单"],
+ ["Capability View", "能力视图"],
+ ["Native Tool", "原生工具"],
+ ["MCP / Plugin Lane", "MCP / 插件车道"],
+ ["Scoped Capability", "作用域能力"],
+ ["NEW", "新增"],
+ ],
+ ja: [
+ [
+ "The first chapter establishes the smallest closed loop: user input enters messages[], the model decides whether to call a tool, and the result flows back into the same loop.",
+ "最初の章では最小の閉ループを作ります。ユーザー入力が `messages[]` に入り、モデルが tool を呼ぶか判断し、その結果が同じループへ戻ります。",
+ ],
+ [
+ "This chapter upgrades one tool call into a stable multi-tool routing layer while keeping the main loop unchanged.",
+ "この章では 1 回の tool 呼び出しを、主ループを変えずに複数 tool を安定して扱える routing 層へ引き上げます。",
+ ],
+ [
+ "The third chapter makes session planning explicit so the agent gains a dedicated session-planning state.",
+ "第 3 章ではセッション内の計画を明示化し、agent に専用の session-planning state を与えます。",
+ ],
+ [
+ "This chapter isolates subtasks from the parent context and introduces the first explicit multi-loop structure.",
+ "この章では子タスクを親文脈から切り離し、初めて明示的な multi-loop 構造を導入します。",
+ ],
+ [
+ "The skill system splits knowledge into a discovery layer and an on-demand loading layer so the prompt does not start bloated.",
+ "skill system は知識を discovery 層と on-demand loading 層へ分け、prompt が最初から膨らみすぎないようにします。",
+ ],
+ [
+ "Context compaction is where the system first separates the active window from offloaded detail so long sessions stay usable.",
+ "context compact では active window と外へ逃がした detail が初めて分かれ、長いセッションでも使い続けられるようになります。",
+ ],
+ [
+ "From this chapter onward, execution gets a real control-plane gate: model intent must become a permission request before it runs.",
+ "この章から実行前に本物の control-plane gate が入り、model intent は実行前に permission request へ変換されます。",
+ ],
+ [
+ "Hooks give the loop stable sidecar extension points so logging, audit, and tracing separate from the core path.",
+ "hook は loop の周囲に安定した sidecar 拡張点を与え、logging・audit・tracing を主線から分離します。",
+ ],
+ [
+ "Long-term memory layers cross-session facts away from immediate context and introduces a real durable knowledge container.",
+ "long-term memory は会話をまたぐ事実を即時文脈から分離し、本物の durable knowledge container を導入します。",
+ ],
+ [
+ "System input becomes an assembly pipeline here: the model no longer sees one giant mysterious prompt, but a bounded set of input sections.",
+ "ここでは system input が assembly pipeline になり、モデルは巨大で謎めいた prompt 1 本ではなく、境界のある入力 section 群を見るようになります。",
+ ],
+ [
+ "Error recovery formally brings failure into the state machine so the system records why it continues, retries, or stops.",
+ "error recovery は failure を正式に state machine へ取り込み、なぜ続行し、なぜ再試行し、なぜ停止するのかを記録します。",
+ ],
+ [
+ "The task system is where session steps become a durable work graph that can progress real work nodes across turns.",
+ "task system では session step が durable work graph へ昇格し、複数ターンをまたいで本当の work node を進められるようになります。",
+ ],
+ [
+ "The cron scheduler makes time a first-class trigger source while still handing execution off to the runtime layer.",
+ "cron scheduler は時間を first-class な trigger source に引き上げつつ、実行自体は runtime layer に渡します。",
+ ],
+ [
+ "This is where the system moves from one executor toward a long-lived team with persistent teammates, a roster, and inboxes.",
+ "ここでシステムは単一 executor から、persistent teammate・roster・inbox を持つ長期チームへ進みます。",
+ ],
+ [
+ "Team protocols upgrade collaboration from free-form text into structured request flows centered on request_id and durable request records.",
+ "team protocol は協調を自由文から、`request_id` と durable request record を中心にした structured request flow へ引き上げます。",
+ ],
+ [
+ "The autonomy chapter moves teammates from waiting for assignments to self-claiming eligible work under a claim policy and resuming with context.",
+ "autonomy 章では teammate が割り当て待ちから、claim policy の下で自分で仕事を見つけ、context を持って resume する段階へ進みます。",
+ ],
+ [
+ "The worktree chapter pulls execution environments out of the main directory: tasks still express goals while worktrees become isolated, observable, closeout-capable lanes.",
+ "worktree 章では実行環境を main directory から切り離します。task は引き続き goal を表し、worktree は isolated で観測可能な closeout 付き lane になります。",
+ ],
+ [
+ "The final chapter reunifies native tools, plugins, and MCP servers on one capability bus so external capability returns to the same control plane.",
+ "最後の章では native tool・plugin・MCP server を 1 本の capability bus へ再統合し、external capability を同じ control plane へ戻します。",
+ ],
+ [
+ "Each turn calls the model, handles the output, then decides whether to continue.",
+ "各ターンでモデルを呼び、出力を処理し、その後で続けるかどうかを決めます。",
+ ],
+ [
+ "User, assistant, and tool result history accumulates here.",
+ "ユーザー、assistant、tool result の履歴がここへ積み上がります。",
+ ],
+ [
+ "The agent becomes real when tool results return into the next reasoning step.",
+ "agent が本当に動き出すのは、tool result が次の推論へ戻るときです。",
+ ],
+ ["The smallest runnable session state.", "最小で実行可能なセッション状態です。"],
+ ["The model output for the current turn.", "現在のターンでモデルが出した内容です。"],
+ ["User message enters messages[]", "ユーザーメッセージが `messages[]` に入る"],
+ ["Model emits tool_use or text", "モデルが `tool_use` またはテキストを出す"],
+ ["Tool result writes back into the next turn", "tool result が次のターンへ書き戻される"],
+ ["Stable Main Loop", "安定した主ループ"],
+ [
+ "The main loop still only owns model calls and write-back.",
+ "主ループは引き続き、モデル呼び出しと結果の回写だけを担当します。",
+ ],
+ ["ToolSpec Catalog", "ToolSpec カタログ"],
+ ["Describes tool capabilities to the model.", "tool の能力をモデルへ説明します。"],
+ ["Routes a tool call to the correct handler by name.", "tool 名で対応する handler へルーティングします。"],
+ ["Structured tool arguments emitted by the model.", "モデルが出した構造化された tool 引数です。"],
+ ["Schema plus description.", "schema と説明です。"],
+ ["Mapping from tool name to function.", "tool 名から関数への対応表です。"],
+ ["The model selects a tool", "モデルが使う tool を選ぶ"],
+ ["The dispatch map resolves the handler", "dispatch map が handler を解決する"],
+ ["The handler returns a tool_result", "handler が `tool_result` を返す"],
+ ["Plan Before Execution", "実行前に計画する"],
+ ["Break the larger goal into trackable steps before acting.", "大きな目標を、いま追跡できる手順へ分解してから動きます。"],
+ ["Reminder Loop", "リマインダーループ"],
+ ["Each turn revisits the current todo list to avoid drift.", "各ターンで現在の todo を見直し、途中の漂流を防ぎます。"],
+ ["The smallest planning unit inside one session.", "1 セッション内での最小の計画単位です。"],
+ ["Tracks what steps exist and which one is active.", "どんな手順があり、どれが現在アクティブかを記録します。"],
+ ["Session-scoped, not durable.", "セッション内だけで使うもので、永続ではありません。"],
+ ["The goal becomes steps first", "まず目標を手順へ落とす"],
+ ["The current step guides tool choice", "現在の手順が tool 選択を導く"],
+ ["Progress writes back into planning state", "進捗が planning state へ書き戻される"],
+ ["Parent Loop", "親ループ"],
+ ["Keeps the main goal and the integration responsibility.", "主目標と最終統合の責任を保ちます。"],
+ ["Child Loop", "子ループ"],
+ ["Provides a clean context for the subtask.", "子タスクのためのきれいな文脈を与えます。"],
+ ["Delegation Boundary", "委譲境界"],
+ ["Defines when work is delegated versus kept in the parent loop.", "いつ仕事を子へ渡し、いつ親ループに残すかを決めます。"],
+ ["The parent agent's long-lived context.", "親 agent の長く保持される文脈です。"],
+ ["An isolated one-shot context for the delegated subtask.", "委譲された子タスク専用の、一回限りの独立文脈です。"],
+ ["One-shot Subagent", "単発サブエージェント"],
+ ["Exits after returning a summary and does not keep long-lived identity.", "要約を返したら終了し、長期的な identity は持ちません。"],
+ ["The boundary object handed from parent to child.", "親ループから子ループへ渡される境界オブジェクトです。"],
+ ["The parent loop defines a subtask", "親ループが子タスクを定義する"],
+ ["The child loop runs in isolated messages", "子ループが独立した `messages` で動く"],
+ ["A summary returns to the parent loop", "要約が親ループへ戻る"],
+ ["Learns which skills exist through a cheap discovery pass.", "軽い discovery pass で、どんな skill があるかを把握します。"],
+ ["Loads deep instructions only when they are actually needed.", "本当に必要になった時だけ深い説明を読み込みます。"],
+ ["Stores skill names, summaries, and paths.", "skill の名前、概要、パスを保存します。"],
+ ["Keep the Loop Lightweight", "ループを軽く保つ"],
+ ["Skills are injected on demand instead of being permanently fused into the system prompt.", "skill は system prompt に常駐させず、必要な時だけ注入します。"],
+ ["The deep instruction source for a skill.", "skill の深い説明を置く元ファイルです。"],
+ ["Discover the skill entry first", "まず skill の入口を見つける"],
+ ["Read SKILL.md when needed", "必要になった時だけ `SKILL.md` を読む"],
+ ["Feed the loaded result back into the main loop", "読み込んだ結果を主ループへ戻す"],
+ ["Compaction Trigger", "compact 発火条件"],
+ ["Decides when to compact as the token budget grows.", "token budget が増える中で、いつ compact するかを決めます。"],
+ ["Micro and Summary Compaction", "micro / summary compact"],
+ ["Compacts in layers with different levels of loss.", "損失の強さが異なる複数層で compact します。"],
+ ["Active Context", "アクティブ文脈"],
+ ["What the current turn must see directly.", "現在のターンが直接見る必要のある内容です。"],
+ ["Detail moved out of the active window but still readable later.", "active window の外へ移したが、後でまだ読み戻せる detail です。"],
+ ["The retained storyline after compaction.", "compact 後にも残す主線です。"],
+ ["Moves recent detail out of the hot window.", "直近の detail を hot window の外へ移します。"],
+ ["Preserves continuity of the mainline.", "主線の連続性を守ります。"],
+ ["Detail leaves the active window first", "まず detail を active window の外へ出す"],
+ ["The mainline is preserved as a summary", "主線は summary として保たれる"],
+ ["Raw detail is read back only when needed", "生の detail は必要時だけ読み戻す"],
+ ["deny / ask / allow happens before execution.", "`deny / ask / allow` の判断は実行前に行われます。"],
+ ["Mode Control", "モード制御"],
+ ["Modes such as default, plan, and auto affect the whole permission path.", "`default`・`plan`・`auto` などの mode が permission path 全体へ影響します。"],
+ ["Defines which tools or paths are allowed, denied, or sent for confirmation.", "どの tool や path を許可・拒否・確認送りにするかを定めます。"],
+ ["Writes allow / ask / deny back in structured form.", "`allow / ask / deny` を構造化して書き戻します。"],
+ ["The Loop No Longer Reaches Tools Directly", "主ループはもう tool へ直行しない"],
+ ["A tool call passes through the permission layer before actual execution.", "tool call は実行前に permission layer を通過します。"],
+ ["Translates raw tool calls into a policy-checkable object.", "生の tool call を policy 判定可能なオブジェクトへ変換します。"],
+ ["The model proposes an action", "モデルが行動案を出す"],
+ ["The permission layer returns allow / ask / deny", "permission layer が `allow / ask / deny` を返す"],
+ ["That result writes back into the main loop", "その結果が主ループへ書き戻される"],
+ ["The loop emits events at boundaries like pre_tool, post_tool, and on_error.", "loop は `pre_tool`・`post_tool`・`on_error` などの境界で event を出します。"],
+ ["Multiple hooks share one event contract.", "複数の hook が同じ event contract を共有します。"],
+ ["A structured event envelope carrying tool, input, result, error, and more.", "tool・input・result・error などを持つ構造化 event envelope です。"],
+ ["Keep the Mainline Small", "主線を小さく保つ"],
+ ["Side effects attach through hooks instead of invading every handler.", "副作用は各 handler に侵入させず、hook 経由で付けます。"],
+ ["A concrete side-effect sink.", "具体的な副作用の落とし先です。"],
+ ["The loop emits an event", "loop が event を出す"],
+ ["Hooks observe and produce side effects", "hook が観測して副作用を生む"],
+ ["The mainline continues without being rewritten", "主線は書き換えられず、そのまま進む"],
+ ["Memory Load/Write", "memory の読み込み / 書き込み"],
+ ["Load before the model call, then extract and write after the work turn.", "モデル呼び出し前に読み込み、作業ターンの後で抽出して書き戻します。"],
+ ["Carries the live process, not long-term cross-session knowledge.", "現在進行中の処理を運びますが、会話をまたぐ長期知識は持ちません。"],
+ ["Stores only durable facts that still matter across sessions.", "セッションをまたいでも価値のある durable fact だけを保存します。"],
+ ["Long-lived facts such as preferences and project constraints.", "好みや project 制約のような長期 fact です。"],
+ ["Relevant memory is loaded first", "関連 memory を先に読み込む"],
+ ["The main loop completes the current turn", "主ループが現在のターンを完了する"],
+ ["New durable facts are extracted and written back", "新しい durable fact を抽出して書き戻す"],
+ ["Assembles stable policy, runtime state, tools, and memory in a visible order.", "stable policy・runtime state・tool・memory を見える順序で組み立てます。"],
+ ["Each input fragment has its own explicit boundary.", "各入力片には明示的な境界があります。"],
+ ["Runtime fragments such as workspace state, task state, and memory.", "workspace state・task state・memory などの runtime 片です。"],
+ ["Model Input Construction", "モデル入力の構築"],
+ ["The loop constructs the full input before calling the model.", "loop はモデル呼び出し前に完全な入力を組み立てます。"],
+ ["Which fragment is assembled first versus later.", "どの入力片を先に組み、どれを後に組むかです。"],
+ ["Stable policy is assembled first", "stable policy を先に組み立てる"],
+ ["Runtime fragments are injected next", "その後で runtime fragment を注入する"],
+ ["Only then does the final input reach the model", "そこで初めて最終入力がモデルへ届く"],
+ ["Chooses retry, fallback, ask, or stop by failure type.", "failure の種類ごとに retry・fallback・ask・stop を選びます。"],
+ ["Makes the reason for continuation visible state.", "なぜ継続するのかを可視の state にします。"],
+ ["Prevents recovery branches from looping forever.", "recovery branch が無限ループしないようにします。"],
+ ["Failures Still Return to the Loop", "失敗も主ループへ戻る"],
+ ["Failures are not discarded; they write back with recovery semantics.", "失敗は捨てられず、recovery の意味を持ったまま書き戻されます。"],
+ ["The error classification and branch state.", "error の分類と branch state です。"],
+ ["A tool failure is classified first", "tool failure をまず分類する"],
+ ["The recovery layer chooses a branch", "recovery layer が branch を選ぶ"],
+ ["The continuation reason returns to the main loop", "continuation reason が主ループへ戻る"],
+ ["Checks which downstream nodes can start once one task completes.", "1 つの task が完了した後、どの downstream node が開始できるかを調べます。"],
+ ["The durable record surface for all work nodes.", "すべての work node を保持する durable な記録面です。"],
+ ["blockedBy / blocks record who depends on whom.", "`blockedBy / blocks` が誰が誰に依存するかを記録します。"],
+ ["Tasks Layer Away From the Session", "task はセッションの外側へ分かれる"],
+ ["Session-local todo becomes secondary while durable tasks enter the main architecture.", "session-local な todo は脇へ下がり、durable task が主設計へ入ってきます。"],
+ ["Durable fields for goal, status, dependencies, owner, and more.", "goal・status・dependency・owner などを持つ durable record です。"],
+ ["A task node is created", "task node が作られる"],
+ ["Dependency edges decide when work becomes ready", "dependency edge が ready になる時点を決める"],
+ ["Completion unlocks downstream nodes", "完了が downstream node を解放する"],
+ ["Only decides whether a rule matches.", "ルールが一致したかだけを判定します。"],
+ ["Records what should trigger and when.", "何をいつ発火させるかを記録します。"],
+ ["The concrete runtime instance created after a match.", "一致後に生成される具体的な runtime instance です。"],
+ ["Time Trigger Surface", "時間トリガー面"],
+ ["A cron tick is only a trigger surface, not the business execution itself.", "cron tick は trigger surface であり、業務実行そのものではありません。"],
+ ["One rule-match occurrence.", "1 回のルール一致イベントです。"],
+ ["A cron rule matches", "cron rule が一致する"],
+ ["A runtime task is created", "runtime task が作られる"],
+ ["The background runtime takes over execution", "background runtime が実行を引き継ぐ"],
+ ["Maintains the roster, assigns work, and watches team state.", "roster を維持し、役割を割り振り、team state を見守ります。"],
+ ["Stores each teammate's name, role, and status.", "各 teammate の名前・role・status を保存します。"],
+ ["A separate message boundary for each teammate.", "各 teammate ごとに独立した message 境界です。"],
+ ["A long-lived worker that can take repeated assignments.", "繰り返し仕事を受けられる長期 worker です。"],
+ ["A long-lived identity, not a one-shot delegation result.", "単発の委譲結果ではなく、長期 identity です。"],
+ ["A structured message carried through inboxes.", "inbox を流れる構造化メッセージです。"],
+ ["The lead defines responsibility", "lead が責務を定義する"],
+ ["Messages enter the teammate inbox", "メッセージが teammate inbox へ入る"],
+ ["The teammate runs independently and replies", "teammate が独立に動いて返信する"],
+ ["A fixed envelope with type, from, to, request_id, and payload.", "`type`・`from`・`to`・`request_id`・`payload` を持つ固定 envelope です。"],
+ ["Turns protocol requests into durable request records.", "protocol request を durable request record へ変換します。"],
+ ["Protocol Collaboration Channel", "プロトコル協調チャネル"],
+ ["Approvals, shutdowns, and handoffs all use the same request/response model.", "承認・停止・引き継ぎなどを同じ request/response model で扱います。"],
+ ["The real state center of a protocol workflow.", "protocol workflow の本当の状態中心です。"],
+ ["A protocol request is sent", "protocol request が送られる"],
+ ["request_id binds the durable state record", "`request_id` が durable state record を結び付ける"],
+ ["An explicit response writes back into the state machine", "明示的な response が state machine へ書き戻される"],
+ ["Checks inboxes and the task board on a cadence during idle time.", "idle 中に一定間隔で inbox と task board を確認します。"],
+ ["Only tasks that satisfy role and state conditions may be auto-claimed.", "role と state 条件を満たす task だけが auto-claim されます。"],
+ ["Records who claimed a task and from which source.", "誰が、どの source から task を claim したかを記録します。"],
+ ["Autonomous teammates still inherit durable protocol request state from the previous chapter.", "自律 teammate も前章の durable protocol request state を引き継ぎます。"],
+ ["Discovers eligible work while idle, then resumes execution.", "idle 中に着手可能な仕事を見つけ、その後 execution を resume します。"],
+ ["Decides whether the current role may claim a task.", "現在の role が task を claim できるかを判定します。"],
+ ["The teammate enters idle polling", "teammate が idle polling に入る"],
+ ["The claim policy selects eligible work", "claim policy が着手可能な仕事を選ぶ"],
+ ["Identity is re-injected and execution resumes", "identity を再注入して execution を再開する"],
+ ["The system records which task is using which execution lane.", "どの task がどの execution lane を使っているかをシステムが記録します。"],
+ ["Closeout explicitly decides whether to keep or remove the lane.", "closeout は lane を keep するか remove するかを明示的に決めます。"],
+ ["Registers each isolated lane's path, branch, and task_id.", "各 isolated lane の path・branch・task_id を登録します。"],
+ ["The task record shows which lane it is currently using.", "task record から、いまどの lane を使っているかが分かります。"],
+ ["Lifecycle events such as create, enter, run, and closeout.", "`create`・`enter`・`run`・`closeout` などのライフサイクル event です。"],
+ ["Different tasks do not share uncommitted changes by default.", "異なる task は、既定では未コミット変更を共有しません。"],
+ ["The execution record for one lane.", "1 本の lane に対応する execution record です。"],
+ ["The explicit result of keep versus reclaim.", "`keep` するか reclaim するかの明示結果です。"],
+ ["A task binds to a worktree lane", "task が worktree lane に結び付く"],
+ ["Commands run inside the isolated directory", "コマンドが isolated directory 内で走る"],
+ ["Closeout decides the lane's final fate", "closeout が lane の最終的な行き先を決める"],
+ ["Discovers capability first, then routes to native, plugin, or MCP.", "まず capability を発見し、その後で native・plugin・MCP のどこへ送るかを決めます。"],
+ ["External capabilities and native tools share one permission contract.", "external capability と native tool は同じ permission contract を共有します。"],
+ ["Remote results are normalized into a payload the main loop already understands.", "remote result も、主ループが理解できる標準 payload へ正規化されます。"],
+ ["Tells the system which external servers are available.", "どの external server が利用可能かをシステムへ伝えます。"],
+ ["Collects native, plugin, and MCP capability into one comparable view.", "native・plugin・MCP の capability を 1 つの比較可能な view へまとめます。"],
+ ["A local handler.", "ローカル handler です。"],
+ ["Remote capability provided by an external server or plugin.", "external server または plugin が提供する remote capability です。"],
+ ["A capability object carrying server, source, and risk information.", "`server`・`source`・`risk` を持つ capability object です。"],
+ ["Capability discovery happens first", "まず capability discovery を行う"],
+ ["Routing and permission stay unified", "routing と permission を分離せず 1 本に保つ"],
+ ["A normalized result writes back into the main loop", "正規化した結果を主ループへ書き戻す"],
+ [
+ "Background tasks fully separate the existence of work from one live execution attempt, which is where runtime records become first-class.",
+ "background task は「仕事目標が存在すること」と「今この実行が走っていること」を切り分け、runtime record を一等構造へ押し上げます。",
+ ],
+ [
+ "Drains background notifications before the next model call.",
+ "次のモデル呼び出し前に、バックグラウンド通知を回収して主線へ戻します。",
+ ],
+ [
+ "The durable task goal still lives on the task board.",
+ "永続 task goal は task board 側に残り続けます。",
+ ],
+ [
+ "Describes one running or completed execution slot.",
+ "いま走っている、または完了済みの execution slot を表します。",
+ ],
+ [
+ "The full artifact goes to disk while notifications carry only a preview.",
+ "完全な成果物はディスクへ書き出し、notification には preview だけを載せます。",
+ ],
+ [
+ "Slow commands execute on a side path while the main loop keeps moving.",
+ "遅いコマンドは side lane で実行され、その間も主ループは前へ進みます。",
+ ],
+ [
+ "The bridge back into the main loop.",
+ "結果を主ループへ戻す橋渡しです。",
+ ],
+ [
+ "The loop creates a runtime record",
+ "主ループが runtime record を作る",
+ ],
+ [
+ "A background slot runs the slow command",
+ "バックグラウンド slot が遅いコマンドを実行する",
+ ],
+ [
+ "notification plus output_file returns to the main system",
+ "notification と output_file が主システムへ戻る",
+ ],
+ ["Notification Drain", "通知ドレイン"],
+ ["Task Goal", "タスク目標"],
+ ["Background Execution Slot", "バックグラウンド実行スロット"],
+ ["Agent Loop", "Agent ループ"],
+ ["Assistant Content", "Assistant 内容"],
+ ["Dispatch Map", "ディスパッチマップ"],
+ ["Dispatch Entry", "ディスパッチ項目"],
+ ["Todo List", "Todo リスト"],
+ ["Parent messages", "親 messages"],
+ ["Child messages", "子 messages"],
+ ["Subtask Request", "サブタスク要求"],
+ ["Skill Discovery", "スキル探索"],
+ ["Skill Load", "スキル読み込み"],
+ ["Skill Registry", "スキルレジストリ"],
+ ["Persisted Output", "永続出力"],
+ ["Summary State", "要約状態"],
+ ["Micro Compact Record", "マイクロ compact 記録"],
+ ["Summary Compact", "要約 compact"],
+ ["Permission Gate", "権限ゲート"],
+ ["Normalized Intent", "正規化意図"],
+ ["Lifecycle Events", "ライフサイクルイベント"],
+ ["Hook Registry", "Hook レジストリ"],
+ ["Audit Sink", "監査シンク"],
+ ["Memory Store", "Memory ストア"],
+ ["Prompt Builder", "Prompt ビルダー"],
+ ["Runtime Context", "ランタイム文脈"],
+ ["Section Order", "セクション順序"],
+ ["Recovery Manager", "回復マネージャー"],
+ ["Continuation Reason", "継続理由"],
+ ["Retry Bounds", "再試行境界"],
+ ["Unlock Rules", "解放ルール"],
+ ["Task Board", "タスク板"],
+ ["Dependency Edges", "依存エッジ"],
+ ["Notification Drain", "通知ドレイン"],
+ ["Task Goal", "タスク目標"],
+ ["Schedule Matcher", "スケジュール照合器"],
+ ["Lead Orchestrator", "主オーケストレーター"],
+ ["Team Roster", "チーム roster"],
+ ["Inbox", "受信箱"],
+ ["Persistent Teammate", "常駐チームメイト"],
+ ["MessageEnvelope", "メッセージ封筒"],
+ ["Protocol Envelope", "プロトコル封筒"],
+ ["Protocol State Machine", "プロトコル状態機械"],
+ ["Request Store", "要求ストア"],
+ ["Idle Poll Loop", "アイドル輪詢ループ"],
+ ["Claim Policy", "claim ポリシー"],
+ ["Claim Events", "claim イベント"],
+ ["Autonomous Worker", "自律 worker"],
+ ["Task-to-Lane Binding", "task と lane の結合"],
+ ["Closeout Semantics", "closeout 意味論"],
+ ["Worktree Index", "Worktree インデックス"],
+ ["Event Log", "イベントログ"],
+ ["Isolated Directory Lane", "隔離ディレクトリレーン"],
+ ["Closeout Record", "closeout 記録"],
+ ["Capability Router", "capability ルーター"],
+ ["Shared Permission Gate", "共有権限ゲート"],
+ ["Result Normalizer", "結果正規化器"],
+ ["Plugin Manifest", "プラグインマニフェスト"],
+ ["Capability View", "capability view"],
+ ["Native Tool", "ネイティブツール"],
+ ["MCP / Plugin Lane", "MCP / plugin レーン"],
+ ["Scoped Capability", "スコープ付き capability"],
+ ["NEW", "新規"],
+ ],
+};
+
+function applyReplacementPairs(text: string, replacements: ReplacementPair[]): string {
+ return [...replacements]
+ .sort((a, b) => b[0].length - a[0].length)
+ .reduce((result, [from, to]) => result.split(from).join(to), text);
+}
+
+function normalizeMultilineText(text: string): string {
+ return text.replace(/\\n/g, "\n");
+}
+
+export function normalizeDiagramLocale(locale: string): DiagramLocale {
+ if (locale === "zh" || locale === "ja") return locale;
+ return "en";
+}
+
+export function translateFlowText(locale: string, text: string): string {
+ const normalizedLocale = normalizeDiagramLocale(locale);
+ const normalizedText = normalizeMultilineText(text);
+
+ if (normalizedLocale === "en") return normalizedText;
+
+ return applyReplacementPairs(normalizedText, FLOW_REPLACEMENTS[normalizedLocale]);
+}
+
+export function translateArchitectureText(locale: string, text: string): string {
+ const normalizedLocale = normalizeDiagramLocale(locale);
+ const normalizedText = normalizeMultilineText(text);
+
+ if (normalizedLocale === "en") return normalizedText;
+
+ return applyReplacementPairs(
+ normalizedText,
+ ARCHITECTURE_REPLACEMENTS[normalizedLocale]
+ );
+}
+
+export function pickDiagramText<T extends { zh: string; en: string; ja?: string }>(
+ locale: string,
+ value: T
+): string {
+ const normalizedLocale = normalizeDiagramLocale(locale);
+
+ if (normalizedLocale === "zh") return value.zh;
+ if (normalizedLocale === "ja") return value.ja ?? value.en;
+ return value.en;
+}
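+
+// Illustrative usage (a sketch; hypothetical calls, not part of this change):
+//   translateFlowText("zh", "Main Loop")                      // -> "主循环"
+//   translateFlowText("en", "Main Loop")                      // -> returned unchanged
+//   pickDiagramText("ja", { zh: "主循环", en: "Main Loop" })  // -> "Main Loop" (missing ja falls back to en)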
diff --git a/web/src/lib/session-assets.ts b/web/src/lib/session-assets.ts
new file mode 100644
index 000000000..a7630bc98
--- /dev/null
+++ b/web/src/lib/session-assets.ts
@@ -0,0 +1,35 @@
+export const GENERIC_OVERVIEW_VERSIONS = new Set([
+ "s07",
+ "s08",
+ "s09",
+ "s10",
+ "s11",
+ "s12",
+ "s13",
+ "s14",
+ "s15",
+ "s16",
+ "s17",
+ "s18",
+ "s19",
+]);
+
+export const GENERIC_SCENARIO_VERSIONS = new Set(GENERIC_OVERVIEW_VERSIONS);
+
+export const GENERIC_ANNOTATION_VERSIONS = new Set(GENERIC_OVERVIEW_VERSIONS);
+
+export function resolveLegacySessionAssetVersion(version: string): string {
+ return version;
+}
+
+export function isGenericOverviewVersion(version: string): boolean {
+ return GENERIC_OVERVIEW_VERSIONS.has(version);
+}
+
+export function isGenericScenarioVersion(version: string): boolean {
+ return GENERIC_SCENARIO_VERSIONS.has(version);
+}
+
+export function isGenericAnnotationVersion(version: string): boolean {
+ return GENERIC_ANNOTATION_VERSIONS.has(version);
+}
diff --git a/web/src/lib/stage-checkpoints.ts b/web/src/lib/stage-checkpoints.ts
new file mode 100644
index 000000000..981942a90
--- /dev/null
+++ b/web/src/lib/stage-checkpoints.ts
@@ -0,0 +1,102 @@
+import type { LearningLayer, VersionId } from "@/lib/constants";
+
+type SupportedLocale = "zh" | "en" | "ja";
+
+export interface StageCheckpoint {
+ layer: LearningLayer;
+ entryVersion: VersionId;
+ endVersion: VersionId;
+ title: Record<SupportedLocale, string>;
+ body: Record<SupportedLocale, string>;
+ rebuild: Record<SupportedLocale, string>;
+}
+
+export const STAGE_CHECKPOINTS: readonly StageCheckpoint[] = [
+ {
+ layer: "core",
+ entryVersion: "s01",
+ endVersion: "s06",
+ title: {
+ zh: "先停在这里,自己重做一遍单 agent 主骨架",
+ en: "Pause here and rebuild the single-agent system from scratch",
+ ja: "ここで一度止まり、単一 agent の背骨を自分で作り直す",
+ },
+ body: {
+ zh: "读完 `s01-s06` 后,最有价值的动作不是立刻跳去权限或团队,而是从空目录里重新做出主循环、工具分发、会话计划、子任务隔离、技能加载和上下文压缩。",
+ en: "You now have a complete single-agent system. The most valuable thing you can do right now is not rush ahead -- it's to open an empty directory and rebuild the loop, tool dispatch, session planning, subtask isolation, skill loading, and context compaction from memory.",
+ ja: "`s01-s06` の後で最も価値が高いのは、そのまま permission や team へ進むことではありません。空のディレクトリから、loop・tool dispatch・session planning・subtask isolation・skill loading・context compaction を作り直すことです。",
+ },
+ rebuild: {
+ zh: "一个能连续工作多轮、能调用工具、能写 todo、能委派子任务、能按需加载技能并且能做最小压缩的单 agent harness。",
+ en: "A single-agent harness that can survive multiple turns, call tools, keep a todo plan, delegate one-shot subtasks, load skills on demand, and compact context at the minimum useful level.",
+ ja: "複数ターン継続でき、tool を呼び、todo plan を持ち、単発 subtask を委譲し、skill を必要時だけ読み込み、最小限の compact を行える単一 agent harness。",
+ },
+ },
+ {
+ layer: "hardening",
+ entryVersion: "s07",
+ endVersion: "s11",
+ title: {
+ zh: "到这里先把控制面补稳,再进入任务系统",
+ en: "Stabilize the control plane before moving to the task runtime",
+ ja: "ここで制御面を安定させてからタスク実行層へ入る",
+ },
+ body: {
+ zh: "读完 `s07-s11` 后,应该先自己补出一条完整的控制面:执行前权限闸门、固定生命周期 Hook、跨会话记忆、输入装配和恢复续行分支。",
+ en: "Your agent now has real safety. Rebuild it with a permission gate before every tool call, lifecycle hooks for extension, cross-session memory, a prompt assembly pipeline, and structured recovery branches.",
+ ja: "`s07-s11` の後は、自分の制御面を一度まとめて作り直すべきです。実行前の permission gate、固定ライフサイクル hook、cross-session memory、入力組み立て、recovery 分岐を 1 本に戻します。",
+ },
+ rebuild: {
+ zh: "一个不只是会跑,而是已经补齐执行闸门、扩展插口、长期记忆、输入装配与恢复续行的稳固单 agent。",
+ en: "A single agent with a real control plane: execution gating, extension hooks, durable memory, input assembly, and recovery branches.",
+ ja: "ただ動くだけでなく、実行前 gate、拡張 hook、長期 memory、入力組み立て、recovery 分岐までそろった安定した single agent。",
+ },
+ },
+ {
+ layer: "runtime",
+ entryVersion: "s12",
+ endVersion: "s14",
+ title: {
+ zh: "到这里先把“工作系统”手搓出来,再看团队层",
+ en: "Build the work runtime before moving into teams",
+ ja: "ここで work runtime を作り切ってから team 層へ進む",
+ },
+ body: {
+ zh: "读完 `s12-s14` 后,读者最该做的是把 `task goal`、`runtime slot`、`notification` 和 `schedule trigger` 四层对象真的分开写出来,而不是只记住它们的名字。",
+ en: "Now build separate structures for task goals, runtime slots, notifications, and schedule triggers. Don't just remember the names -- implement each as a distinct piece.",
+ ja: "`s12-s14` の後で大事なのは、task goal・runtime slot・notification・schedule trigger を名前だけで覚えることではなく、別々の構造として実装し分けることです。",
+ },
+ rebuild: {
+ zh: "一套能记录持久任务、后台运行慢工作、用通知带回结果,并且允许时间触发开工的最小 runtime 系统。",
+ en: "A minimal runtime that can persist task goals, run slow work in the background, return results through notifications, and let time trigger new work.",
+ ja: "永続 task goal を持ち、遅い仕事を background で回し、notification で結果を戻し、時間で新しい仕事を起動できる最小 runtime system。",
+ },
+ },
+ {
+ layer: "platform",
+ entryVersion: "s15",
+ endVersion: "s19",
+ title: {
+ zh: "最后这一段要做的是平台边界,而不是只加很多功能",
+ en: "The final stage: building the platform boundary",
+ ja: "最後の段階で作るのは機能の山ではなくプラットフォーム境界です",
+ },
+ body: {
+ zh: "读完 `s15-s19` 后,最应该回头确认的是五层边界有没有彻底分清:teammate、protocol request、task、worktree 执行通道、external capability。",
+ en: "You've completed the entire course. The key test: can you cleanly separate teammate, protocol request, task, worktree lane, and external capability? If yes, you understand the full design backbone.",
+ ja: "`s15-s19` の後で最も確認すべきなのは、teammate・protocol request・task・worktree lane・external capability の 5 層を本当に分けて保てるかどうかです。",
+ },
+ rebuild: {
+ zh: "一个拥有长期队友、共享协议、自治认领、隔离执行通道,并把原生工具与外部能力接回同一控制面的平台雏形。",
+ en: "An agent platform with persistent teammates, shared protocols, autonomous claiming, isolated execution lanes, and one control plane for native and external capabilities.",
+ ja: "永続 teammate、共有 protocol、自律 claim、分離 execution lane、そして native/external capability を 1 つの control plane へ戻したプラットフォームの骨格。",
+ },
+ },
+] as const;
+
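+// Resolves the single checkpoint card for a learning layer. Returns null when
+// the layer is missing or has no checkpoint, so callers can simply skip rendering.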
+export function getStageCheckpoint(
+ layer: LearningLayer | null | undefined
+): StageCheckpoint | null {
+ if (!layer) return null;
+ return STAGE_CHECKPOINTS.find((checkpoint) => checkpoint.layer === layer) ?? null;
+}
diff --git a/web/src/lib/version-content.ts b/web/src/lib/version-content.ts
new file mode 100644
index 000000000..03ccb3be8
--- /dev/null
+++ b/web/src/lib/version-content.ts
@@ -0,0 +1,325 @@
+import { VERSION_META, type VersionId } from "@/lib/constants";
+
+export type LearningLocale = "zh" | "en" | "ja";
+
+type VersionContent = {
+ subtitle: string;
+ coreAddition: string;
+ keyInsight: string;
+};
+
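+// Hand-maintained per-locale copy for every version. getVersionContent() guards
+// at runtime for version ids outside this table and falls back to VERSION_META.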
+const VERSION_CONTENT: Record<LearningLocale, Record<VersionId, VersionContent>> = {
+ zh: {
+ s01: {
+ subtitle: "最小闭环",
+ coreAddition: "LoopState + tool_result 回流",
+ keyInsight: "真正的 agent 起点,是把真实工具结果重新喂回模型,而不只是输出一段文本。",
+ },
+ s02: {
+ subtitle: "把意图路由成动作",
+ coreAddition: "工具规格 + 分发映射",
+ keyInsight: "主循环本身不用变复杂;工具能力靠一层清晰的路由面增长。",
+ },
+ s03: {
+ subtitle: "会话级计划",
+ coreAddition: "PlanningState + reminder loop",
+ keyInsight: "对多步骤任务来说,可见计划不是装饰,而是防止会话漂移的稳定器。",
+ },
+ s04: {
+ subtitle: "子任务使用全新上下文",
+ coreAddition: "带隔离消息历史的委派",
+ keyInsight: "把探索性工作移进干净上下文后,父 agent 才能持续盯住主目标。",
+ },
+ s05: {
+ subtitle: "先轻发现,再深加载",
+ coreAddition: "技能注册表 + 按需注入",
+ keyInsight: "专门知识不该一开始全部塞进上下文,而该在需要时被轻量发现、按需展开。",
+ },
+ s06: {
+ subtitle: "保持活跃上下文小而稳",
+ coreAddition: "持久标记 + 微压缩 + 总结压缩",
+ keyInsight: "压缩的目标不是删历史,而是保住连续性和下一步所需的工作记忆。",
+ },
+ s07: {
+ subtitle: "意图先过安全闸门",
+ coreAddition: "deny / mode / allow / ask 管线",
+ keyInsight: "模型产生的执行意图,必须先通过清晰的权限门,再变成真正动作。",
+ },
+ s08: {
+ subtitle: "不改主循环也能扩展",
+ coreAddition: "生命周期事件 + 副作用 Hook",
+ keyInsight: "Hook 让系统围绕主循环生长,而不是不断重写主循环本身。",
+ },
+ s09: {
+ subtitle: "只保存跨会话还成立的东西",
+ coreAddition: "类型化记忆记录 + reload 路径",
+ keyInsight: "只有跨会话、无法从当前工作重新推导的知识,才值得进入 memory。",
+ },
+ s10: {
+ subtitle: "把输入组装成流水线",
+ coreAddition: "Prompt 分段 + 动态装配",
+ keyInsight: "模型看到的不是一坨固定 prompt,而是一条按阶段拼装的输入流水线。",
+ },
+ s11: {
+ subtitle: "先恢复,再继续",
+ coreAddition: "continuation reason + retry 分支",
+ keyInsight: "系统必须清楚自己此刻是在继续、重试,还是处于恢复流程。",
+ },
+ s12: {
+ subtitle: "持久化工作图",
+ coreAddition: "Task 记录 + 依赖 + 解锁规则",
+ keyInsight: "Todo 适合会话内规划,持久任务图才负责跨步骤、跨阶段协调工作。",
+ },
+ s13: {
+ subtitle: "把任务目标和运行槽位分开",
+ coreAddition: "RuntimeTaskState + 异步执行槽位",
+ keyInsight: "持久任务描述要完成什么,运行槽位描述谁在跑、跑到哪里;两者相关但不是一回事。",
+ },
+ s14: {
+ subtitle: "让时间也能触发工作",
+ coreAddition: "基于 runtime task 的定时触发",
+ keyInsight: "当任务能后台运行以后,时间本身也会变成另一种启动入口。",
+ },
+ s15: {
+ subtitle: "长驻的专职队友",
+ coreAddition: "团队 roster + teammate 生命周期",
+ keyInsight: "系统一旦长期运行,就需要有名字、有身份、可持续存在的队友,而不只是一次性子任务。",
+ },
+ s16: {
+ subtitle: "共享请求-响应规则",
+ coreAddition: "协议信封 + 请求关联",
+ keyInsight: "团队只有在协作遵守共同消息模式时,才会变得可理解、可调试、可扩展。",
+ },
+ s17: {
+ subtitle: "自主认领,自主续跑",
+ coreAddition: "空闲轮询 + 角色感知认领 + 恢复上下文",
+ keyInsight: "自主性开始于:队友能安全找到可做的事、认领它,并带着正确身份继续执行。",
+ },
+ s18: {
+ subtitle: "独立目录,独立通道",
+ coreAddition: "task-worktree 状态 + 显式 enter / closeout 生命周期",
+ keyInsight: "task 管目标,worktree 管隔离执行通道和收尾状态;两者不能混成一个概念。",
+ },
+ s19: {
+ subtitle: "外部能力总线",
+ coreAddition: "作用域服务器 + 能力路由",
+ keyInsight: "外部能力系统不该是外挂;它们应和原生工具一起处在同一控制面上。",
+ },
+ },
+ en: {
+ s01: {
+ subtitle: "Minimal Closed Loop",
+ coreAddition: "LoopState + tool_result feedback",
+ keyInsight: "An agent is just a loop: send messages, execute tools, feed results back, repeat.",
+ },
+ s02: {
+ subtitle: "Route Intent into Action",
+ coreAddition: "Tool specs + dispatch map",
+ keyInsight: "Adding a tool means adding one handler. The loop never changes.",
+ },
+ s03: {
+ subtitle: "Session Planning",
+ coreAddition: "PlanningState + reminder loop",
+ keyInsight: "A visible plan keeps the agent on track when tasks get complex.",
+ },
+ s04: {
+ subtitle: "Fresh Context per Subtask",
+ coreAddition: "Delegation with isolated message history",
+ keyInsight: "A subagent is mainly a context boundary, not a process trick.",
+ },
+ s05: {
+ subtitle: "Discover Cheaply, Load Deeply",
+ coreAddition: "Skill registry + on-demand injection",
+ keyInsight: "Discover cheaply, load deeply -- only when needed.",
+ },
+ s06: {
+ subtitle: "Keep Active Context Small and Stable",
+ coreAddition: "Persist markers + micro compact + summary compact",
+ keyInsight: "Compaction isn't deleting history -- it's relocating detail so the agent can keep working.",
+ },
+ s07: {
+ subtitle: "Intent Must Pass a Safety Gate",
+ coreAddition: "deny / mode / allow / ask pipeline",
+ keyInsight: "Safety is a pipeline, not a boolean: deny, check mode, allow, then ask.",
+ },
+ s08: {
+ subtitle: "Extend Without Rewriting the Loop",
+ coreAddition: "Lifecycle events + side-effect hooks",
+ keyInsight: "The loop owns control flow; hooks only observe, block, or annotate at named moments.",
+ },
+ s09: {
+ subtitle: "Keep Only What Survives Sessions",
+ coreAddition: "Typed memory records + reload path",
+ keyInsight: "Memory gives direction; current observation gives truth.",
+ },
+ s10: {
+ subtitle: "Assemble Inputs as a Pipeline",
+ coreAddition: "Prompt sections + dynamic assembly",
+ keyInsight: "The model sees a constructed input pipeline, not one giant static string.",
+ },
+ s11: {
+ subtitle: "Recover, Then Continue",
+ coreAddition: "Continuation reasons + retry branches",
+ keyInsight: "Most failures aren't true task failure -- they're signals to try a different path.",
+ },
+ s12: {
+ subtitle: "Durable Work Graph",
+ coreAddition: "Task records + dependencies + unlock rules",
+ keyInsight: "Todo lists help a session; durable task graphs coordinate work that outlives it.",
+ },
+ s13: {
+ subtitle: "Background Execution Lanes",
+ coreAddition: "RuntimeTaskState + async execution slots",
+ keyInsight: "Background execution is a runtime lane, not a second main loop.",
+ },
+ s14: {
+ subtitle: "Let Time Trigger Work",
+ coreAddition: "Scheduled triggers over runtime tasks",
+ keyInsight: "Scheduling is not a separate system -- it just feeds the same agent loop from a timer.",
+ },
+ s15: {
+ subtitle: "Persistent Specialist Teammates",
+ coreAddition: "Team roster + teammate lifecycle",
+ keyInsight: "Teammates persist beyond one prompt, have identity, and coordinate through durable channels.",
+ },
+ s16: {
+ subtitle: "Shared Request-Response Rules",
+ coreAddition: "Protocol envelopes + request correlation",
+ keyInsight: "A protocol request is a structured message with an ID; the response must reference the same ID.",
+ },
+ s17: {
+ subtitle: "Self-Claim, Self-Resume",
+ coreAddition: "Idle polling + role-aware self-claim + resume context",
+ keyInsight: "Autonomy is a bounded mechanism -- idle, scan, claim, resume -- not magic.",
+ },
+ s18: {
+ subtitle: "Separate Directory, Separate Lane",
+ coreAddition: "Task-worktree state + explicit enter / closeout lifecycle",
+ keyInsight: "Tasks answer what; worktrees answer where. Keep them separate.",
+ },
+ s19: {
+ subtitle: "External Capability Bus",
+ coreAddition: "Scoped servers + capability routing",
+ keyInsight: "External capabilities join the same routing, permission, and result-append path as native tools.",
+ },
+ },
+ ja: {
+ s01: {
+ subtitle: "最小の閉ループ",
+ coreAddition: "LoopState + tool_result の戻し込み",
+ keyInsight: "本当の agent の始まりは、実際のツール結果をモデルへ戻すところにあり、単なる文章出力ではありません。",
+ },
+ s02: {
+ subtitle: "意図を実行へルーティングする",
+ coreAddition: "ツール仕様 + ディスパッチマップ",
+ keyInsight: "主ループを複雑にしなくても、きれいなルーティング層を置けばツール能力は増やせます。",
+ },
+ s03: {
+ subtitle: "セッション計画",
+ coreAddition: "PlanningState + reminder loop",
+ keyInsight: "多段作業では、見える計画は飾りではなく、会話の漂流を防ぐ安定器です。",
+ },
+ s04: {
+ subtitle: "サブタスクごとに新しい文脈を使う",
+ coreAddition: "分離されたメッセージ履歴を持つ委譲",
+ keyInsight: "探索作業をきれいなサブコンテキストへ移して初めて、親 agent は主目標へ集中し続けられます。",
+ },
+ s05: {
+ subtitle: "軽く見つけて、必要時に深く読む",
+ coreAddition: "スキルレジストリ + オンデマンド注入",
+ keyInsight: "専門知識は最初から全部を文脈へ詰め込まず、必要になった時だけ軽く見つけて深く展開するべきです。",
+ },
+ s06: {
+ subtitle: "活性コンテキストを小さく安定させる",
+ coreAddition: "永続マーカー + micro compact + summary compact",
+ keyInsight: "圧縮の目的は履歴を消すことではなく、連続性と次の一歩に必要な作業記憶を守ることです。",
+ },
+ s07: {
+ subtitle: "意図は先に安全ゲートを通る",
+ coreAddition: "deny / mode / allow / ask パイプライン",
+ keyInsight: "モデルが出した実行意図は、明確な権限ゲートを通った後で初めて実動作になるべきです。",
+ },
+ s08: {
+ subtitle: "主ループを書き換えずに拡張する",
+ coreAddition: "ライフサイクルイベント + 副作用 Hook",
+ keyInsight: "Hook は主ループの周囲へ機能を育てるためのもので、主ループ自体を何度も書き換えるためのものではありません。",
+ },
+ s09: {
+ subtitle: "セッションを越えて残るものだけ保存する",
+ coreAddition: "型付き memory record + reload 経路",
+ keyInsight: "現在の作業空間から再導出できない、セッションを越えて有効な知識だけが memory に入る価値があります。",
+ },
+ s10: {
+ subtitle: "入力をパイプラインとして組み立てる",
+ coreAddition: "Prompt セクション + 動的組み立て",
+ keyInsight: "モデルが見るのは巨大な固定 prompt 文字列ではなく、実行時に組み上がる入力パイプラインです。",
+ },
+ s11: {
+ subtitle: "回復してから続行する",
+ coreAddition: "continuation reason + retry 分岐",
+ keyInsight: "完成度の高い agent は、いま続行中なのか、再試行中なのか、回復処理中なのかを自分で区別できなければなりません。",
+ },
+ s12: {
+ subtitle: "永続ワークグラフ",
+ coreAddition: "Task record + 依存 + 解放ルール",
+ keyInsight: "Todo はセッション内計画に向きますが、長い作業の調整を担うのは永続 task graph です。",
+ },
+ s13: {
+ subtitle: "タスク目標と実行スロットを分ける",
+ coreAddition: "RuntimeTaskState + 非同期実行スロット",
+ keyInsight: "永続タスクは何を終えるべきかを表し、実行スロットは誰がどこまで走っているかを表します。両者は関連しますが同一ではありません。",
+ },
+ s14: {
+ subtitle: "時間でも仕事を起動できるようにする",
+ coreAddition: "runtime task 上の定時トリガー",
+ keyInsight: "タスクがバックグラウンド実行できるようになると、時間そのものも起動入口の一つになります。",
+ },
+ s15: {
+ subtitle: "常駐する専門チームメイト",
+ coreAddition: "チーム roster + teammate lifecycle",
+ keyInsight: "長く動くシステムには、その場限りのサブタスクではなく、名前と役割を持って居続けるチームメイトが必要です。",
+ },
+ s16: {
+ subtitle: "共有された request-response 規則",
+ coreAddition: "プロトコル封筒 + request の相関付け",
+ keyInsight: "協調が共通メッセージ規則に従う時、チームは初めて理解しやすく、デバッグしやすく、拡張しやすくなります。",
+ },
+ s17: {
+ subtitle: "自分で引き受け、自分で再開する",
+ coreAddition: "アイドル polling + 役割認識 claim + resume context",
+ keyInsight: "自律性は、チームメイトが実行可能な仕事を安全に見つけ、引き受け、正しい身元文脈で再開できるところから始まります。",
+ },
+ s18: {
+ subtitle: "別ディレクトリ、別レーン",
+ coreAddition: "task-worktree 状態 + 明示的な enter / closeout lifecycle",
+ keyInsight: "task は目標を管理し、worktree は隔離された実行レーンと収束状態を管理します。この二つは混ぜてはいけません。",
+ },
+ s19: {
+ subtitle: "外部 capability bus",
+ coreAddition: "scope 付き server + capability routing",
+ keyInsight: "外部 capability system は後付けの別物ではなく、ネイティブツールと同じ control plane に置くべきです。",
+ },
+ },
+};
+
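+// Unsupported locales collapse to English instead of throwing.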
+export function normalizeLearningLocale(locale: string): LearningLocale {
+ if (locale === "zh" || locale === "ja") return locale;
+ return "en";
+}
+
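+// Lookup order: requested locale -> English -> VERSION_META fallback fields.
+// Illustrative usage (any valid VersionId behaves the same way):
+//   getVersionContent("s03", "ja")  // ja entry
+//   getVersionContent("s03", "fr")  // unknown locale -> en entry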
+export function getVersionContent(version: string, locale: string): VersionContent {
+ const normalizedLocale = normalizeLearningLocale(locale);
+ const content =
+ VERSION_CONTENT[normalizedLocale][version as VersionId] ??
+ VERSION_CONTENT.en[version as VersionId];
+
+ if (content) return content;
+
+ const fallback = VERSION_META[version];
+
+ return {
+ subtitle: fallback?.subtitle ?? "",
+ coreAddition: fallback?.coreAddition ?? "",
+ keyInsight: fallback?.keyInsight ?? "",
+ };
+}
diff --git a/web/src/lib/version-source.ts b/web/src/lib/version-source.ts
new file mode 100644
index 000000000..194163cf8
--- /dev/null
+++ b/web/src/lib/version-source.ts
@@ -0,0 +1,25 @@
+import type { AgentVersion } from "@/types/agent-data";
+
+type SourceLocale = "en" | "zh" | "ja";
+
+function normalizeSourceLocale(locale: string): SourceLocale {
+ if (locale === "zh") return "zh";
+ if (locale === "ja") return "ja";
+ return "en";
+}
+
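+// Resolution order: requested locale -> English -> legacy `source` field, so
+// pages keep working against data generated before `sourceByLocale` existed.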
+export function getLocalizedSource(
+ version: Pick<AgentVersion, "source" | "sourceByLocale">,
+ locale: string
+): string {
+ const normalized = normalizeSourceLocale(locale);
+ const localized = version.sourceByLocale?.[normalized];
+ if (localized) return localized;
+
+ // `ja` falls back to English source by design.
+ const english = version.sourceByLocale?.en;
+ if (english) return english;
+
+ // Backward compatibility for old generated data without `sourceByLocale`.
+ return version.source;
+}
diff --git a/web/src/types/agent-data.ts b/web/src/types/agent-data.ts
index 7cf01a04d..c3e3c2d0d 100644
--- a/web/src/types/agent-data.ts
+++ b/web/src/types/agent-data.ts
@@ -10,7 +10,12 @@ export interface AgentVersion {
keyInsight: string;
classes: { name: string; startLine: number; endLine: number }[];
functions: { name: string; signature: string; startLine: number }[];
- layer: "tools" | "planning" | "memory" | "concurrency" | "collaboration";
+ layer: "core" | "hardening" | "runtime" | "platform";
+ sourceByLocale?: {
+ en?: string;
+ zh?: string;
+ ja?: string;
+ };
source: string;
}
@@ -24,9 +29,12 @@ export interface VersionDiff {
}
export interface DocContent {
- version: string;
+ version: string | null;
+ slug: string;
locale: "en" | "zh" | "ja";
title: string;
+ kind: "chapter" | "bridge";
+ filename: string;
content: string; // raw markdown
}