ghostwright · mcheemaa · Apr 29, 2026 · Apr 28, 2026 · Apr 29, 2026 · Apr 29, 2026
diff --git a/src/agent/__tests__/thinking-config.test.ts b/src/agent/__tests__/thinking-config.test.ts
@@ -0,0 +1,87 @@
+// Coverage for every cell of the model x thinking-shape matrix the live
+// Messages API enforces (verified 2026-04-29). The helper is the single
+// source of truth for SDK `query()` thinking options across chat, judge,
+// runtime, and reflection callsites.
+
+import { describe, expect, test } from "bun:test";
+import { JUDGE_MODEL_HAIKU, JUDGE_MODEL_OPUS, JUDGE_MODEL_SONNET } from "../../evolution/judge-models.ts";
+import { getThinkingConfig } from "../thinking-config.ts";
+
+describe("getThinkingConfig", () => {
+	// Shared manual-mode check: the type tag plus a positive budget, for every "enabled" case.
+	const expectManual = (model: string): void => {
+		const config = getThinkingConfig(model);
+		expect(config.type).toBe("enabled");
+		if (config.type === "enabled") {
+			expect(config.budgetTokens).toBeGreaterThan(0);
+		}
+	};
+
+	test("Opus 4.7 returns adaptive (Opus 4.7 rejects manual enabled with 400)", () => {
+		expect(getThinkingConfig(JUDGE_MODEL_OPUS)).toEqual({ type: "adaptive" });
+		expect(getThinkingConfig("claude-opus-4-7")).toEqual({ type: "adaptive" });
+	});
+
+	test("Opus 4.6 returns adaptive (recommended; manual is deprecated)", () => {
+		expect(getThinkingConfig("claude-opus-4-6")).toEqual({ type: "adaptive" });
+	});
+
+	test("Sonnet 4.6 returns adaptive (recommended; manual still functional)", () => {
+		expect(getThinkingConfig(JUDGE_MODEL_SONNET)).toEqual({ type: "adaptive" });
+		expect(getThinkingConfig("claude-sonnet-4-6")).toEqual({ type: "adaptive" });
+	});
+
+	test("Mythos preview returns adaptive", () => {
+		expect(getThinkingConfig("claude-mythos-preview")).toEqual({ type: "adaptive" });
+	});
+
+	test("Haiku 4.5 returns enabled + budgetTokens (Haiku rejects adaptive with 400)", () => {
+		expectManual(JUDGE_MODEL_HAIKU);
+	});
+
+	test("older Haiku 3.x returns enabled + budgetTokens", () => {
+		expectManual("claude-haiku-3-5");
+	});
+
+	test("older Sonnet 3.x returns enabled + budgetTokens", () => {
+		expectManual("claude-sonnet-3-7");
+	});
+
+	test("legacy Opus 4.5 returns enabled + budgetTokens", () => {
+		expectManual("claude-opus-4-5");
+	});
+
+	test("undefined model defaults to adaptive (safe for all new models)", () => {
+		expect(getThinkingConfig(undefined)).toEqual({ type: "adaptive" });
+	});
+
+	test("null model defaults to adaptive", () => {
+		expect(getThinkingConfig(null)).toEqual({ type: "adaptive" });
+	});
+
+	test("empty string defaults to adaptive", () => {
+		expect(getThinkingConfig("")).toEqual({ type: "adaptive" });
+	});
+
+	test("unknown future model defaults to adaptive", () => {
+		// Every new model since Opus 4.7 has been adaptive-only, so an
+		// unrecognised prefix gets adaptive. A wrong guess returns a clear 400
+		// with the required shape, preferable to silent breakage in reflection.
+		expect(getThinkingConfig("claude-future-model-2027")).toEqual({ type: "adaptive" });
+	});
+
+	test("provider-prefixed names fall through to the adaptive default", () => {
+		// Some operators set `model: "anthropic/claude-haiku-4-5"` via LiteLLM.
+		// The helper prefix-matches the bare Anthropic id only, so a provider
+		// prefix falls through to adaptive: the safer failure mode, since Haiku
+		// then 400s with a clear error rather than silently downgrading thinking.
+		expect(getThinkingConfig("anthropic/claude-haiku-4-5")).toEqual({ type: "adaptive" });
+	});
+
+	test("returned object is a fresh value (callers may spread it)", () => {
+		const a = getThinkingConfig(JUDGE_MODEL_OPUS);
+		const b = getThinkingConfig(JUDGE_MODEL_OPUS);
+		expect(a).toEqual(b);
+		expect(a).not.toBe(b); // distinct identities, so one caller's mutation cannot leak to another
+	});
+});
diff --git a/src/agent/chat-query.ts b/src/agent/chat-query.ts
@@ -18,6 +18,7 @@ import { extractCost, extractTextFromMessage } from "./message-utils.ts";
 import { permissionOptionsFromConfig } from "./permission-options.ts";
 import { assemblePrompt } from "./prompt-assembler.ts";
 import type { Session, SessionStore } from "./session-store.ts";
+import { getThinkingConfig } from "./thinking-config.ts";
 
 export type ChatQueryDeps = {
 	config: PhantomConfig;
@@ -106,6 +107,7 @@ export async function executeChatQuery(
 				},
 				persistSession: true,
 				effort: deps.config.effort,
+				thinking: getThinkingConfig(deps.config.model),
 				includePartialMessages: true,
 				agentProgressSummaries: true,
 				promptSuggestions: true,

diff --git a/src/agent/judge-query.ts b/src/agent/judge-query.ts
@@ -3,6 +3,7 @@ import { z } from "zod/v4";
 import { buildProviderEnv } from "../config/providers.ts";
 import type { PhantomConfig } from "../config/types.ts";
 import { extractTextFromMessage } from "./message-utils.ts";
+import { getThinkingConfig } from "./thinking-config.ts";
 
 // Judge subprocess integration. Routes LLM judge calls through the same
 // Agent SDK `query()` subprocess as the main agent so that auth, provider,
@@ -164,6 +165,7 @@ export async function runJudgeQuery<T>(
 			systemPrompt,
 			maxTurns: 1,
 			effort: "low",
+			thinking: getThinkingConfig(resolvedModel),
 			persistSession: false,
 			env: { ...process.env, ...providerEnv },
 		},

diff --git a/src/agent/runtime.ts b/src/agent/runtime.ts
@@ -19,6 +19,7 @@ import { extractCost, extractTextFromMessage } from "./message-utils.ts";
 import { permissionOptionsFromConfig } from "./permission-options.ts";
 import { assemblePrompt } from "./prompt-assembler.ts";
 import { SessionStore } from "./session-store.ts";
+import { getThinkingConfig } from "./thinking-config.ts";
 
 export type RuntimeEvent =
 	| { type: "init"; sessionId: string }
@@ -206,6 +207,7 @@ export class AgentRuntime {
 					systemPrompt: { type: "preset" as const, preset: "claude_code" as const, append: appendPrompt },
 					persistSession: true,
 					effort: this.config.effort,
+					thinking: getThinkingConfig(this.config.model),
 					...(this.config.max_budget_usd > 0 ? { maxBudgetUsd: this.config.max_budget_usd } : {}),
 					abortController: controller,
 					env: { ...process.env, ...providerEnv },

diff --git a/src/agent/thinking-config.ts b/src/agent/thinking-config.ts
@@ -0,0 +1,52 @@
+// Single-source-of-truth picker for the Agent SDK `thinking` option.
+//
+// The matrix is non-uniform across models:
+//   - Opus 4.7 only accepts `{ type: "adaptive" }`. Manual `enabled +
+//     budget_tokens` is rejected with a 400.
+//   - Haiku 4.5 only accepts `{ type: "enabled", budget_tokens: N }`.
+//     Adaptive is rejected with a 400.
+//   - Sonnet 4.6 accepts both shapes (manual is deprecated but still
+//     functional).
+//
+// Verified against the live Messages API on 2026-04-29; see the design
+// note at local/2026-04-29-thinking-config-design.md (local-only).
+//
+// Every SDK `query()` callsite passes `thinking: getThinkingConfig(model)`
+// instead of hard-coding a single shape, so reflection (Haiku tier), chat
+// (Opus tier), judges (Sonnet tier), and the AgentRuntime path all pick
+// the correct shape. Unrecognised models default to adaptive because every
+// model Anthropic has shipped since Opus 4.7 only accepts adaptive.
+
+import type { ThinkingConfig } from "@anthropic-ai/claude-agent-sdk";
+
+const ADAPTIVE_PREFIXES: readonly string[] = [
+	"claude-opus-4-7", // rejects manual `enabled + budget_tokens` with a 400
+	"claude-opus-4-6", // adaptive is the recommended shape; manual is deprecated
+	"claude-sonnet-4-6", // accepts both shapes; adaptive is the recommended one
+	"claude-mythos", // matched by prefix, so it covers ids such as "claude-mythos-preview"
+];
+
+const MANUAL_ONLY_PREFIXES: readonly string[] = [
+	"claude-haiku-4", // Haiku 4.5 rejects adaptive with a 400
+	"claude-haiku-3", // older Haiku 3.x ids
+	"claude-sonnet-3", // older Sonnet 3.x ids
+	"claude-sonnet-4-5", // NOTE(review): no test in the suite shown exercises this prefix
+	"claude-opus-4-5", // legacy Opus 4.5
+]; // NOTE(review): any other id (e.g. "claude-opus-4-1") falls through to adaptive; confirm intended
+
+const MANUAL_BUDGET_TOKENS = 8192; // `budgetTokens` sent with every manual-mode ("enabled") config
+
+// Chooses the SDK `thinking` option for `model`. Manual `enabled` mode goes
+// only to ids that start with a MANUAL_ONLY_PREFIXES entry and with no
+// ADAPTIVE_PREFIXES entry. A missing, empty, or unrecognised id gets adaptive:
+// every model released since Opus 4.7 only accepts adaptive, and a wrong
+// guess fails with a clear 400 naming the required shape. New object per call.
+export function getThinkingConfig(model: string | undefined | null): ThinkingConfig {
+	const id = model ?? "";
+	const matchesAny = (prefixes: readonly string[]): boolean => prefixes.some((prefix) => id.startsWith(prefix));
+	// Adaptive prefixes are checked first, so they win over any manual prefix.
+	if (!matchesAny(ADAPTIVE_PREFIXES) && matchesAny(MANUAL_ONLY_PREFIXES)) {
+		return { type: "enabled", budgetTokens: MANUAL_BUDGET_TOKENS };
+	}
+	return { type: "adaptive" };
+}
diff --git a/src/evolution/reflection-subprocess.ts b/src/evolution/reflection-subprocess.ts
@@ -1,6 +1,7 @@
 import { appendFileSync, existsSync, mkdirSync, readdirSync, unlinkSync, writeFileSync } from "node:fs";
 import { dirname, join } from "node:path";
 import { query } from "@anthropic-ai/claude-agent-sdk";
+import { getThinkingConfig } from "../agent/thinking-config.ts";
 import { buildProviderEnv } from "../config/providers.ts";
 import type { PhantomConfig } from "../config/types.ts";
 import type { EvolutionConfig } from "./config.ts";
@@ -610,6 +611,7 @@ async function defaultRunner(input: SpawnQueryInput): Promise<SpawnQueryResult>
 				permissionMode: "bypassPermissions",
 				allowDangerouslySkipPermissions: true,
 				tools: ["Read", "Write", "Edit", "Glob", "Grep"],
+				thinking: getThinkingConfig(model),
 				systemPrompt,
 				settings: {
 					permissions: { allow, deny },