ghostwright · mcheemaa · May 1, 2026 · May 1, 2026
diff --git a/chat-ui/src/components/tool-call-card.tsx b/chat-ui/src/components/tool-call-card.tsx
@@ -161,11 +161,11 @@ export function ToolCallCard({ tool }: { tool: ToolCallState }) {
 	const inputDetails = toolInputDetails(tool);
 	const output = tool.output ? redactSensitiveText(truncate(tool.output, TOOL_OUTPUT_DISPLAY_LIMIT)) : "";
 
-	const autoExpand = tool.state === "running" || tool.state === "result" || tool.state === "error" || tool.state === "blocked";
+	const autoExpand = tool.state === "error" || tool.state === "blocked";
 	const [isOpen, setIsOpen] = useState(autoExpand);
 
 	useEffect(() => {
-		if (tool.state === "running" || tool.state === "result" || tool.state === "error" || tool.state === "blocked") {
+		if (tool.state === "error" || tool.state === "blocked") {
 			setIsOpen(true);
 		}
 	}, [tool.state]);

diff --git a/research/chat-experience/phase-10g-pi-continuity.md b/research/chat-experience/phase-10g-pi-continuity.md
@@ -0,0 +1,75 @@
+# Phase 10G Pi Continuity Context
+
+Date: 2026-04-30
+
+## Problem
+
+A live Phantom-on-Murph browser session showed that Murph compaction can preserve
+protocol validity and still lose host-level app facts that the user expects the
+agent to remember, such as the exact page URL produced by `phantom_create_page`.
+The symptom was not specific to page URLs. It was a continuity issue after a
+long, tool-heavy run.
+
+## Pi Grounding
+
+Pi already provides the primitive we need:
+
+- `transformContext` runs at the AgentMessage level before `convertToLlm`.
+- Pi custom messages require the app to also provide a `convertToLlm`
+  implementation. Murph's default Pi converter intentionally passes only
+  `user`, `assistant`, and `toolResult` messages.
+- Phantom should therefore inject host facts through `transformContext` as a
+  normal user-context message, not as a custom role that the default converter
+  would filter out.
+
+Murph already exposes this primitive through `MurphOptions.transformContext`,
+passes it through query normalization, and forwards it into the Pi harness.
+
+## Decision
+
+Do not build a parallel Phantom continuity runtime. Phantom should derive compact
+host facts from its existing durable stream log and pass them to Murph through
+`transformContext` as a Pi-compatible user-context message. Murph remains
+responsible for raw transcript compaction, replay, tool-call protocol validity,
+provider transport, and retry behavior.
+
+## Current Implementation
+
+- `src/chat/continuity-context.ts` scans the tail of `chat_stream_events`.
+- It extracts user-visible page artifacts from `phantom_create_page` and
+  `phantom_preview_page`.
+- It intentionally excludes `phantom_generate_login` authentication links from
+  page artifacts.
+- It includes recent `session.compact_boundary` checkpoints.
+- `src/agent/murph-context.ts` wraps that context in
+  `<phantom_chat_context>` and inserts it as a Pi-compatible user-context
+  message before the latest user message when possible.
+- The chat query path uses this transform only when `agent_runtime` is `murph`.
+  On any other runtime (the Anthropic fallback), the same context is passed
+  through the system prompt append path instead.
+- Tool call cards now default collapsed, with errors and blocked calls still
+  opening automatically.
+
+## Verification
+
+- Focused Phantom tests pass:
+  `bun test src/agent/__tests__/murph-context.test.ts src/chat/__tests__/continuity-context.test.ts src/chat/__tests__/writer.test.ts src/agent/__tests__/agent-sdk-boundary-callers.test.ts src/agent/__tests__/prompt-assembler.test.ts`
+- Full Phantom tests pass: `bun test`.
+- Phantom typecheck passes: `bun run typecheck`.
+- Phantom lint passes: `bun run lint`.
+- Chat UI typecheck and production build pass.
+- Murph shim test and typecheck pass for `Options.transformContext`.
+
+## Live Verification
+
+Phantom was run locally on top of the locally rebuilt Murph shim with the OpenAI
+provider and `gpt-5.5`.
+
+Verified:
+
+- A chat request created and previewed `/ui/continuity-smoke-final.html`.
+- The served page returned HTTP 200 and contained the expected smoke text.
+- A follow-up asking for the exact created page URL returned the page URL, not
+  a login link.
+- Completed tool cards rendered collapsed by default. An errored tool card still
+  opened automatically.
diff --git a/src/agent/__tests__/agent-sdk-boundary-callers.test.ts b/src/agent/__tests__/agent-sdk-boundary-callers.test.ts
@@ -4,7 +4,13 @@ import { z } from "zod/v4";
 import { PhantomConfigSchema } from "../../config/schemas.ts";
 import type { PhantomConfig } from "../../config/types.ts";
 import { runMigrations } from "../../db/migrate.ts";
-import { type AgentSdkQueryParams, type Query, type SDKMessage, __setAgentSdkQueryForTests } from "../agent-sdk.ts";
+import {
+	type AgentSdkQueryOptions,
+	type AgentSdkQueryParams,
+	type Query,
+	type SDKMessage,
+	__setAgentSdkQueryForTests,
+} from "../agent-sdk.ts";
 import { executeChatQuery } from "../chat-query.ts";
 import { CostTracker } from "../cost-tracker.ts";
 import { runJudgeQuery } from "../judge-query.ts";
@@ -358,6 +364,54 @@ describe("Agent SDK boundary callers", () => {
 		expect(options?.thinking).toEqual({ type: "enabled", budgetTokens: 8192 });
 	});
 
+	test("chat query path passes Phantom continuity through Murph transformContext", async () => {
+		__setAgentSdkQueryForTests((params) => { // stub the SDK query and record the params it is called with
+			calls.push(params);
+			return queryFromMessages([initMessage(), assistantMessage("chat assistant"), resultMessage("chat result")]);
+		});
+
+		await executeChatQuery(
+			{
+				config: makeConfig({
+					agent_runtime: "murph", // the runtime for which this test expects a transformContext option
+					model: "gpt-5.5",
+					provider: { type: "openai" },
+				}),
+				sessionStore: new SessionStore(db),
+				costTracker: new CostTracker(db),
+				memoryContextBuilder: null,
+				evolvedConfig: null,
+				roleTemplate: null,
+				onboardingPrompt: null,
+				mcpServerFactories: null,
+			},
+			"web:chat-session",
+			{ role: "user", content: "give me the page link" },
+			Date.now(),
+			{
+				signal: new AbortController().signal,
+				sessionContext: "User-visible page: http://127.0.0.1:3112/ui/profile.html", // host fact that must surface via transformContext
+				onSdkEvent: () => {},
+			},
+		);
+		const options = calls[0]?.options as AgentSdkQueryOptions | undefined; // cast to the options type that declares transformContext
+		const transformContext = options?.transformContext;
+		expect(transformContext).toBeDefined();
+		const systemPrompt = calls[0]?.options?.systemPrompt;
+		if (typeof systemPrompt === "object" && systemPrompt !== null && "append" in systemPrompt) {
+			expect(systemPrompt.append).not.toContain("User-visible page"); // the session context must not also land in the system prompt append
+		} else {
+			throw new Error("Expected object system prompt"); // fail loudly rather than silently skipping the assertion above
+		}
+
+		const transformed = (await transformContext?.([{ role: "user", content: "same prompt" }])) ?? []; // run the captured transform on a one-message history
+		expect(transformed).toHaveLength(2); // the injected context message plus the original user message
+		const contextMessage = transformed[0] as Record<string, unknown>; // the context is expected ahead of the user message
+		expect(contextMessage.role).toBe("user");
+		expect(textFromContent(contextMessage.content)).toContain("<phantom_chat_context>");
+		expect(textFromContent(contextMessage.content)).toContain("http://127.0.0.1:3112/ui/profile.html");
+	});
+
 	test("chat query retries stale resume result frames without forwarding the error result", async () => {
 		const sdkEvents: SDKMessage[] = [];
 		let factoryCalls = 0;
@@ -474,3 +528,15 @@ describe("Agent SDK boundary callers", () => {
 		expect(options?.env?.OPENAI_API_KEY).toBe("openai-secret");
 	});
 });
+
+function textFromContent(content: unknown): string { // test helper: flatten message content into plain text
+	if (typeof content === "string") return content; // string content is returned unchanged
+	if (!Array.isArray(content)) return ""; // any other non-array content yields no text
+	return content
+		.map((item) => {
+			if (item === null || typeof item !== "object" || Array.isArray(item)) return ""; // non-object items contribute an empty string
+			const block = item as Record<string, unknown>;
+			return block.type === "text" && typeof block.text === "string" ? block.text : ""; // only `text` blocks with a string payload contribute
+		})
+		.join("\n"); // one entry per array item, so skipped items still produce an empty line
+}
diff --git a/src/agent/__tests__/murph-context.test.ts b/src/agent/__tests__/murph-context.test.ts
@@ -0,0 +1,57 @@
+import { describe, expect, test } from "bun:test";
+import { createMurphContextTransform } from "../murph-context.ts";
+
+describe("createMurphContextTransform", () => {
+	test("injects Phantom context as a Pi-compatible user message before the latest user message", async () => {
+		const transform = createMurphContextTransform("User-visible page: http://127.0.0.1:3100/ui/profile.html");
+		expect(transform).toBeDefined();
+
+		const userMessage = { role: "user", content: [{ type: "text", text: "Give me the link." }] };
+		const output = await transform?.([{ role: "assistant", content: [] }, userMessage]);
+
+		expect(output).toHaveLength(3); // the two input messages plus the injected context
+		expect(record(output?.[1])?.role).toBe("user"); // context lands at index 1, directly before the latest user message
+		expect(textContent(output?.[1])).toContain("<phantom_chat_context>");
+		expect(textContent(output?.[1])).toContain("http://127.0.0.1:3100/ui/profile.html");
+		expect(output?.[2]).toBe(userMessage); // same object reference: the user message is passed through, not copied
+	});
+
+	test("replaces stale Phantom context messages instead of accumulating them", async () => {
+		const transform = createMurphContextTransform("Fresh context");
+		const staleContext = { // a context message left over from an earlier transform pass
+			role: "user",
+			content: [{ type: "text", text: "<phantom_chat_context>\nStale context\n</phantom_chat_context>" }],
+			timestamp: 1,
+		};
+
+		const output =
+			(await transform?.([{ role: "assistant", content: [] }, staleContext, { role: "toolResult", content: [] }])) ??
+			[]; // this history contains no real user message, only the stale context
+
+		const phantomContexts = output.filter((message) => textContent(message).includes("<phantom_chat_context>"));
+		expect(phantomContexts).toHaveLength(1); // exactly one context message survives
+		expect(textContent(phantomContexts[0])).toContain("Fresh context");
+		expect(output).not.toContain(staleContext); // the stale message object itself is gone from the output
+	});
+
+	test("returns undefined for empty context", () => {
+		expect(createMurphContextTransform("   ")).toBeUndefined(); // whitespace-only context produces no transform
+		expect(createMurphContextTransform(undefined)).toBeUndefined();
+	});
+});
+
+function record(value: unknown): Record<string, unknown> | undefined { // test helper: narrow an unknown value to an indexable object
+	return value !== null && typeof value === "object" ? (value as Record<string, unknown>) : undefined; // arrays pass too, since typeof [] is "object"
+}
+
+function textContent(value: unknown): string { // test helper: plain text of a message's `content` field
+	const content = record(value)?.content; // undefined when value is not an object
+	if (typeof content === "string") return content; // string content is returned unchanged
+	if (!Array.isArray(content)) return ""; // missing or non-array content yields no text
+	return content
+		.map((item) => {
+			const block = record(item);
+			return block?.type === "text" && typeof block.text === "string" ? block.text : ""; // only `text` blocks with a string payload contribute
+		})
+		.join("\n"); // one entry per array item, so skipped items still produce an empty line
+}
diff --git a/src/agent/__tests__/prompt-assembler.test.ts b/src/agent/__tests__/prompt-assembler.test.ts
@@ -161,4 +161,12 @@ describe("assemblePrompt UI vocabulary guidance", () => {
 		const prompt = assemblePrompt(baseConfig);
 		expect(prompt).toContain("public/_examples/");
 	});
+
+	test("distinguishes created page URLs from authentication links", () => { // pins the page-URL vs login-URL wording in the assembled prompt
+		const prompt = assemblePrompt(baseConfig);
+		expect(prompt).toContain("Page URLs and login URLs are different.");
+		expect(prompt).toContain("return the exact /ui/<path> page URL");
+		expect(prompt).toContain("Only call phantom_generate_login");
+		expect(prompt).toContain("Do not substitute");
+	});
 });
diff --git a/src/agent/agent-sdk.ts b/src/agent/agent-sdk.ts
@@ -35,6 +35,9 @@ export type {
 };
 
 export type AgentSdkQueryParams = Parameters<typeof anthropicQuery>[0];
+export type AgentSdkQueryOptions = NonNullable<AgentSdkQueryParams["options"]> & { // the query's non-null `options` type plus an optional context hook
+	transformContext?: (messages: unknown[], signal?: AbortSignal) => Promise<unknown[]> | unknown[]; // may return the message list synchronously or as a promise
+};
 export type AgentSdkQuery = (params: AgentSdkQueryParams) => Query;
 export type AgentSdkRuntimeSelection = {
 	agentRuntime: AgentRuntimeKind;

diff --git a/src/agent/chat-query.ts b/src/agent/chat-query.ts
@@ -1,7 +1,13 @@
 // Extracted chat-specific query logic for the runForChat method.
 // Lives outside runtime.ts to keep that file under the 300-line budget.
 
-import { type McpServerConfig, type SDKMessage, type SDKUserMessage, query } from "./agent-sdk.ts";
+import {
+	type AgentSdkQueryOptions,
+	type McpServerConfig,
+	type SDKMessage,
+	type SDKUserMessage,
+	query,
+} from "./agent-sdk.ts";
 
 type MessageParam = SDKUserMessage["message"];
 import { buildAgentRuntimeEnv, resolveAgentRuntimeModel } from "../config/providers.ts";
@@ -14,6 +20,7 @@ import { type AgentCost, type AgentResponse, emptyCost } from "./events.ts";
 import { createDangerousCommandBlocker, createFileTracker } from "./hooks.ts";
 import { extractTextFromMessageParam } from "./message-param-utils.ts";
 import { extractCost, extractTextFromMessage } from "./message-utils.ts";
+import { createMurphContextTransform } from "./murph-context.ts";
 import { permissionOptionsFromConfig } from "./permission-options.ts";
 import { assemblePrompt } from "./prompt-assembler.ts";
 import { isNoConversationFoundResult, sdkResultErrorText } from "./sdk-result-errors.ts";
@@ -36,7 +43,7 @@ export async function executeChatQuery(
 	sessionKey: string,
 	message: MessageParam,
 	startTime: number,
-	options: { signal: AbortSignal; onSdkEvent: (msg: SDKMessage) => void },
+	options: { signal: AbortSignal; onSdkEvent: (msg: SDKMessage) => void; sessionContext?: string },
 ): Promise<AgentResponse> {
 	const parts = sessionKey.split(":");
 	const channelId = parts[0] ?? "web";
@@ -55,14 +62,17 @@ export async function executeChatQuery(
 			/* Memory unavailable */
 		}
 	}
+	const useMurphContextTransform = deps.config.agent_runtime === "murph";
 	const appendPrompt = assemblePrompt(
 		deps.config,
 		memoryContext,
 		deps.evolvedConfig ?? undefined,
 		deps.roleTemplate ?? undefined,
 		deps.onboardingPrompt ?? undefined,
 		undefined,
+		useMurphContextTransform ? undefined : options.sessionContext,
 	);
+	const transformContext = useMurphContextTransform ? createMurphContextTransform(options.sessionContext) : undefined;
 	const queryModel = resolveAgentRuntimeModel(deps.config, deps.config.model);
 	const providerEnv = buildAgentRuntimeEnv(deps.config, queryModel);
 
@@ -93,30 +103,32 @@ export async function executeChatQuery(
 					await Promise.all(Object.entries(deps.mcpServerFactories).map(async ([k, f]) => [k, await f()] as const)),
 				)
 			: undefined;
+		const queryOptions: AgentSdkQueryOptions = {
+			model: queryModel,
+			...permissionOptions,
+			settingSources: ["project", "user"],
+			systemPrompt: {
+				type: "preset" as const,
+				preset: "claude_code" as const,
+				append: appendPrompt,
+			},
+			persistSession: true,
+			effort: deps.config.effort,
+			thinking: getThinkingConfig(queryModel),
+			includePartialMessages: true,
+			agentProgressSummaries: true,
+			promptSuggestions: true,
+			...(deps.config.max_budget_usd > 0 ? { maxBudgetUsd: deps.config.max_budget_usd } : {}),
+			abortController: controller,
+			env: { ...process.env, ...providerEnv },
+			hooks: { PreToolUse: [commandBlocker], PostToolUse: [fileTracker.hook] },
+			...(useResume && session?.sdk_session_id ? { resume: session.sdk_session_id } : {}),
+			...(mcpServers ? { mcpServers } : {}),
+			...(transformContext ? { transformContext } : {}),
+		};
 		const queryStream = query({
 			prompt: makePrompt(),
-			options: {
-				model: queryModel,
-				...permissionOptions,
-				settingSources: ["project", "user"],
-				systemPrompt: {
-					type: "preset" as const,
-					preset: "claude_code" as const,
-					append: appendPrompt,
-				},
-				persistSession: true,
-				effort: deps.config.effort,
-				thinking: getThinkingConfig(queryModel),
-				includePartialMessages: true,
-				agentProgressSummaries: true,
-				promptSuggestions: true,
-				...(deps.config.max_budget_usd > 0 ? { maxBudgetUsd: deps.config.max_budget_usd } : {}),
-				abortController: controller,
-				env: { ...process.env, ...providerEnv },
-				hooks: { PreToolUse: [commandBlocker], PostToolUse: [fileTracker.hook] },
-				...(useResume && session?.sdk_session_id ? { resume: session.sdk_session_id } : {}),
-				...(mcpServers ? { mcpServers } : {}),
-			},
+			options: queryOptions,
 		});
 
 		for await (const msg of queryStream) {