Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions chat-ui/src/components/tool-call-card.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,11 @@ export function ToolCallCard({ tool }: { tool: ToolCallState }) {
const inputDetails = toolInputDetails(tool);
const output = tool.output ? redactSensitiveText(truncate(tool.output, TOOL_OUTPUT_DISPLAY_LIMIT)) : "";

const autoExpand = tool.state === "running" || tool.state === "result" || tool.state === "error" || tool.state === "blocked";
const autoExpand = tool.state === "error" || tool.state === "blocked";
const [isOpen, setIsOpen] = useState(autoExpand);

useEffect(() => {
if (tool.state === "running" || tool.state === "result" || tool.state === "error" || tool.state === "blocked") {
if (tool.state === "error" || tool.state === "blocked") {
setIsOpen(true);
}
}, [tool.state]);
Expand Down
75 changes: 75 additions & 0 deletions research/chat-experience/phase-10g-pi-continuity.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Phase 10G Pi Continuity Context

Date: 2026-04-30

## Problem

A live Phantom-on-Murph browser session showed that Murph compaction can preserve
protocol validity and still lose host-level app facts that the user expects the
agent to remember, such as the exact page URL produced by `phantom_create_page`.
The symptom was not specific to page URLs: it was a general continuity issue
that shows up after a long, tool-heavy run.

## Pi Grounding

Pi already provides the primitive we need:

- `transformContext` runs at the AgentMessage level before `convertToLlm`.
- Pi custom messages require the app to also provide a `convertToLlm`
implementation. Murph's default Pi converter intentionally passes only
`user`, `assistant`, and `toolResult` messages.
- Phantom should therefore inject host facts through `transformContext` as a
normal user-context message, not as a custom role that the default converter
would filter out.

Murph already exposes this primitive through `MurphOptions.transformContext`,
passes it through query normalization, and forwards it into the Pi harness.

## Decision

Do not build a parallel Phantom continuity runtime. Phantom should derive compact
host facts from its existing durable stream log and pass them to Murph through
`transformContext` as a Pi-compatible user-context message. Murph remains
responsible for raw transcript compaction, replay, tool-call protocol validity,
provider transport, and retry behavior.

## Current Implementation

- `src/chat/continuity-context.ts` scans the tail of `chat_stream_events`.
- It extracts user-visible page artifacts from `phantom_create_page` and
`phantom_preview_page`.
- It intentionally excludes `phantom_generate_login` authentication links from
page artifacts.
- It includes recent `session.compact_boundary` checkpoints.
- `src/agent/murph-context.ts` wraps that context in
`<phantom_chat_context>` and inserts it as a Pi-compatible user-context
message before the latest user message when possible.
- The chat query path uses this transform only when `agent_runtime` is `murph`.
On the Anthropic fallback, the same context goes through the system prompt
append path instead.
- Tool call cards now default to collapsed; errored and blocked calls still
open automatically.

## Verification

- Focused Phantom tests pass:
`bun test src/agent/__tests__/murph-context.test.ts src/chat/__tests__/continuity-context.test.ts src/chat/__tests__/writer.test.ts src/agent/__tests__/agent-sdk-boundary-callers.test.ts src/agent/__tests__/prompt-assembler.test.ts`
- Full Phantom tests pass: `bun test`.
- Phantom typecheck passes: `bun run typecheck`.
- Phantom lint passes: `bun run lint`.
- Chat UI typecheck and production build pass.
- Murph shim test and typecheck pass for `Options.transformContext`.

## Live Verification

Phantom was run locally on top of the locally rebuilt Murph shim with the OpenAI
provider and `gpt-5.5`.

Verified:

- A chat request created and previewed `/ui/continuity-smoke-final.html`.
- The served page returned HTTP 200 and contained the expected smoke text.
- A follow-up asking for the exact created page URL returned the page URL, not
a login link.
- Completed tool cards rendered collapsed by default. An errored tool card still
opened automatically.
68 changes: 67 additions & 1 deletion src/agent/__tests__/agent-sdk-boundary-callers.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@ import { z } from "zod/v4";
import { PhantomConfigSchema } from "../../config/schemas.ts";
import type { PhantomConfig } from "../../config/types.ts";
import { runMigrations } from "../../db/migrate.ts";
import { type AgentSdkQueryParams, type Query, type SDKMessage, __setAgentSdkQueryForTests } from "../agent-sdk.ts";
import {
type AgentSdkQueryOptions,
type AgentSdkQueryParams,
type Query,
type SDKMessage,
__setAgentSdkQueryForTests,
} from "../agent-sdk.ts";
import { executeChatQuery } from "../chat-query.ts";
import { CostTracker } from "../cost-tracker.ts";
import { runJudgeQuery } from "../judge-query.ts";
Expand Down Expand Up @@ -358,6 +364,54 @@ describe("Agent SDK boundary callers", () => {
expect(options?.thinking).toEqual({ type: "enabled", budgetTokens: 8192 });
});

// With agent_runtime "murph", the session context must reach the query through
// options.transformContext and must stay out of the system prompt append.
test("chat query path passes Phantom continuity through Murph transformContext", async () => {
// Stub the SDK query: record the params of every call and return the stream
// that queryFromMessages builds from three canned messages.
__setAgentSdkQueryForTests((params) => {
calls.push(params);
return queryFromMessages([initMessage(), assistantMessage("chat assistant"), resultMessage("chat result")]);
});

await executeChatQuery(
{
config: makeConfig({
agent_runtime: "murph",
model: "gpt-5.5",
provider: { type: "openai" },
}),
sessionStore: new SessionStore(db),
costTracker: new CostTracker(db),
memoryContextBuilder: null,
evolvedConfig: null,
roleTemplate: null,
onboardingPrompt: null,
mcpServerFactories: null,
},
"web:chat-session",
{ role: "user", content: "give me the page link" },
Date.now(),
{
signal: new AbortController().signal,
sessionContext: "User-visible page: http://127.0.0.1:3112/ui/profile.html",
onSdkEvent: () => {},
},
);
// Inspect the options captured from the first recorded query call.
const options = calls[0]?.options as AgentSdkQueryOptions | undefined;
const transformContext = options?.transformContext;
expect(transformContext).toBeDefined();
// The session context must not appear in the system prompt append; any
// system prompt that is not an object with an `append` field fails the test.
const systemPrompt = calls[0]?.options?.systemPrompt;
if (typeof systemPrompt === "object" && systemPrompt !== null && "append" in systemPrompt) {
expect(systemPrompt.append).not.toContain("User-visible page");
} else {
throw new Error("Expected object system prompt");
}

// Run the captured transform on a one-message history: two messages are
// expected back, with the injected context message in first position.
const transformed = (await transformContext?.([{ role: "user", content: "same prompt" }])) ?? [];
expect(transformed).toHaveLength(2);
const contextMessage = transformed[0] as Record<string, unknown>;
expect(contextMessage.role).toBe("user");
expect(textFromContent(contextMessage.content)).toContain("<phantom_chat_context>");
expect(textFromContent(contextMessage.content)).toContain("http://127.0.0.1:3112/ui/profile.html");
});

test("chat query retries stale resume result frames without forwarding the error result", async () => {
const sdkEvents: SDKMessage[] = [];
let factoryCalls = 0;
Expand Down Expand Up @@ -474,3 +528,15 @@ describe("Agent SDK boundary callers", () => {
expect(options?.env?.OPENAI_API_KEY).toBe("openai-secret");
});
});

/**
 * Flattens message content into plain text.
 *
 * A string is returned unchanged. An array yields one line per element: the
 * `text` of each `{ type: "text", text: string }` object and an empty line for
 * anything else, joined with newlines. Every other input yields "".
 */
function textFromContent(content: unknown): string {
	if (typeof content === "string") {
		return content;
	}
	if (!Array.isArray(content)) {
		return "";
	}

	const lines: string[] = [];
	for (const entry of content) {
		let line = "";
		// Only plain (non-array) objects can be text blocks.
		if (typeof entry === "object" && entry !== null && !Array.isArray(entry)) {
			const candidate = entry as Record<string, unknown>;
			if (candidate.type === "text" && typeof candidate.text === "string") {
				line = candidate.text;
			}
		}
		lines.push(line);
	}
	return lines.join("\n");
}
57 changes: 57 additions & 0 deletions src/agent/__tests__/murph-context.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import { describe, expect, test } from "bun:test";
import { createMurphContextTransform } from "../murph-context.ts";

// Unit tests for createMurphContextTransform: where the context message is
// injected, how stale context is replaced, and the empty-input case.
describe("createMurphContextTransform", () => {
test("injects Phantom context as a Pi-compatible user message before the latest user message", async () => {
const transform = createMurphContextTransform("User-visible page: http://127.0.0.1:3100/ui/profile.html");
expect(transform).toBeDefined();

// The history ends with a user message; the context message is expected
// directly before it.
const userMessage = { role: "user", content: [{ type: "text", text: "Give me the link." }] };
const output = await transform?.([{ role: "assistant", content: [] }, userMessage]);

expect(output).toHaveLength(3);
expect(record(output?.[1])?.role).toBe("user");
expect(textContent(output?.[1])).toContain("<phantom_chat_context>");
expect(textContent(output?.[1])).toContain("http://127.0.0.1:3100/ui/profile.html");
// The original user message object is passed through by identity, in last position.
expect(output?.[2]).toBe(userMessage);
});

test("replaces stale Phantom context messages instead of accumulating them", async () => {
const transform = createMurphContextTransform("Fresh context");
// A user message that already carries a <phantom_chat_context> wrapper.
const staleContext = {
role: "user",
content: [{ type: "text", text: "<phantom_chat_context>\nStale context\n</phantom_chat_context>" }],
timestamp: 1,
};

const output =
(await transform?.([{ role: "assistant", content: [] }, staleContext, { role: "toolResult", content: [] }])) ??
[];

// Exactly one context message may remain, and it must carry the fresh text.
const phantomContexts = output.filter((message) => textContent(message).includes("<phantom_chat_context>"));
expect(phantomContexts).toHaveLength(1);
expect(textContent(phantomContexts[0])).toContain("Fresh context");
expect(output).not.toContain(staleContext);
});

// Whitespace-only and undefined context produce no transform at all.
test("returns undefined for empty context", () => {
expect(createMurphContextTransform(" ")).toBeUndefined();
expect(createMurphContextTransform(undefined)).toBeUndefined();
});
});

/**
 * Views a value as a string-keyed record.
 *
 * Returns the same reference when `value` is a non-null object (arrays
 * included), and `undefined` for primitives, functions, and null.
 */
function record(value: unknown): Record<string, unknown> | undefined {
	if (typeof value !== "object" || value === null) {
		return undefined;
	}
	return value as Record<string, unknown>;
}

/**
 * Extracts plain text from a message-like value's `content` field.
 *
 * String content is returned unchanged. Array content yields one line per
 * element: the `text` of each `{ type: "text", text: string }` block and an
 * empty line for anything else, joined with newlines. Missing or differently
 * shaped content yields "".
 */
function textContent(value: unknown): string {
	const body = record(value)?.content;
	if (typeof body === "string") {
		return body;
	}
	if (!Array.isArray(body)) {
		return "";
	}

	const lines: string[] = [];
	for (const entry of body) {
		const candidate = record(entry);
		if (candidate?.type === "text" && typeof candidate.text === "string") {
			lines.push(candidate.text);
		} else {
			// Keep a placeholder so non-text blocks still occupy a line.
			lines.push("");
		}
	}
	return lines.join("\n");
}
8 changes: 8 additions & 0 deletions src/agent/__tests__/prompt-assembler.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,12 @@ describe("assemblePrompt UI vocabulary guidance", () => {
const prompt = assemblePrompt(baseConfig);
expect(prompt).toContain("public/_examples/");
});

// The assembled prompt must keep the guidance that separates page URLs from
// login links; each phrase below is checked verbatim, in order.
test("distinguishes created page URLs from authentication links", () => {
	const assembled = assemblePrompt(baseConfig);
	const requiredPhrases = [
		"Page URLs and login URLs are different.",
		"return the exact /ui/<path> page URL",
		"Only call phantom_generate_login",
		"Do not substitute",
	];
	for (const phrase of requiredPhrases) {
		expect(assembled).toContain(phrase);
	}
});
});
3 changes: 3 additions & 0 deletions src/agent/agent-sdk.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ export type {
};

/** Parameter object accepted by `anthropicQuery` (its first argument). */
export type AgentSdkQueryParams = Parameters<typeof anthropicQuery>[0];
/**
 * The non-null `options` of {@link AgentSdkQueryParams}, widened with an
 * optional `transformContext` hook.
 *
 * `transformContext` is called with a message list and an optional abort
 * signal, and returns a message list either directly or as a promise.
 * Messages are typed `unknown` here, so callers must narrow them themselves.
 */
export type AgentSdkQueryOptions = NonNullable<AgentSdkQueryParams["options"]> & {
transformContext?: (messages: unknown[], signal?: AbortSignal) => Promise<unknown[]> | unknown[];
};
/** Signature of a query function: takes {@link AgentSdkQueryParams} and returns a `Query`. */
export type AgentSdkQuery = (params: AgentSdkQueryParams) => Query;
export type AgentSdkRuntimeSelection = {
agentRuntime: AgentRuntimeKind;
Expand Down
60 changes: 36 additions & 24 deletions src/agent/chat-query.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
// Extracted chat-specific query logic for the runForChat method.
// Lives outside runtime.ts to keep that file under the 300-line budget.

import { type McpServerConfig, type SDKMessage, type SDKUserMessage, query } from "./agent-sdk.ts";
import {
type AgentSdkQueryOptions,
type McpServerConfig,
type SDKMessage,
type SDKUserMessage,
query,
} from "./agent-sdk.ts";

type MessageParam = SDKUserMessage["message"];
import { buildAgentRuntimeEnv, resolveAgentRuntimeModel } from "../config/providers.ts";
Expand All @@ -14,6 +20,7 @@ import { type AgentCost, type AgentResponse, emptyCost } from "./events.ts";
import { createDangerousCommandBlocker, createFileTracker } from "./hooks.ts";
import { extractTextFromMessageParam } from "./message-param-utils.ts";
import { extractCost, extractTextFromMessage } from "./message-utils.ts";
import { createMurphContextTransform } from "./murph-context.ts";
import { permissionOptionsFromConfig } from "./permission-options.ts";
import { assemblePrompt } from "./prompt-assembler.ts";
import { isNoConversationFoundResult, sdkResultErrorText } from "./sdk-result-errors.ts";
Expand All @@ -36,7 +43,7 @@ export async function executeChatQuery(
sessionKey: string,
message: MessageParam,
startTime: number,
options: { signal: AbortSignal; onSdkEvent: (msg: SDKMessage) => void },
options: { signal: AbortSignal; onSdkEvent: (msg: SDKMessage) => void; sessionContext?: string },
): Promise<AgentResponse> {
const parts = sessionKey.split(":");
const channelId = parts[0] ?? "web";
Expand All @@ -55,14 +62,17 @@ export async function executeChatQuery(
/* Memory unavailable */
}
}
const useMurphContextTransform = deps.config.agent_runtime === "murph";
const appendPrompt = assemblePrompt(
deps.config,
memoryContext,
deps.evolvedConfig ?? undefined,
deps.roleTemplate ?? undefined,
deps.onboardingPrompt ?? undefined,
undefined,
useMurphContextTransform ? undefined : options.sessionContext,
);
const transformContext = useMurphContextTransform ? createMurphContextTransform(options.sessionContext) : undefined;
const queryModel = resolveAgentRuntimeModel(deps.config, deps.config.model);
const providerEnv = buildAgentRuntimeEnv(deps.config, queryModel);

Expand Down Expand Up @@ -93,30 +103,32 @@ export async function executeChatQuery(
await Promise.all(Object.entries(deps.mcpServerFactories).map(async ([k, f]) => [k, await f()] as const)),
)
: undefined;
const queryOptions: AgentSdkQueryOptions = {
model: queryModel,
...permissionOptions,
settingSources: ["project", "user"],
systemPrompt: {
type: "preset" as const,
preset: "claude_code" as const,
append: appendPrompt,
},
persistSession: true,
effort: deps.config.effort,
thinking: getThinkingConfig(queryModel),
includePartialMessages: true,
agentProgressSummaries: true,
promptSuggestions: true,
...(deps.config.max_budget_usd > 0 ? { maxBudgetUsd: deps.config.max_budget_usd } : {}),
abortController: controller,
env: { ...process.env, ...providerEnv },
hooks: { PreToolUse: [commandBlocker], PostToolUse: [fileTracker.hook] },
...(useResume && session?.sdk_session_id ? { resume: session.sdk_session_id } : {}),
...(mcpServers ? { mcpServers } : {}),
...(transformContext ? { transformContext } : {}),
};
const queryStream = query({
prompt: makePrompt(),
options: {
model: queryModel,
...permissionOptions,
settingSources: ["project", "user"],
systemPrompt: {
type: "preset" as const,
preset: "claude_code" as const,
append: appendPrompt,
},
persistSession: true,
effort: deps.config.effort,
thinking: getThinkingConfig(queryModel),
includePartialMessages: true,
agentProgressSummaries: true,
promptSuggestions: true,
...(deps.config.max_budget_usd > 0 ? { maxBudgetUsd: deps.config.max_budget_usd } : {}),
abortController: controller,
env: { ...process.env, ...providerEnv },
hooks: { PreToolUse: [commandBlocker], PostToolUse: [fileTracker.hook] },
...(useResume && session?.sdk_session_id ? { resume: session.sdk_session_id } : {}),
...(mcpServers ? { mcpServers } : {}),
},
options: queryOptions,
});

for await (const msg of queryStream) {
Expand Down
Loading
Loading