diff --git a/chat-ui/src/components/tool-call-card.tsx b/chat-ui/src/components/tool-call-card.tsx index 6d381762..e65f8322 100644 --- a/chat-ui/src/components/tool-call-card.tsx +++ b/chat-ui/src/components/tool-call-card.tsx @@ -161,11 +161,11 @@ export function ToolCallCard({ tool }: { tool: ToolCallState }) { const inputDetails = toolInputDetails(tool); const output = tool.output ? redactSensitiveText(truncate(tool.output, TOOL_OUTPUT_DISPLAY_LIMIT)) : ""; - const autoExpand = tool.state === "running" || tool.state === "result" || tool.state === "error" || tool.state === "blocked"; + const autoExpand = tool.state === "error" || tool.state === "blocked"; const [isOpen, setIsOpen] = useState(autoExpand); useEffect(() => { - if (tool.state === "running" || tool.state === "result" || tool.state === "error" || tool.state === "blocked") { + if (tool.state === "error" || tool.state === "blocked") { setIsOpen(true); } }, [tool.state]); diff --git a/research/chat-experience/phase-10g-pi-continuity.md b/research/chat-experience/phase-10g-pi-continuity.md new file mode 100644 index 00000000..75be1083 --- /dev/null +++ b/research/chat-experience/phase-10g-pi-continuity.md @@ -0,0 +1,75 @@ +# Phase 10G Pi Continuity Context + +Date: 2026-04-30 + +## Problem + +A live Phantom-on-Murph browser session showed that Murph compaction can preserve +protocol validity and still lose host-level app facts that the user expects the +agent to remember, such as the exact page URL produced by `phantom_create_page`. +The symptom was not specific to page URLs. It was a continuity issue after a +long, tool-heavy run. + +## Pi Grounding + +Pi already provides the primitive we need: + +- `transformContext` runs at the AgentMessage level before `convertToLlm`. +- Pi custom messages require the app to also provide a `convertToLlm` + implementation. Murph's default Pi converter intentionally passes only + `user`, `assistant`, and `toolResult` messages. 
+- Phantom should therefore inject host facts through `transformContext` as a + normal user-context message, not as a custom role that the default converter + would filter out. + +Murph already exposes this primitive through `MurphOptions.transformContext`, +passes it through query normalization, and forwards it into the Pi harness. + +## Decision + +Do not build a parallel Phantom continuity runtime. Phantom should derive compact +host facts from its existing durable stream log and pass them to Murph through +`transformContext` as a Pi-compatible user-context message. Murph remains +responsible for raw transcript compaction, replay, tool-call protocol validity, +provider transport, and retry behavior. + +## Current Implementation + +- `src/chat/continuity-context.ts` scans the tail of `chat_stream_events`. +- It extracts user-visible page artifacts from `phantom_create_page` and + `phantom_preview_page`. +- It intentionally excludes `phantom_generate_login` authentication links from + page artifacts. +- It includes recent `session.compact_boundary` checkpoints. +- `src/agent/murph-context.ts` wraps that context in + a Phantom context tag pair (`PHANTOM_CONTEXT_OPEN_TAG` / `PHANTOM_CONTEXT_CLOSE_TAG`) and inserts it as a Pi-compatible user-context + message before the latest user message when possible. +- The chat query path uses this transform only on `agent_runtime: murph`. + Anthropic fallback can still receive the same context through the system + prompt append path. +- Tool call cards now default collapsed, with errors and blocked calls still + opening automatically. + +## Verification + +- Focused Phantom tests pass: + `bun test src/agent/__tests__/murph-context.test.ts src/chat/__tests__/continuity-context.test.ts src/chat/__tests__/writer.test.ts src/agent/__tests__/agent-sdk-boundary-callers.test.ts src/agent/__tests__/prompt-assembler.test.ts` +- Full Phantom tests pass: `bun test`. +- Phantom typecheck passes: `bun run typecheck`. +- Phantom lint passes: `bun run lint`. +- Chat UI typecheck and production build pass. 
+- Murph shim test and typecheck pass for `Options.transformContext`. + +## Live Verification + +Phantom was run locally on top of the locally rebuilt Murph shim with the OpenAI +provider and `gpt-5.5`. + +Verified: + +- A chat request created and previewed `/ui/continuity-smoke-final.html`. +- The served page returned HTTP 200 and contained the expected smoke text. +- A follow-up asking for the exact created page URL returned the page URL, not + a login link. +- Completed tool cards rendered collapsed by default. An errored tool card still + opened automatically. diff --git a/src/agent/__tests__/agent-sdk-boundary-callers.test.ts b/src/agent/__tests__/agent-sdk-boundary-callers.test.ts index cc42bf84..de86cb13 100644 --- a/src/agent/__tests__/agent-sdk-boundary-callers.test.ts +++ b/src/agent/__tests__/agent-sdk-boundary-callers.test.ts @@ -4,7 +4,13 @@ import { z } from "zod/v4"; import { PhantomConfigSchema } from "../../config/schemas.ts"; import type { PhantomConfig } from "../../config/types.ts"; import { runMigrations } from "../../db/migrate.ts"; -import { type AgentSdkQueryParams, type Query, type SDKMessage, __setAgentSdkQueryForTests } from "../agent-sdk.ts"; +import { + type AgentSdkQueryOptions, + type AgentSdkQueryParams, + type Query, + type SDKMessage, + __setAgentSdkQueryForTests, +} from "../agent-sdk.ts"; import { executeChatQuery } from "../chat-query.ts"; import { CostTracker } from "../cost-tracker.ts"; import { runJudgeQuery } from "../judge-query.ts"; @@ -358,6 +364,54 @@ describe("Agent SDK boundary callers", () => { expect(options?.thinking).toEqual({ type: "enabled", budgetTokens: 8192 }); }); + test("chat query path passes Phantom continuity through Murph transformContext", async () => { + __setAgentSdkQueryForTests((params) => { + calls.push(params); + return queryFromMessages([initMessage(), assistantMessage("chat assistant"), resultMessage("chat result")]); + }); + + await executeChatQuery( + { + config: makeConfig({ + agent_runtime: 
"murph", + model: "gpt-5.5", + provider: { type: "openai" }, + }), + sessionStore: new SessionStore(db), + costTracker: new CostTracker(db), + memoryContextBuilder: null, + evolvedConfig: null, + roleTemplate: null, + onboardingPrompt: null, + mcpServerFactories: null, + }, + "web:chat-session", + { role: "user", content: "give me the page link" }, + Date.now(), + { + signal: new AbortController().signal, + sessionContext: "User-visible page: http://127.0.0.1:3112/ui/profile.html", + onSdkEvent: () => {}, + }, + ); + const options = calls[0]?.options as AgentSdkQueryOptions | undefined; + const transformContext = options?.transformContext; + expect(transformContext).toBeDefined(); + const systemPrompt = calls[0]?.options?.systemPrompt; + if (typeof systemPrompt === "object" && systemPrompt !== null && "append" in systemPrompt) { + expect(systemPrompt.append).not.toContain("User-visible page"); + } else { + throw new Error("Expected object system prompt"); + } + + const transformed = (await transformContext?.([{ role: "user", content: "same prompt" }])) ?? 
[]; + expect(transformed).toHaveLength(2); + const contextMessage = transformed[0] as Record; + expect(contextMessage.role).toBe("user"); + expect(textFromContent(contextMessage.content)).toContain(""); + expect(textFromContent(contextMessage.content)).toContain("http://127.0.0.1:3112/ui/profile.html"); + }); + test("chat query retries stale resume result frames without forwarding the error result", async () => { const sdkEvents: SDKMessage[] = []; let factoryCalls = 0; @@ -474,3 +528,15 @@ describe("Agent SDK boundary callers", () => { expect(options?.env?.OPENAI_API_KEY).toBe("openai-secret"); }); }); + +function textFromContent(content: unknown): string { + if (typeof content === "string") return content; + if (!Array.isArray(content)) return ""; + return content + .map((item) => { + if (item === null || typeof item !== "object" || Array.isArray(item)) return ""; + const block = item as Record; + return block.type === "text" && typeof block.text === "string" ? block.text : ""; + }) + .join("\n"); +} diff --git a/src/agent/__tests__/murph-context.test.ts b/src/agent/__tests__/murph-context.test.ts new file mode 100644 index 00000000..87c19c2a --- /dev/null +++ b/src/agent/__tests__/murph-context.test.ts @@ -0,0 +1,57 @@ +import { describe, expect, test } from "bun:test"; +import { createMurphContextTransform } from "../murph-context.ts"; + +describe("createMurphContextTransform", () => { + test("injects Phantom context as a Pi-compatible user message before the latest user message", async () => { + const transform = createMurphContextTransform("User-visible page: http://127.0.0.1:3100/ui/profile.html"); + expect(transform).toBeDefined(); + + const userMessage = { role: "user", content: [{ type: "text", text: "Give me the link." 
}] }; + const output = await transform?.([{ role: "assistant", content: [] }, userMessage]); + + expect(output).toHaveLength(3); + expect(record(output?.[1])?.role).toBe("user"); + expect(textContent(output?.[1])).toContain(""); + expect(textContent(output?.[1])).toContain("http://127.0.0.1:3100/ui/profile.html"); + expect(output?.[2]).toBe(userMessage); + }); + + test("replaces stale Phantom context messages instead of accumulating them", async () => { + const transform = createMurphContextTransform("Fresh context"); + const staleContext = { + role: "user", + content: [{ type: "text", text: "\nStale context\n" }], + timestamp: 1, + }; + + const output = + (await transform?.([{ role: "assistant", content: [] }, staleContext, { role: "toolResult", content: [] }])) ?? + []; + + const phantomContexts = output.filter((message) => textContent(message).includes("")); + expect(phantomContexts).toHaveLength(1); + expect(textContent(phantomContexts[0])).toContain("Fresh context"); + expect(output).not.toContain(staleContext); + }); + + test("returns undefined for empty context", () => { + expect(createMurphContextTransform(" ")).toBeUndefined(); + expect(createMurphContextTransform(undefined)).toBeUndefined(); + }); +}); + +function record(value: unknown): Record | undefined { + return value !== null && typeof value === "object" ? (value as Record) : undefined; +} + +function textContent(value: unknown): string { + const content = record(value)?.content; + if (typeof content === "string") return content; + if (!Array.isArray(content)) return ""; + return content + .map((item) => { + const block = record(item); + return block?.type === "text" && typeof block.text === "string" ? 
block.text : ""; + }) + .join("\n"); +} diff --git a/src/agent/__tests__/prompt-assembler.test.ts b/src/agent/__tests__/prompt-assembler.test.ts index 502ccff6..9378896f 100644 --- a/src/agent/__tests__/prompt-assembler.test.ts +++ b/src/agent/__tests__/prompt-assembler.test.ts @@ -161,4 +161,12 @@ describe("assemblePrompt UI vocabulary guidance", () => { const prompt = assemblePrompt(baseConfig); expect(prompt).toContain("public/_examples/"); }); + + test("distinguishes created page URLs from authentication links", () => { + const prompt = assemblePrompt(baseConfig); + expect(prompt).toContain("Page URLs and login URLs are different."); + expect(prompt).toContain("return the exact /ui/ page URL"); + expect(prompt).toContain("Only call phantom_generate_login"); + expect(prompt).toContain("Do not substitute"); + }); }); diff --git a/src/agent/agent-sdk.ts b/src/agent/agent-sdk.ts index b65410da..60b1dd33 100644 --- a/src/agent/agent-sdk.ts +++ b/src/agent/agent-sdk.ts @@ -35,6 +35,9 @@ export type { }; export type AgentSdkQueryParams = Parameters[0]; +export type AgentSdkQueryOptions = NonNullable & { + transformContext?: (messages: unknown[], signal?: AbortSignal) => Promise | unknown[]; +}; export type AgentSdkQuery = (params: AgentSdkQueryParams) => Query; export type AgentSdkRuntimeSelection = { agentRuntime: AgentRuntimeKind; diff --git a/src/agent/chat-query.ts b/src/agent/chat-query.ts index 3710b2e4..4bea2b58 100644 --- a/src/agent/chat-query.ts +++ b/src/agent/chat-query.ts @@ -1,7 +1,13 @@ // Extracted chat-specific query logic for the runForChat method. // Lives outside runtime.ts to keep that file under the 300-line budget. 
-import { type McpServerConfig, type SDKMessage, type SDKUserMessage, query } from "./agent-sdk.ts"; +import { + type AgentSdkQueryOptions, + type McpServerConfig, + type SDKMessage, + type SDKUserMessage, + query, +} from "./agent-sdk.ts"; type MessageParam = SDKUserMessage["message"]; import { buildAgentRuntimeEnv, resolveAgentRuntimeModel } from "../config/providers.ts"; @@ -14,6 +20,7 @@ import { type AgentCost, type AgentResponse, emptyCost } from "./events.ts"; import { createDangerousCommandBlocker, createFileTracker } from "./hooks.ts"; import { extractTextFromMessageParam } from "./message-param-utils.ts"; import { extractCost, extractTextFromMessage } from "./message-utils.ts"; +import { createMurphContextTransform } from "./murph-context.ts"; import { permissionOptionsFromConfig } from "./permission-options.ts"; import { assemblePrompt } from "./prompt-assembler.ts"; import { isNoConversationFoundResult, sdkResultErrorText } from "./sdk-result-errors.ts"; @@ -36,7 +43,7 @@ export async function executeChatQuery( sessionKey: string, message: MessageParam, startTime: number, - options: { signal: AbortSignal; onSdkEvent: (msg: SDKMessage) => void }, + options: { signal: AbortSignal; onSdkEvent: (msg: SDKMessage) => void; sessionContext?: string }, ): Promise { const parts = sessionKey.split(":"); const channelId = parts[0] ?? "web"; @@ -55,6 +62,7 @@ export async function executeChatQuery( /* Memory unavailable */ } } + const useMurphContextTransform = deps.config.agent_runtime === "murph"; const appendPrompt = assemblePrompt( deps.config, memoryContext, @@ -62,7 +70,9 @@ export async function executeChatQuery( deps.roleTemplate ?? undefined, deps.onboardingPrompt ?? undefined, undefined, + useMurphContextTransform ? undefined : options.sessionContext, ); + const transformContext = useMurphContextTransform ? 
createMurphContextTransform(options.sessionContext) : undefined; const queryModel = resolveAgentRuntimeModel(deps.config, deps.config.model); const providerEnv = buildAgentRuntimeEnv(deps.config, queryModel); @@ -93,30 +103,32 @@ export async function executeChatQuery( await Promise.all(Object.entries(deps.mcpServerFactories).map(async ([k, f]) => [k, await f()] as const)), ) : undefined; + const queryOptions: AgentSdkQueryOptions = { + model: queryModel, + ...permissionOptions, + settingSources: ["project", "user"], + systemPrompt: { + type: "preset" as const, + preset: "claude_code" as const, + append: appendPrompt, + }, + persistSession: true, + effort: deps.config.effort, + thinking: getThinkingConfig(queryModel), + includePartialMessages: true, + agentProgressSummaries: true, + promptSuggestions: true, + ...(deps.config.max_budget_usd > 0 ? { maxBudgetUsd: deps.config.max_budget_usd } : {}), + abortController: controller, + env: { ...process.env, ...providerEnv }, + hooks: { PreToolUse: [commandBlocker], PostToolUse: [fileTracker.hook] }, + ...(useResume && session?.sdk_session_id ? { resume: session.sdk_session_id } : {}), + ...(mcpServers ? { mcpServers } : {}), + ...(transformContext ? { transformContext } : {}), + }; const queryStream = query({ prompt: makePrompt(), - options: { - model: queryModel, - ...permissionOptions, - settingSources: ["project", "user"], - systemPrompt: { - type: "preset" as const, - preset: "claude_code" as const, - append: appendPrompt, - }, - persistSession: true, - effort: deps.config.effort, - thinking: getThinkingConfig(queryModel), - includePartialMessages: true, - agentProgressSummaries: true, - promptSuggestions: true, - ...(deps.config.max_budget_usd > 0 ? { maxBudgetUsd: deps.config.max_budget_usd } : {}), - abortController: controller, - env: { ...process.env, ...providerEnv }, - hooks: { PreToolUse: [commandBlocker], PostToolUse: [fileTracker.hook] }, - ...(useResume && session?.sdk_session_id ? 
{ resume: session.sdk_session_id } : {}), - ...(mcpServers ? { mcpServers } : {}), - }, + options: queryOptions, }); for await (const msg of queryStream) { diff --git a/src/agent/murph-context.ts b/src/agent/murph-context.ts new file mode 100644 index 00000000..231086eb --- /dev/null +++ b/src/agent/murph-context.ts @@ -0,0 +1,72 @@ +export type MurphContextTransform = (messages: unknown[], signal?: AbortSignal) => Promise | unknown[]; + +const PHANTOM_CONTEXT_OPEN_TAG = ""; +const PHANTOM_CONTEXT_CLOSE_TAG = ""; + +type PhantomContextMessage = { + role: "user"; + content: [{ type: "text"; text: string }]; + timestamp: number; +}; + +export function createMurphContextTransform(context: string | undefined): MurphContextTransform | undefined { + const trimmed = context?.trim(); + if (!trimmed) return undefined; + + return (messages: unknown[]) => { + const cleaned = messages.filter((message) => !isPhantomContextMessage(message)); + const contextMessage = buildContextMessage(trimmed); + if (cleaned.length === 0) { + return [contextMessage]; + } + + const lastIndex = cleaned.length - 1; + const lastMessage = cleaned[lastIndex]; + if (hasRole(lastMessage, "user")) { + return [...cleaned.slice(0, lastIndex), contextMessage, lastMessage]; + } + + return [...cleaned, contextMessage]; + }; +} + +function buildContextMessage(content: string): PhantomContextMessage { + return { + role: "user", + content: [ + { + type: "text", + text: [ + PHANTOM_CONTEXT_OPEN_TAG, + "Durable context supplied by Phantom outside the raw transcript.", + "Use it to continue after Murph compaction without asking the user to repeat known app state.", + content, + PHANTOM_CONTEXT_CLOSE_TAG, + ].join("\n"), + }, + ], + timestamp: Date.now(), + }; +} + +function isPhantomContextMessage(message: unknown): boolean { + if (!isRecord(message) || message.role !== "user") return false; + const content = message.content; + if (typeof content === "string") return content.includes(PHANTOM_CONTEXT_OPEN_TAG); + 
if (!Array.isArray(content)) return false; + return content.some( + (item) => isRecord(item) && item.type === "text" && textField(item).includes(PHANTOM_CONTEXT_OPEN_TAG), + ); +} + +function hasRole(message: unknown, role: string): boolean { + return isRecord(message) && message.role === role; +} + +function isRecord(value: unknown): value is Record { + return value !== null && typeof value === "object"; +} + +function textField(record: Record): string { + return typeof record.text === "string" ? record.text : ""; +} diff --git a/src/agent/prompt-assembler.ts b/src/agent/prompt-assembler.ts index 1c134309..3f8ccc3f 100644 --- a/src/agent/prompt-assembler.ts +++ b/src/agent/prompt-assembler.ts @@ -18,6 +18,7 @@ export function assemblePrompt( roleTemplate?: RoleTemplate, onboardingPrompt?: string, dataDir?: string, + chatRuntimeContext?: string, ): string { const sections: string[] = []; @@ -74,6 +75,10 @@ export function assemblePrompt( sections.push(buildMemorySection(memoryContext)); } + if (chatRuntimeContext) { + sections.push(buildChatRuntimeContext(chatRuntimeContext)); + } + return sections.join("\n\n"); } @@ -140,9 +145,14 @@ function buildEnvironment(config: PhantomConfig): string { lines.push(""); lines.push("Schedule types: one-shot (at), interval (every N ms), cron (weekdays at 9am)."); lines.push(""); - lines.push("To give a user access to a /ui/ page, call phantom_generate_login to create a magic link"); - lines.push("and send the link to them via Slack. 
The link must be sent as plain text without any"); - lines.push("Markdown wrapping (no asterisks, no bold, no parentheses) so Slack renders it cleanly."); + lines.push("Page URLs and login URLs are different."); + lines.push("When the user asks for the page, link, profile, report, dashboard, or thing you created,"); + lines.push("return the exact /ui/ page URL from phantom_create_page or phantom_preview_page."); + lines.push("Only call phantom_generate_login when the user explicitly asks for access, auth,"); + lines.push("a login link, a magic link, or says they cannot open a page because login is required."); + lines.push("If you share a login link, label it as an authentication link. Do not substitute"); + lines.push("a login link for a created page URL."); + lines.push("Links must be sent as plain text without Markdown wrapping so Slack renders them cleanly."); lines.push(""); lines.push(...buildUIGuidanceLines(publicUrl ?? undefined)); lines.push(""); @@ -231,6 +241,10 @@ function buildMemorySection(memoryContext: string): string { return `# Your Memory\n\nPersistent memory from previous sessions. Use this to maintain continuity.\n\n${memoryContext}`; } +function buildChatRuntimeContext(chatRuntimeContext: string): string { + return `# Current Chat Context\n\n${chatRuntimeContext}`; +} + function buildFallbackRoleHint(config: PhantomConfig): string { return `Your role is ${config.role}. Approach every task with that expertise.`; } diff --git a/src/agent/prompt-blocks/ui-guidance.ts b/src/agent/prompt-blocks/ui-guidance.ts index 887d1a22..4eb8c5c1 100644 --- a/src/agent/prompt-blocks/ui-guidance.ts +++ b/src/agent/prompt-blocks/ui-guidance.ts @@ -147,6 +147,10 @@ export function buildUIGuidanceLines(publicUrl: string | undefined): string[] { lines.push("phantom_preview_page with the same path. Review the screenshot. Read the"); lines.push("JSON metadata block. 
If console.errors > 0 or network.failedRequests > 0,"); lines.push("fix the HTML and re-preview until both are zero. Only then report the page."); + lines.push("Preserve the exact page URL returned by phantom_create_page or preview metadata."); + lines.push("When the user later asks for the page or link you created, return that page URL."); + lines.push("Do not answer a page-link request by calling phantom_generate_login unless"); + lines.push("the user explicitly asks for an authentication link."); lines.push(""); if (publicUrl) { lines.push(`Pages are at ${publicUrl}/ui/`); diff --git a/src/agent/runtime.ts b/src/agent/runtime.ts index 43aaf1e6..64917696 100644 --- a/src/agent/runtime.ts +++ b/src/agent/runtime.ts @@ -126,7 +126,7 @@ export class AgentRuntime { async runForChat( sessionKey: string, message: MessageParam, - options: { signal: AbortSignal; onSdkEvent: (msg: SDKMessage) => void }, + options: { signal: AbortSignal; onSdkEvent: (msg: SDKMessage) => void; sessionContext?: string }, ): Promise { if (this.activeSessions.has(sessionKey)) { return { text: "Error: session busy", sessionId: "", cost: emptyCost(), durationMs: 0 }; diff --git a/src/chat/__tests__/continuity-context.test.ts b/src/chat/__tests__/continuity-context.test.ts new file mode 100644 index 00000000..dbbe9850 --- /dev/null +++ b/src/chat/__tests__/continuity-context.test.ts @@ -0,0 +1,109 @@ +import { Database } from "bun:sqlite"; +import { afterEach, beforeEach, describe, expect, test } from "bun:test"; +import { MIGRATIONS } from "../../db/schema.ts"; +import { buildChatContinuityContext } from "../continuity-context.ts"; +import { ChatEventLog } from "../event-log.ts"; +import { ChatSessionStore } from "../session-store.ts"; + +let db: Database; +let eventLog: ChatEventLog; +let sessionStore: ChatSessionStore; + +beforeEach(() => { + db = new Database(":memory:"); + for (const sql of MIGRATIONS) { + db.run(sql); + } + eventLog = new ChatEventLog(db); + sessionStore = new 
ChatSessionStore(db); +}); + +afterEach(() => { + db.close(); +}); + +describe("buildChatContinuityContext", () => { + test("summarizes created page artifacts from the durable stream log", () => { + const session = sessionStore.create(); + eventLog.append(session.id, null, 1, "message.tool_call_start", { + event: "message.tool_call_start", + tool_call_id: "tool-1", + tool_name: "phantom_create_page", + message_id: "assistant-1", + parent_tool_use_id: null, + is_mcp: true, + }); + eventLog.append(session.id, null, 2, "message.tool_call_input_end", { + event: "message.tool_call_input_end", + tool_call_id: "tool-1", + input: { + path: "muhammad-ahmed-cheema.html", + title: "Muhammad Ahmed Cheema Profile", + }, + }); + eventLog.append(session.id, null, 3, "message.tool_call_result", { + event: "message.tool_call_result", + tool_call_id: "tool-1", + tool_name: "phantom_create_page", + status: "success", + output: JSON.stringify({ + path: "muhammad-ahmed-cheema.html", + url: "http://127.0.0.1:3112/ui/muhammad-ahmed-cheema.html", + size: 12345, + }), + }); + + const context = buildChatContinuityContext({ sessionId: session.id, eventLog }); + + expect(context).toContain("User-visible page artifacts"); + expect(context).toContain("Muhammad Ahmed Cheema Profile"); + expect(context).toContain("http://127.0.0.1:3112/ui/muhammad-ahmed-cheema.html"); + expect(context).toContain("muhammad-ahmed-cheema.html"); + expect(context).not.toContain("/ui/login"); + }); + + test("skips login links and keeps recent compact checkpoints", () => { + const session = sessionStore.create(); + eventLog.append(session.id, null, 1, "session.compact_boundary", { + event: "session.compact_boundary", + trigger: "auto", + pre_tokens: 1434337, + }); + eventLog.append(session.id, null, 2, "message.tool_call_result", { + event: "message.tool_call_result", + tool_call_id: "tool-login", + tool_name: "phantom_generate_login", + status: "success", + output: JSON.stringify({ + magicLink: 
"http://127.0.0.1:3112/ui/login?magic=secret", + }), + }); + + const context = buildChatContinuityContext({ sessionId: session.id, eventLog }); + + expect(context).toContain("auto compaction at stream seq 1 before about 1,434,337 tokens."); + expect(context).toContain("Authentication links"); + expect(context).not.toContain("magic=secret"); + }); + + test("uses the latest stream events when the full event log is larger than the scan limit", () => { + const session = sessionStore.create(); + for (let seq = 1; seq <= 12; seq++) { + eventLog.append(session.id, null, seq, "session.status", { + event: "session.status", + status: "working", + permission_mode: "bypassPermissions", + }); + } + eventLog.append(session.id, null, 13, "session.compact_boundary", { + event: "session.compact_boundary", + trigger: "auto", + pre_tokens: 500000, + }); + + const context = buildChatContinuityContext({ sessionId: session.id, eventLog, limit: 3 }); + + expect(context).toContain("stream seq 13"); + expect(context).toContain("500,000"); + }); +}); diff --git a/src/chat/__tests__/writer.test.ts b/src/chat/__tests__/writer.test.ts index ed6167fb..68beaaea 100644 --- a/src/chat/__tests__/writer.test.ts +++ b/src/chat/__tests__/writer.test.ts @@ -36,7 +36,7 @@ function mockRuntime(overrides?: { runForChat?: ( key: string, msg: unknown, - opts: { signal: AbortSignal; onSdkEvent: (msg: unknown) => void }, + opts: { signal: AbortSignal; onSdkEvent: (msg: unknown) => void; sessionContext?: string }, ) => Promise<{ text: string; sessionId: string; @@ -305,6 +305,73 @@ describe("ChatSessionWriter", () => { expect(timelines[0]?.summary.status).toBe("completed"); }); + test("passes durable page context into the chat runtime", async () => { + const session = sessionStore.create(); + let capturedContext: string | undefined; + eventLog.append(session.id, null, 1, "message.tool_call_start", { + event: "message.tool_call_start", + tool_call_id: "tool-1", + tool_name: "phantom_create_page", + message_id: 
"assistant-1", + parent_tool_use_id: null, + is_mcp: true, + }); + eventLog.append(session.id, null, 2, "message.tool_call_input_end", { + event: "message.tool_call_input_end", + tool_call_id: "tool-1", + input: { + path: "profile.html", + title: "Profile Page", + }, + }); + eventLog.append(session.id, null, 3, "message.tool_call_result", { + event: "message.tool_call_result", + tool_call_id: "tool-1", + tool_name: "phantom_create_page", + status: "success", + output: JSON.stringify({ + path: "profile.html", + url: "http://127.0.0.1:3112/ui/profile.html", + }), + }); + + const writer = new ChatSessionWriter({ + sessionId: session.id, + runtime: mockRuntime({ + runForChat: async (_key, _message, opts) => { + capturedContext = opts.sessionContext; + opts.onSdkEvent({ + type: "result", + subtype: "success", + result: "ok", + stop_reason: "end_turn", + total_cost_usd: 0, + usage: {}, + modelUsage: {}, + duration_ms: 0, + num_turns: 1, + }); + return { + text: "ok", + sessionId: "sdk-1", + cost: { totalUsd: 0, inputTokens: 0, outputTokens: 0, modelUsage: {} }, + durationMs: 0, + }; + }, + }), + eventLog, + messageStore, + sessionStore, + streamBus, + }); + writer.claim(); + + await writer.run({ role: "user", content: "give me the page link" }, "t1", "give me the page link"); + + expect(capturedContext).toContain("Profile Page"); + expect(capturedContext).toContain("http://127.0.0.1:3112/ui/profile.html"); + }); + test("persists errored run timeline without committing assistant id", async () => { const session = sessionStore.create(); const writer = new ChatSessionWriter({ diff --git a/src/chat/continuity-context.ts b/src/chat/continuity-context.ts new file mode 100644 index 00000000..19e51949 --- /dev/null +++ b/src/chat/continuity-context.ts @@ -0,0 +1,213 @@ +import type { ChatEventLog, ChatStreamEvent } from "./event-log.ts"; + +const DEFAULT_EVENT_SCAN_LIMIT = 5000; +const MAX_ARTIFACTS = 8; +const MAX_COMPACTIONS = 3; +const MAX_LABEL_LENGTH = 90; +const PAGE_TOOLS 
= new Set(["phantom_create_page", "phantom_preview_page"]);
+
+type BuildChatContinuityContextInput = {
+	sessionId: string;
+	eventLog: ChatEventLog;
+	limit?: number;
+};
+
+type ToolAccumulator = {
+	seq: number;
+	toolName?: string;
+	input?: unknown;
+	output?: string;
+	status?: string;
+};
+
+type PageArtifact = {
+	seq: number;
+	toolName: string;
+	label: string;
+	url?: string;
+	path?: string;
+	size?: number;
+};
+
+type CompactCheckpoint = {
+	seq: number;
+	trigger?: string;
+	preTokens?: number;
+};
+
+export function buildChatContinuityContext(input: BuildChatContinuityContextInput): string | undefined {
+	const events = input.eventLog.tail(input.sessionId, input.limit ?? DEFAULT_EVENT_SCAN_LIMIT);
+	const tools = new Map<string, ToolAccumulator>();
+	const compactions: CompactCheckpoint[] = [];
+
+	for (const event of events) {
+		const payload = parsePayload(event);
+		if (!payload) continue;
+		const eventType = stringField(payload, "event") ?? event.event_type;
+
+		if (eventType === "session.compact_boundary") {
+			compactions.push({
+				seq: event.seq,
+				trigger: stringField(payload, "trigger"),
+				preTokens: numberField(payload, "pre_tokens"),
+			});
+			continue;
+		}
+
+		if (!eventType.startsWith("message.tool_call_")) continue;
+		const toolCallId = stringField(payload, "tool_call_id");
+		if (!toolCallId) continue;
+		const tool = tools.get(toolCallId) ?? { seq: event.seq };
+		tool.seq = event.seq;
+
+		const toolName = stringField(payload, "tool_name");
+		if (toolName) tool.toolName = toolName;
+
+		if (eventType === "message.tool_call_input_end") {
+			tool.input = payload.input;
+		} else if (eventType === "message.tool_call_running") {
+			const outputPreview = stringField(payload, "output_preview");
+			if (outputPreview && !tool.output) tool.output = outputPreview;
+		} else if (eventType === "message.tool_call_result") {
+			tool.status = stringField(payload, "status");
+			tool.output = stringField(payload, "output") ?? stringField(payload, "output_preview") ?? tool.output;
+		}
+
+		tools.set(toolCallId, tool);
+	}
+
+	const artifacts = dedupeArtifacts([...tools.values()].flatMap((tool) => artifactFromTool(tool) ?? []));
+	const latestCompactions = compactions.slice(-MAX_COMPACTIONS);
+	if (artifacts.length === 0 && latestCompactions.length === 0) {
+		return undefined;
+	}
+
+	return renderContext({
+		artifacts: artifacts.slice(-MAX_ARTIFACTS),
+		compactions: latestCompactions,
+	});
+}
+
+function renderContext(input: { artifacts: PageArtifact[]; compactions: CompactCheckpoint[] }): string {
+	const lines = [
+		"Durable Phantom chat context:",
+		"- The transcript may have been compacted by Murph. Continue from the latest user message using these host facts when relevant.",
+		"- Authentication links from phantom_generate_login are not page artifacts.",
+	];
+
+	if (input.compactions.length > 0) {
+		lines.push("", "Recent compaction checkpoints:");
+		for (const checkpoint of input.compactions) {
+			const trigger = checkpoint.trigger ?? "unknown";
+			const tokens =
+				checkpoint.preTokens === undefined
+					? ""
+					: ` before about ${checkpoint.preTokens.toLocaleString("en-US")} tokens`;
+			lines.push(`- ${trigger} compaction at stream seq ${checkpoint.seq}${tokens}.`);
+		}
+	}
+
+	if (input.artifacts.length > 0) {
+		lines.push("", "User-visible page artifacts from earlier tool work:");
+		for (const artifact of input.artifacts) {
+			const parts = [`- ${artifact.label}`];
+			if (artifact.url) parts.push(` URL: ${artifact.url}`);
+			if (artifact.path) parts.push(` path: ${artifact.path}`);
+			if (artifact.size !== undefined) parts.push(` size: ${artifact.size} bytes`);
+			parts.push(` via ${artifact.toolName} at stream seq ${artifact.seq}.`);
+			lines.push(parts.join(";"));
+		}
+	}
+
+	return lines.join("\n");
+}
+
+function artifactFromTool(tool: ToolAccumulator): PageArtifact | undefined {
+	if (!tool.toolName || !PAGE_TOOLS.has(tool.toolName)) return undefined;
+
+	const input = recordFromUnknown(tool.input);
+	const output = parseJsonRecord(tool.output);
+	const path = stringField(output, "path") ?? stringField(input, "path");
+	const url = normalizePageUrl(
+		stringField(output, "url") ??
+			stringField(output, "publicUrl") ??
+			stringField(output, "pageUrl") ??
+			urlFromText(tool.output),
+	);
+	if (!url && !path) return undefined;
+
+	const title = stringField(input, "title") ?? stringField(output, "title") ?? path ?? url ?? "Created page";
+	const size = numberField(output, "size");
+	return {
+		seq: tool.seq,
+		toolName: tool.toolName,
+		label: truncate(title, MAX_LABEL_LENGTH),
+		...(url ? { url } : {}),
+		...(path ? { path } : {}),
+		...(size !== undefined ? { size } : {}),
+	};
+}
+
+function dedupeArtifacts(artifacts: PageArtifact[]): PageArtifact[] {
+	const byKey = new Map<string, PageArtifact>();
+	for (const artifact of artifacts) {
+		const key = artifact.url ?? artifact.path ?? `${artifact.toolName}:${artifact.seq}`;
+		byKey.set(key, artifact);
+	}
+	return [...byKey.values()].sort((left, right) => left.seq - right.seq);
+}
+
+function parsePayload(event: ChatStreamEvent): Record<string, unknown> | undefined {
+	try {
+		const parsed = JSON.parse(event.payload_json);
+		return recordFromUnknown(parsed);
+	} catch {
+		return undefined;
+	}
+}
+
+function parseJsonRecord(value: string | undefined): Record<string, unknown> | undefined {
+	if (!value) return undefined;
+	try {
+		return recordFromUnknown(JSON.parse(value));
+	} catch {
+		return undefined;
+	}
+}
+
+function recordFromUnknown(value: unknown): Record<string, unknown> | undefined {
+	if (value === null || typeof value !== "object" || Array.isArray(value)) {
+		return undefined;
+	}
+	return value as Record<string, unknown>;
+}
+
+function stringField(record: Record<string, unknown> | undefined, key: string): string | undefined {
+	const value = record?.[key];
+	if (typeof value !== "string") return undefined;
+	const trimmed = value.trim();
+	return trimmed.length > 0 ? trimmed : undefined;
+}
+
+function numberField(record: Record<string, unknown> | undefined, key: string): number | undefined {
+	const value = record?.[key];
+	return typeof value === "number" && Number.isFinite(value) ? value : undefined;
+}
+
+function normalizePageUrl(url: string | undefined): string | undefined {
+	if (!url || !url.includes("/ui/") || url.includes("/ui/login")) {
+		return undefined;
+	}
+	return url;
+}
+
+function urlFromText(text: string | undefined): string | undefined {
+	if (!text) return undefined;
+	const match = text.match(/https?:\/\/[^\s"']+\/ui\/[^\s"']+/);
+	return normalizePageUrl(match?.[0]);
+}
+
+function truncate(value: string, maxLength: number): string {
+	if (value.length <= maxLength) return value;
+	return `${value.slice(0, maxLength - 3)}...`;
+}
diff --git a/src/chat/event-log.ts b/src/chat/event-log.ts
index ecccb8e8..f8b89242 100644
--- a/src/chat/event-log.ts
+++ b/src/chat/event-log.ts
@@ -45,6 +45,19 @@ export class ChatEventLog {
 			.all(sessionId, afterSeq, maxRows) as ChatStreamEvent[];
 	}
 
+	tail(sessionId: string, limit?: number): ChatStreamEvent[] {
+		const maxRows = limit ?? 5000;
+		const rows = this.db
+			.query(
+				`SELECT * FROM chat_stream_events
+				WHERE session_id = ?
+				ORDER BY seq DESC
+				LIMIT ?`,
+			)
+			.all(sessionId, maxRows) as ChatStreamEvent[];
+		return rows.reverse();
+	}
+
 	getMaxSeq(sessionId: string): number {
 		const row = this.db
 			.query("SELECT MAX(seq) as max_seq FROM chat_stream_events WHERE session_id = ?")
diff --git a/src/chat/writer.ts b/src/chat/writer.ts
index dc579852..e87906a9 100644
--- a/src/chat/writer.ts
+++ b/src/chat/writer.ts
@@ -3,6 +3,7 @@ import type { SDKUserMessage } from "../agent/agent-sdk.ts";
 type MessageParam = SDKUserMessage["message"];
 import type { AgentRuntime } from "../agent/runtime.ts";
 import { autoRenameSession } from "./auto-rename.ts";
+import { buildChatContinuityContext } from "./continuity-context.ts";
 import type { ChatEventLog } from "./event-log.ts";
 import type { ChatMessageStore } from "./message-store.ts";
 import type { NotificationTriggerService } from "./notifications/triggers.ts";
@@ -99,8 +100,13 @@ export class ChatSessionWriter {
 		let resultText = "";
 
 		try {
+			const sessionContext = buildChatContinuityContext({
+				sessionId: this.deps.sessionId,
+				eventLog: this.deps.eventLog,
+			});
 			const response = await this.deps.runtime.runForChat(sessionKey, message, {
 				signal: this.abortController.signal,
+				sessionContext,
 				onSdkEvent: (sdkMsg: unknown) => {
 					const frames = translateSdkMessage(sdkMsg as Record<string, unknown>, ctx);
 					for (const frame of frames) {
diff --git a/src/ui/tools.ts b/src/ui/tools.ts
index 507f0da0..56934335 100644
--- a/src/ui/tools.ts
+++ b/src/ui/tools.ts
@@ -27,7 +27,7 @@ export function createWebUiToolServer(
 		"phantom_create_page",
 		"Create or update an HTML page served at /ui/. If html is provided, writes it directly. " +
 			"If title and content are provided instead, wraps the content in the base template. " +
-			"Returns the public URL of the page.",
+			"Returns the page URL to share when the user asks for the page you created.",
 		{
 			path: z.string().min(1).describe("File path relative to public/, e.g. 'dashboard.html' or 'reports/weekly.html'"),
 			html: z.string().optional().describe("Full HTML content to write (use this for complete pages)"),
@@ -77,6 +77,7 @@ export function createWebUiToolServer(
 					path: safePath,
 					url: publicUrl,
 					size: htmlContent.length,
+					note: "This is the created page URL, not a login link.",
 				});
 			} catch (error: unknown) {
 				const msg = error instanceof Error ? error.message : String(error);
@@ -87,7 +88,9 @@ export function createWebUiToolServer(
 
 	const generateLoginTool = tool(
 		"phantom_generate_login",
-		"Generate a magic link for web UI authentication. Send this link to the user via Slack. " +
+		"Generate a magic link for web UI authentication. Use only when the user asks for access, " +
+			"auth, login, a magic link, or says they cannot open a page because login is required. " +
+			"Do not use this to answer a request for the page URL of something you created. " +
 			"The link expires in 10 minutes. After authentication, the session lasts 7 days.",
 		{},
 		async () => {
@@ -100,7 +103,7 @@ export function createWebUiToolServer(
 					// sessionToken intentionally excluded - agent should only share the magic link
 					expiresIn: "10 minutes",
 					sessionDuration: "7 days",
-					note: "Send the magic link to the user via Slack. They click it and are authenticated instantly.",
+					note: "This is an authentication link, not a page URL. Send it only when login access is needed.",
 				});
 			} catch (error: unknown) {
 				const msg = error instanceof Error ? error.message : String(error);