diff --git a/lib/chat/tools/__tests__/createPromptSandboxStreamingTool.test.ts b/lib/chat/tools/__tests__/createPromptSandboxStreamingTool.test.ts index 0609f0a5..66f08e28 100644 --- a/lib/chat/tools/__tests__/createPromptSandboxStreamingTool.test.ts +++ b/lib/chat/tools/__tests__/createPromptSandboxStreamingTool.test.ts @@ -239,6 +239,28 @@ describe("createPromptSandboxStreamingTool", () => { }); }); + describe("description explains runId background task behavior", () => { + it("tells LLM not to interpret empty output when runId is present", () => { + const tool = createPromptSandboxStreamingTool("acc_1", "key_1"); + + expect(tool.description).toContain("runId"); + expect(tool.description).toContain("background task"); + expect(tool.description).toContain("do NOT"); + }); + + it("tells LLM not to poll task status", () => { + const tool = createPromptSandboxStreamingTool("acc_1", "key_1"); + + expect(tool.description).toContain("Do NOT automatically poll"); + }); + + it("tells LLM to let user know they can ask for status", () => { + const tool = createPromptSandboxStreamingTool("acc_1", "key_1"); + + expect(tool.description).toContain("ask you to check"); + }); + }); + describe("description mentions release management", () => { it("includes release management as a primary use case", () => { const tool = createPromptSandboxStreamingTool("acc_1", "key_1"); diff --git a/lib/chat/tools/createPromptSandboxStreamingTool.ts b/lib/chat/tools/createPromptSandboxStreamingTool.ts index e7a1d7e4..8cddbb18 100644 --- a/lib/chat/tools/createPromptSandboxStreamingTool.ts +++ b/lib/chat/tools/createPromptSandboxStreamingTool.ts @@ -52,7 +52,12 @@ export function createPromptSandboxStreamingTool( "file operations, data analysis, content generation, and any multi-step task. " + "The sandbox has skills for managing RELEASE.md documents, generating deliverables, and more. " + "Reuses the account's existing running sandbox or creates one from the latest snapshot. " + - "Streams output in real-time.", + "Streams output in real-time. " + + "IMPORTANT: When the result contains a `runId`, it means the sandbox is being set up for the first time " + + "and the command was dispatched to a background task. The output will be empty because the task is still running. " + + "The UI automatically shows a live progress view for background tasks — do NOT summarize or interpret the empty output. " + + "Simply tell the user their request is being processed in the sandbox and the results will appear in the task progress view above. " + + "Do NOT automatically poll or check the task status — instead, let the user know they can ask you to check on it whenever they want.", inputSchema: promptSandboxSchema, execute: async function* ({ prompt }, { abortSignal }) { yield { status: "booting" as const, output: "" };