From 2ae53f29b99c083675759cf1bf67dccbf8010acd Mon Sep 17 00:00:00 2001 From: Sweets Sweetman Date: Wed, 4 Mar 2026 10:43:01 -0500 Subject: [PATCH 1/3] feat: inform LLM about runId background task behavior in prompt_sandbox When the sandbox is being set up for the first time, prompt_sandbox dispatches the command to a background task and returns a runId with empty output. The LLM was unaware of this and would try to interpret the empty result. Updated the tool description to explain that runId means a background task is running and the UI shows live progress. Co-Authored-By: Claude Opus 4.6 --- lib/chat/tools/createPromptSandboxStreamingTool.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/chat/tools/createPromptSandboxStreamingTool.ts b/lib/chat/tools/createPromptSandboxStreamingTool.ts index e7a1d7e4..916b2ccc 100644 --- a/lib/chat/tools/createPromptSandboxStreamingTool.ts +++ b/lib/chat/tools/createPromptSandboxStreamingTool.ts @@ -52,7 +52,11 @@ export function createPromptSandboxStreamingTool( "file operations, data analysis, content generation, and any multi-step task. " + "The sandbox has skills for managing RELEASE.md documents, generating deliverables, and more. " + "Reuses the account's existing running sandbox or creates one from the latest snapshot. " + - "Streams output in real-time.", + "Streams output in real-time. " + + "IMPORTANT: When the result contains a `runId`, it means the sandbox is being set up for the first time " + + "and the command was dispatched to a background task. The output will be empty because the task is still running. " + + "The UI automatically shows a live progress view for background tasks — do NOT summarize or interpret the empty output. " + + "Simply tell the user their request is being processed in the sandbox and the results will appear in the task progress view above.", inputSchema: promptSandboxSchema, execute: async function* ({ prompt }, { abortSignal }) { yield { status: "booting" as const, output: "" }; From 1b23496ee5989c034561dc761ee3ce463cb77504 Mon Sep 17 00:00:00 2001 From: Sweets Sweetman Date: Wed, 4 Mar 2026 11:03:51 -0500 Subject: [PATCH 2/3] feat: tell LLM not to poll task status, let user ask instead Co-Authored-By: Claude Opus 4.6 --- lib/chat/tools/createPromptSandboxStreamingTool.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/chat/tools/createPromptSandboxStreamingTool.ts b/lib/chat/tools/createPromptSandboxStreamingTool.ts index 916b2ccc..8cddbb18 100644 --- a/lib/chat/tools/createPromptSandboxStreamingTool.ts +++ b/lib/chat/tools/createPromptSandboxStreamingTool.ts @@ -56,7 +56,8 @@ export function createPromptSandboxStreamingTool( "IMPORTANT: When the result contains a `runId`, it means the sandbox is being set up for the first time " + "and the command was dispatched to a background task. The output will be empty because the task is still running. " + "The UI automatically shows a live progress view for background tasks — do NOT summarize or interpret the empty output. " + - "Simply tell the user their request is being processed in the sandbox and the results will appear in the task progress view above.", + "Simply tell the user their request is being processed in the sandbox and the results will appear in the task progress view above. " + + "Do NOT automatically poll or check the task status — instead, let the user know they can ask you to check on it whenever they want.", inputSchema: promptSandboxSchema, execute: async function* ({ prompt }, { abortSignal }) { yield { status: "booting" as const, output: "" }; From 8ad50f0a554e40cdadc3cbf47c03a4a2e8a4140a Mon Sep 17 00:00:00 2001 From: Sweets Sweetman Date: Wed, 4 Mar 2026 11:06:43 -0500 Subject: [PATCH 3/3] test: verify prompt_sandbox description explains runId behavior Co-Authored-By: Claude Opus 4.6 --- .../createPromptSandboxStreamingTool.test.ts | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/lib/chat/tools/__tests__/createPromptSandboxStreamingTool.test.ts b/lib/chat/tools/__tests__/createPromptSandboxStreamingTool.test.ts index 0609f0a5..66f08e28 100644 --- a/lib/chat/tools/__tests__/createPromptSandboxStreamingTool.test.ts +++ b/lib/chat/tools/__tests__/createPromptSandboxStreamingTool.test.ts @@ -239,6 +239,28 @@ describe("createPromptSandboxStreamingTool", () => { }); }); + describe("description explains runId background task behavior", () => { + it("tells LLM not to interpret empty output when runId is present", () => { + const tool = createPromptSandboxStreamingTool("acc_1", "key_1"); + + expect(tool.description).toContain("runId"); + expect(tool.description).toContain("background task"); + expect(tool.description).toContain("do NOT"); + }); + + it("tells LLM not to poll task status", () => { + const tool = createPromptSandboxStreamingTool("acc_1", "key_1"); + + expect(tool.description).toContain("Do NOT automatically poll"); + }); + + it("tells LLM to let user know they can ask for status", () => { + const tool = createPromptSandboxStreamingTool("acc_1", "key_1"); + + expect(tool.description).toContain("ask you to check"); + }); + }); + describe("description mentions release management", () => { it("includes release management as a primary use case", () => { const tool = createPromptSandboxStreamingTool("acc_1", "key_1");