From a114261db30e92966d380180130f40eb6a707697 Mon Sep 17 00:00:00 2001 From: Wiktor Starczewski Date: Sat, 25 Apr 2026 10:19:51 +0200 Subject: [PATCH] fix(tools): inline read_tool_result metadata into the body text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some MCP consumers were surfacing only the populated StructuredContent block ({source, truncated, bytes}) back to the calling model, which experienced as "metadata-only" reads against large tool_result sidecars. Drop StructuredContent for this tool and prepend the same metadata as a single bracketed line above a blank line, then the body — making the body the unconditional source of truth. Other Phase-1 tools keep their markdown + StructuredContent shape; this is a targeted exception, not a deprecation of the convention. Adds a size-tiered e2e (TestE2E_ReadToolResult_Sizes) at 5K / 50K / 500K / 5M asserting both ends of the body survive the MCP transport at each tier — sentinels at byte 0 and ~32 bytes from the end. Confirms the server-side delivery contract; the display-side hypothesis (Claude Code's MCP client truncating large TextContent before the model sees it) is verified by hand against a real loopback per the Phase-2 plan's verification step 13. --- e2e/hearsay_e2e_test.go | 247 +++++++++++++++++++++++++++++++++++ internal/tools/tools.go | 42 +++--- internal/tools/tools_test.go | 114 ++++++++++++++-- 3 files changed, 372 insertions(+), 31 deletions(-) diff --git a/e2e/hearsay_e2e_test.go b/e2e/hearsay_e2e_test.go index 8c305c5..16b436e 100644 --- a/e2e/hearsay_e2e_test.go +++ b/e2e/hearsay_e2e_test.go @@ -404,6 +404,253 @@ func TestE2E_ClaudeMdRoundtrip(t *testing.T) { } } +// TestE2E_ReadToolResult_Sizes exercises the read_tool_result content +// path at four sizes (5K / 50K / 500K / 5M). PR 0 dropped this tool's +// StructuredContent block in favor of inlining `[source=…, bytes=…, +// truncated=…]` as the leading line of the body — partly because some +// MCP consumers were surfacing only the structured channel back to the +// model, which experienced as "metadata-only" reads. +// +// The test confirms the *server-side* delivery: bytes survive the MCP +// transport at every tier, with sentinel strings present at both ends of +// the body so we know nothing was silently dropped in the middle. +// +// (The display-side hypothesis — Claude Code's MCP client truncating +// large TextContent blocks before they reach the model — is verified by +// hand against a real Claude Code loopback; see Phase-2 plan +// verification step 13.) +func TestE2E_ReadToolResult_Sizes(t *testing.T) { + tiers := []struct { + label string + size int + maxBytes int // request override; 0 => use server default (64KB) + }{ + {"5k", 5 * 1024, 16 * 1024}, + {"50k", 50 * 1024, 100 * 1024}, + {"500k", 500 * 1024, 1024 * 1024}, + {"5m", 5 * 1024 * 1024, 10 * 1024 * 1024}, + } + + // One scratch HOME + data dir for the whole test, with a single + // fixture session that references every sidecar via its own + // tool_use_id. Each tier still makes its own MCP call. + home := t.TempDir() + dataDir := t.TempDir() + + sessionID := "size-tier-session" + projectDir := filepath.Join(dataDir, "projects", "-tmp-e2e-sizes") + if err := os.MkdirAll(filepath.Join(projectDir, sessionID, "tool-results"), 0o755); err != nil { + t.Fatalf("mkdir tool-results: %v", err) + } + sessionPath := filepath.Join(projectDir, sessionID+".jsonl") + + // Build the JSONL: one user prompt, then for each tier an + // assistant tool_use + a user tool_result whose content text + // embeds the absolute sidecar path so the parser can find it. + var lines []string + lines = append(lines, + `{"type":"user","uuid":"u1","timestamp":"2026-04-24T10:00:00Z","sessionId":"`+sessionID+`","message":{"role":"user","content":"size tier fixture"}}`, + ) + for i, tier := range tiers { + toolUseID := "toolu_size_" + tier.label + sidecarPath := filepath.Join(projectDir, sessionID, "tool-results", toolUseID+".txt") + writeSizedSidecar(t, sidecarPath, tier.size, tier.label) + + uuidA := fmt.Sprintf("a%d", i+1) + uuidU := fmt.Sprintf("u%d", i+2) + parentA := "u1" + if i > 0 { + parentA = fmt.Sprintf("u%d", i+1) + } + // Assistant fires Read tool. + lines = append(lines, + `{"type":"assistant","uuid":"`+uuidA+`","parentUuid":"`+parentA+ + `","timestamp":"2026-04-24T10:00:01Z","sessionId":"`+sessionID+ + `","message":{"role":"assistant","content":[{"type":"tool_use","id":"`+toolUseID+ + `","name":"Read","input":{"file_path":"/tmp/`+tier.label+`.txt"}}]}}`, + ) + // Tool result text references the sidecar path (matches + // `/\S+/tool-results/.txt` regex). + lines = append(lines, + `{"type":"user","uuid":"`+uuidU+`","parentUuid":"`+uuidA+ + `","timestamp":"2026-04-24T10:00:02Z","sessionId":"`+sessionID+ + `","message":{"role":"user","content":[{"type":"tool_result","tool_use_id":"`+toolUseID+ + `","content":"`+sidecarPath+`\n\nPreview (first 2KB):\n…"}]}}`, + ) + } + if err := os.WriteFile(sessionPath, []byte(strings.Join(lines, "\n")+"\n"), 0o644); err != nil { + t.Fatalf("write session jsonl: %v", err) + } + + // Spin up hearsay against this scratch tree and connect. + f := startServerAt(t, "size-tier-peer", home, dataDir) + cs := f.connectMCP(t) + ctx := context.Background() + + for _, tier := range tiers { + t.Run(tier.label, func(t *testing.T) { + args := map[string]any{ + "sessionId": sessionID, + "toolUseId": "toolu_size_" + tier.label, + } + if tier.maxBytes > 0 { + args["maxBytes"] = tier.maxBytes + } + res, err := cs.CallTool(ctx, &mcp.CallToolParams{ + Name: "read_tool_result", + Arguments: args, + }) + if err != nil { + t.Fatalf("call read_tool_result: %v", err) + } + if res.IsError { + var msgs []string + for _, c := range res.Content { + if tc, ok := c.(*mcp.TextContent); ok { + msgs = append(msgs, tc.Text) + } + } + t.Fatalf("read_tool_result errored: %s", strings.Join(msgs, " | ")) + } + if len(res.Content) == 0 { + t.Fatalf("no content blocks in response") + } + tc, ok := res.Content[0].(*mcp.TextContent) + if !ok { + t.Fatalf("first content is %T, want *mcp.TextContent", res.Content[0]) + } + text := tc.Text + + // 1. Metadata header is present and correctly parsed. + sep := "\n\n" + idx := strings.Index(text, sep) + if idx < 0 { + t.Fatalf("response missing metadata header: %q", truncate(text, 200)) + } + header := text[:idx] + body := text[idx+len(sep):] + wantHeaderPrefix := "[source=sidecar, bytes=" + if !strings.HasPrefix(header, wantHeaderPrefix) { + t.Errorf("header = %q, want prefix %q", header, wantHeaderPrefix) + } + if !strings.Contains(header, "truncated=false") { + t.Errorf("expected truncated=false in header (maxBytes=%d, size=%d); got %q", + tier.maxBytes, tier.size, header) + } + + // 2. Body sentinels survived end-to-end. The fixture + // places START-SENTINEL-