From cdeaa3cc23ecaff7ce3fddf2d7c2165765dd81a0 Mon Sep 17 00:00:00 2001 From: handsdiff <239876380+handsdiff@users.noreply.github.com> Date: Fri, 17 Apr 2026 09:12:39 -0400 Subject: [PATCH] fix(compressor): keep truncated tool_call arguments as valid JSON MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When _prune_old_tool_results truncates a large tool_call arguments string, it was byte-slicing to 200 chars and appending the literal sentinel "...[truncated]". The resulting string is not valid JSON (unterminated mid-value), and some providers — notably Anthropic via LiteLLM — re-parse function.arguments when rebuilding tool_use blocks. The next API call fails with HTTP 400 ("Unterminated string starting at: line 1 column 35"), and every subsequent turn replays the same corrupted history, so the session is permanently stuck. Observed in the wild: a session that compressed with 11 prior patch tool_calls (each >500 chars) then returned HTTP 400 on every inbound message. The 3 retries are identical payloads, so they all fail the same way. The user sees only a canned error fallback. Replace the byte slice with a compact JSON sentinel that preserves provenance (original length + 200-char preview) while staying parseable. Co-Authored-By: Claude Opus 4.7 (1M context) --- agent/context_compressor.py | 13 +++++++- tests/agent/test_context_compressor.py | 45 ++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 34ec5091b1c..ca3b6427bb6 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -459,7 +459,18 @@ def _prune_old_tool_results( if isinstance(tc, dict): args = tc.get("function", {}).get("arguments", "") if len(args) > 500: - tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}} + # `arguments` is a JSON string. Some providers + # (e.g. Anthropic via LiteLLM) re-parse it when + # rebuilding tool_use blocks, so byte-slicing it + # produces invalid JSON and the next request fails + # with HTTP 400. Replace with a compact sentinel + # object that stays valid JSON. + truncated_payload = json.dumps({ + "_truncated": True, + "_original_length": len(args), + "_preview": args[:200], + }) + tc = {**tc, "function": {**tc["function"], "arguments": truncated_payload}} modified = True new_tcs.append(tc) if modified: diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 6164d812f6b..a10ebb94d94 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -1,5 +1,7 @@ """Tests for agent/context_compressor.py — compression logic, thresholds, truncation fallback.""" +import json + import pytest from unittest.mock import patch, MagicMock @@ -781,3 +783,46 @@ def test_prune_without_token_budget_uses_message_count(self, budget_compressor): # Tool at index 2 is outside the protected tail (last 3 = indices 2,3,4) # so it might or might not be pruned depending on boundary assert isinstance(pruned, int) + + def test_pruned_tool_call_args_remain_valid_json(self, budget_compressor): + """Truncated tool_call `arguments` must stay parseable JSON. + + Some providers (e.g. Anthropic via LiteLLM) re-parse the arguments + string when rebuilding tool_use blocks. If pruning leaves invalid + JSON in place, the next API call fails with HTTP 400 and the + session gets stuck — replaying the corrupted history on every turn. + """ + c = budget_compressor + huge_args = json.dumps({ + "mode": "replace", + "path": "/some/file.ts", + "old_string": "placeholder", + "new_string": " const colMatches = sql.matchAll(/(\\w+)\\s+(text|integer)/gi);\n" + + (" // padding line to exceed 500 chars\n" * 40), + }) + assert len(huge_args) > 500 + messages = [ + {"role": "user", "content": "start"}, + { + "role": "assistant", + "content": None, + "tool_calls": [{ + "id": "c1", + "type": "function", + "function": {"name": "patch", "arguments": huge_args}, + }], + }, + {"role": "tool", "content": "ok", "tool_call_id": "c1"}, + {"role": "user", "content": "recent"}, + {"role": "assistant", "content": "reply"}, + ] + result, _ = c._prune_old_tool_results(messages, protect_tail_count=2) + pruned_msg = next(m for m in result if m.get("tool_calls")) + pruned_args = pruned_msg["tool_calls"][0]["function"]["arguments"] + # Must be valid JSON — otherwise Anthropic re-parse will explode. + parsed = json.loads(pruned_args) + assert parsed["_truncated"] is True + assert parsed["_original_length"] == len(huge_args) + assert isinstance(parsed["_preview"], str) and parsed["_preview"] + # Sentinel must actually be smaller than the original. + assert len(pruned_args) < len(huge_args)