From cdeaa3cc23ecaff7ce3fddf2d7c2165765dd81a0 Mon Sep 17 00:00:00 2001
From: handsdiff <239876380+handsdiff@users.noreply.github.com>
Date: Fri, 17 Apr 2026 09:12:39 -0400
Subject: [PATCH] fix(compressor): keep truncated tool_call arguments as valid
 JSON
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When _prune_old_tool_results truncated a large tool_call arguments
string, it sliced the string to its first 200 characters and appended
the literal sentinel "...[truncated]". The result is not valid JSON
(unterminated mid-value), and some providers — notably Anthropic via
LiteLLM — re-parse function.arguments when rebuilding tool_use
blocks. The next API call fails with HTTP 400 ("Unterminated string
starting at: line 1 column 35"), and every subsequent turn replays
the same corrupted history, so the session is permanently stuck.

Observed in the wild: a session that compressed with 11 prior patch
tool_calls (each >500 chars) then returned HTTP 400 on every inbound
message. The 3 retries are identical payloads, so they all fail the
same way. The user sees only a canned error fallback.

Replace the raw character slice with a compact JSON sentinel object
that preserves provenance (original length + 200-char preview) while
staying parseable.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 agent/context_compressor.py            | 13 +++++++-
 tests/agent/test_context_compressor.py | 45 ++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 34ec5091b1c..ca3b6427bb6 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -459,7 +459,18 @@ def _prune_old_tool_results(
                 if isinstance(tc, dict):
                     args = tc.get("function", {}).get("arguments", "")
                     if len(args) > 500:
-                        tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}}
+                        # `arguments` is a JSON string. Some providers
+                        # (e.g. Anthropic via LiteLLM) re-parse it when
+                        # rebuilding tool_use blocks, so slicing it mid-value
+                        # produces invalid JSON and the next request fails
+                        # with HTTP 400. Replace with a compact sentinel
+                        # object that stays valid JSON.
+                        truncated_payload = json.dumps({
+                            "_truncated": True,
+                            "_original_length": len(args),
+                            "_preview": args[:200],
+                        })
+                        tc = {**tc, "function": {**tc["function"], "arguments": truncated_payload}}
                         modified = True
                 new_tcs.append(tc)
             if modified:
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index 6164d812f6b..a10ebb94d94 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -1,5 +1,7 @@
 """Tests for agent/context_compressor.py — compression logic, thresholds, truncation fallback."""
 
+import json
+
 import pytest
 from unittest.mock import patch, MagicMock
 
@@ -781,3 +783,46 @@ def test_prune_without_token_budget_uses_message_count(self, budget_compressor):
         # Tool at index 2 is outside the protected tail (last 3 = indices 2,3,4)
         # so it might or might not be pruned depending on boundary
         assert isinstance(pruned, int)
+
+    def test_pruned_tool_call_args_remain_valid_json(self, budget_compressor):
+        """Truncated tool_call `arguments` must stay parseable JSON.
+
+        Some providers (e.g. Anthropic via LiteLLM) re-parse the arguments
+        string when rebuilding tool_use blocks. If pruning leaves invalid
+        JSON in place, the next API call fails with HTTP 400 and the
+        session gets stuck — replaying the corrupted history on every turn.
+        """
+        c = budget_compressor
+        huge_args = json.dumps({
+            "mode": "replace",
+            "path": "/some/file.ts",
+            "old_string": "placeholder",
+            "new_string": "    const colMatches = sql.matchAll(/(\\w+)\\s+(text|integer)/gi);\n"
+                          + ("  // padding line to exceed 500 chars\n" * 40),
+        })
+        assert len(huge_args) > 500
+        messages = [
+            {"role": "user", "content": "start"},
+            {
+                "role": "assistant",
+                "content": None,
+                "tool_calls": [{
+                    "id": "c1",
+                    "type": "function",
+                    "function": {"name": "patch", "arguments": huge_args},
+                }],
+            },
+            {"role": "tool", "content": "ok", "tool_call_id": "c1"},
+            {"role": "user", "content": "recent"},
+            {"role": "assistant", "content": "reply"},
+        ]
+        result, _ = c._prune_old_tool_results(messages, protect_tail_count=2)
+        pruned_msg = next(m for m in result if m.get("tool_calls"))
+        pruned_args = pruned_msg["tool_calls"][0]["function"]["arguments"]
+        # Must be valid JSON — otherwise Anthropic re-parse will explode.
+        parsed = json.loads(pruned_args)
+        assert parsed["_truncated"] is True
+        assert parsed["_original_length"] == len(huge_args)
+        assert isinstance(parsed["_preview"], str) and parsed["_preview"]
+        # Sentinel must actually be smaller than the original.
+        assert len(pruned_args) < len(huge_args)