Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion agent/context_compressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,18 @@ def _prune_old_tool_results(
if isinstance(tc, dict):
args = tc.get("function", {}).get("arguments", "")
if len(args) > 500:
tc = {**tc, "function": {**tc["function"], "arguments": args[:200] + "...[truncated]"}}
# `arguments` is a JSON string. Some providers
# (e.g. Anthropic via LiteLLM) re-parse it when
# rebuilding tool_use blocks, so byte-slicing it
# produces invalid JSON and the next request fails
# with HTTP 400. Replace with a compact sentinel
# object that stays valid JSON.
truncated_payload = json.dumps({
"_truncated": True,
"_original_length": len(args),
"_preview": args[:200],
})
tc = {**tc, "function": {**tc["function"], "arguments": truncated_payload}}
modified = True
new_tcs.append(tc)
if modified:
Expand Down
45 changes: 45 additions & 0 deletions tests/agent/test_context_compressor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Tests for agent/context_compressor.py — compression logic, thresholds, truncation fallback."""

import json

import pytest
from unittest.mock import patch, MagicMock

Expand Down Expand Up @@ -781,3 +783,46 @@ def test_prune_without_token_budget_uses_message_count(self, budget_compressor):
# Tool at index 2 is outside the protected tail (last 3 = indices 2,3,4)
# so it might or might not be pruned depending on boundary
assert isinstance(pruned, int)

def test_pruned_tool_call_args_remain_valid_json(self, budget_compressor):
    """Pruning oversized tool_call `arguments` must leave parseable JSON.

    Certain providers (e.g. Anthropic via LiteLLM) parse the arguments
    string again while rebuilding tool_use blocks. Should pruning leave
    a non-JSON string behind, the following API call is rejected with
    HTTP 400 and the session wedges, replaying the broken history on
    every subsequent turn.
    """
    compressor = budget_compressor

    # Arguments payload deliberately larger than the 500-char limit.
    padding = " // padding line to exceed 500 chars\n" * 40
    oversized_args = json.dumps({
        "mode": "replace",
        "path": "/some/file.ts",
        "old_string": "placeholder",
        "new_string": " const colMatches = sql.matchAll(/(\\w+)\\s+(text|integer)/gi);\n"
        + padding,
    })
    original_length = len(oversized_args)
    assert original_length > 500

    tool_call = {
        "id": "c1",
        "type": "function",
        "function": {"name": "patch", "arguments": oversized_args},
    }
    history = [
        {"role": "user", "content": "start"},
        {"role": "assistant", "content": None, "tool_calls": [tool_call]},
        {"role": "tool", "content": "ok", "tool_call_id": "c1"},
        {"role": "user", "content": "recent"},
        {"role": "assistant", "content": "reply"},
    ]

    pruned_history, _ = compressor._prune_old_tool_results(
        history, protect_tail_count=2
    )

    # Locate the first message that still carries tool calls.
    carrier = next(msg for msg in pruned_history if msg.get("tool_calls"))
    rewritten_args = carrier["tool_calls"][0]["function"]["arguments"]

    # Parsing must succeed; a provider-side re-parse would otherwise fail.
    sentinel = json.loads(rewritten_args)
    assert sentinel["_truncated"] is True
    assert sentinel["_original_length"] == original_length

    preview = sentinel["_preview"]
    assert isinstance(preview, str)
    assert preview

    # The replacement has to be shorter than what it replaced.
    assert len(rewritten_args) < original_length
Loading