Add context truncation logic for react (#7780)

chenmoneygithub · web-flow · commit 6a6a25ebbc85 · 2025-02-06T16:54:09.000-08:00
diff --git a/dspy/adapters/base.py b/dspy/adapters/base.py
@@ -1,7 +1,10 @@
 from abc import ABC, abstractmethod
 
+from litellm import ContextWindowExceededError
+
 from dspy.utils.callback import with_callbacks
 
+
 class Adapter(ABC):
     def __init__(self, callbacks=None):
         self.callbacks = callbacks or []
@@ -40,6 +43,9 @@ def __call__(self, lm, lm_kwargs, signature, demos, inputs):
             return values
 
         except Exception as e:
+            if isinstance(e, ContextWindowExceededError):
+                # On context window exceeded error, we don't want to retry with a different adapter.
+                raise e
             from .json_adapter import JSONAdapter
             if not isinstance(self, JSONAdapter):
                 return JSONAdapter()(lm, lm_kwargs, signature, demos, inputs)
diff --git a/dspy/predict/react.py b/dspy/predict/react.py
@@ -1,12 +1,16 @@
+import logging
 from typing import Any, Callable, Literal, get_origin
 
+from litellm import ContextWindowExceededError
 from pydantic import BaseModel
 
 import dspy
 from dspy.primitives.program import Module
 from dspy.primitives.tool import Tool
 from dspy.signatures.signature import ensure_signature
 
+logger = logging.getLogger(__name__)
+
 
 class ReAct(Module):
     def __init__(self, signature, tools: list[Callable], max_iters=5):
@@ -32,15 +36,11 @@ def __init__(self, signature, tools: list[Callable], max_iters=5):
             ]
         )
 
-        finish_desc = (
-            f"Signals that the final outputs, i.e. {outputs}, are now available and marks the task as complete."
-        )
-        finish_args = {}  # k: v.annotation for k, v in signature.output_fields.items()}
         tools["finish"] = Tool(
             func=lambda **kwargs: "Completed.",
             name="finish",
-            desc=finish_desc,
-            args=finish_args,
+            desc=f"Signals that the final outputs, i.e. {outputs}, are now available and marks the task as complete.",
+            args={},
         )
 
         for idx, tool in enumerate(tools.values()):
@@ -66,18 +66,15 @@ def __init__(self, signature, tools: list[Callable], max_iters=5):
         self.react = dspy.Predict(react_signature)
         self.extract = dspy.ChainOfThought(fallback_signature)
 
-    def forward(self, **input_args):
-        def format(trajectory: dict[str, Any], last_iteration: bool):
-            adapter = dspy.settings.adapter or dspy.ChatAdapter()
-            trajectory_signature = dspy.Signature(f"{', '.join(trajectory.keys())} -> x")
-            return adapter.format_fields(trajectory_signature, trajectory, role="user")
+    def _format_trajectory(self, trajectory: dict[str, Any]):  # render the trajectory dict through the adapter
+        adapter = dspy.settings.adapter or dspy.ChatAdapter()  # use ChatAdapter when no adapter is configured
+        trajectory_signature = dspy.Signature(f"{', '.join(trajectory.keys())} -> x")  # one input field per key
+        return adapter.format_fields(trajectory_signature, trajectory, role="user")
 
+    def forward(self, **input_args):
         trajectory = {}
         for idx in range(self.max_iters):
-            pred = self.react(
-                **input_args,
-                trajectory=format(trajectory, last_iteration=(idx == self.max_iters - 1)),
-            )
+            pred = self._call_with_potential_trajectory_truncation(self.react, trajectory, **input_args)
 
             trajectory[f"thought_{idx}"] = pred.next_thought
             trajectory[f"tool_name_{idx}"] = pred.next_tool_name
@@ -102,9 +99,38 @@ def format(trajectory: dict[str, Any], last_iteration: bool):
             if pred.next_tool_name == "finish":
                 break
 
-        extract = self.extract(**input_args, trajectory=format(trajectory, last_iteration=False))
+        extract = self._call_with_potential_trajectory_truncation(self.extract, trajectory, **input_args)
         return dspy.Prediction(trajectory=trajectory, **extract)
 
+    def _call_with_potential_trajectory_truncation(self, module, trajectory, **input_args):
+        while True:  # exits by returning module's result, or when truncate_trajectory raises
+            try:
+                return module(
+                    **input_args,
+                    trajectory=self._format_trajectory(trajectory),  # re-formatted on every attempt
+                )
+            except ContextWindowExceededError:  # any other exception propagates to the caller
+                logger.warning("Trajectory exceeded the context window, truncating the oldest tool call information.")
+                trajectory = self.truncate_trajectory(trajectory)  # retry with whatever truncate_trajectory returns
+
+    def truncate_trajectory(self, trajectory):
+        """Remove the first four keys (one tool call) from ``trajectory`` in place and return it.
+
+        Raises ``ValueError`` if fewer than four keys exist. Users can override this method to customize truncation.
+        """
+        keys = list(trajectory.keys())
+        if len(keys) < 4:
+            # Every tool call has 4 keys: thought, tool_name, tool_args, and observation.
+            raise ValueError(
+                "The trajectory is too long so your prompt exceeded the context window, but the trajectory cannot be "
+                "truncated because it only has one tool call."
+            )
+
+        for key in keys[:4]:  # `keys` is a snapshot list, so popping from the dict while looping is safe
+            trajectory.pop(key)
+
+        return trajectory
+
 
 """
 Thoughts and Planned Improvements for dspy.ReAct.
diff --git a/tests/predict/test_react.py b/tests/predict/test_react.py
@@ -5,6 +5,7 @@
 import dspy
 from dspy.predict import react
 from dspy.utils.dummies import DummyLM, dummy_rm
+import litellm
 
 # def test_example_no_tools():
 #     # Create a simple dataset which the model will use with the Retrieve tool.
@@ -260,3 +261,44 @@ def foo(a, b):
         "observation_1": "Completed.",
     }
     assert outputs.trajectory == expected_trajectory
+
+
+def test_trajectory_truncation():
+    # Minimal tool so the first two iterations have something to call
+    def echo(text: str) -> str:
+        return f"Echoed: {text}"
+
+    # Create ReAct instance with our echo tool
+    react = dspy.ReAct("input_text -> output_text", tools=[echo])
+
+    # Counts calls to the mocked react.react so each call can behave differently
+    call_count = 0
+
+    def mock_react(**kwargs):
+        nonlocal call_count
+        call_count += 1
+
+        if call_count < 3:
+            # First 2 calls use the echo tool
+            return dspy.Prediction(
+                next_thought=f"Thought {call_count}",
+                next_tool_name="echo",
+                next_tool_args={"text": f"Text {call_count}"},
+            )
+        elif call_count == 3:
+            # The 3rd call raises context window exceeded error
+            raise litellm.ContextWindowExceededError("Context window exceeded", "dummy_model", "dummy_provider")
+        else:
+            # The 4th call (the retry after truncation) finishes
+            return dspy.Prediction(next_thought="Final thought", next_tool_name="finish", next_tool_args={})
+
+    react.react = mock_react
+    react.extract = lambda **kwargs: dspy.Prediction(output_text="Final output")
+
+    # Call forward and get the result
+    result = react(input_text="test input")
+
+    # The tool call at index 0 was dropped on the error; the retried step is still recorded at index 2
+    assert "thought_0" not in result.trajectory
+    assert "thought_2" in result.trajectory
+    assert result.output_text == "Final output"