diff --git a/src/agents/models/openai_responses.py b/src/agents/models/openai_responses.py index e060fb8ed..9f02d36bf 100644 --- a/src/agents/models/openai_responses.py +++ b/src/agents/models/openai_responses.py @@ -366,7 +366,7 @@ def _convert_tool(cls, tool: Tool) -> tuple[ToolParam, IncludeLiteral | None]: "display_width": tool.computer.dimensions[0], "display_height": tool.computer.dimensions[1], } - includes = None + includes = "computer_call_output.output.image_url" else: raise UserError(f"Unknown tool type: {type(tool)}, tool") diff --git a/tests/test_openai_responses_converter.py b/tests/test_openai_responses_converter.py index 34cbac5c5..8e6d9b31a 100644 --- a/tests/test_openai_responses_converter.py +++ b/tests/test_openai_responses_converter.py @@ -155,7 +155,9 @@ def drag(self, path: list[tuple[int, int]]) -> None: assert isinstance(converted.includes, list) # The includes list should have exactly the include for file search when include_search_results # is True. - assert converted.includes == ["file_search_call.results"] + assert "file_search_call.results" in converted.includes + # The includes list should also have the include for computer tool screenshots + assert "computer_call_output.output.image_url" in converted.includes # There should be exactly four converted tool dicts. assert len(converted.tools) == 4 # Extract types and verify. 
diff --git a/tests/test_reference_id_screenshots.py b/tests/test_reference_id_screenshots.py
new file mode 100644
index 000000000..277001d01
--- /dev/null
+++ b/tests/test_reference_id_screenshots.py
@@ -0,0 +1,57 @@
+"""Tests for the reference ID screenshot mechanism in ComputerAction."""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+from agents import Agent, RunConfig, RunContextWrapper, RunHooks
+from agents._run_impl import ComputerAction, ToolRunComputerAction
+from agents.items import ToolCallOutputItem
+from agents.tool import ComputerTool
+from openai.types.responses.response_computer_tool_call import (
+    ActionClick,
+    ResponseComputerToolCall,
+)
+from tests.test_computer_action import LoggingComputer
+
+
+@pytest.mark.asyncio
+@patch("agents.models.openai_provider._openai_shared.get_default_openai_client")
+@patch("openai.AsyncOpenAI")
+async def test_reference_id_screenshots(mock_openai, mock_get_client):
+    """Run one click through ComputerAction.execute and check the screenshot output.
+
+    The resulting item must be a ToolCallOutputItem whose output contains a
+    PNG data-URL prefix, and the computer must have recorded exactly one
+    "screenshot" call.
+    """
+    # Hand back a stand-in client so the run does not need an API key.
+    mock_get_client.return_value = MagicMock()
+
+    logging_computer = LoggingComputer(screenshot_return="test_screenshot")
+    computer_tool = ComputerTool(computer=logging_computer)
+
+    # A minimal, already-completed click request for the tool to act on.
+    click_call = ResponseComputerToolCall(
+        id="tool1",
+        type="computer_call",
+        action=ActionClick(type="click", x=1, y=2, button="left"),
+        call_id="tool1",
+        pending_safety_checks=[],
+        status="completed",
+    )
+
+    result = await ComputerAction.execute(
+        agent=Agent(name="test_agent", tools=[computer_tool]),
+        action=ToolRunComputerAction(tool_call=click_call, computer_tool=computer_tool),
+        hooks=RunHooks(),
+        context_wrapper=RunContextWrapper(context=None),
+        config=RunConfig(),
+    )
+
+    # The output is expected to carry the screenshot as a PNG data URL.
+    assert isinstance(result, ToolCallOutputItem)
+    assert "data:image/png;base64," in result.output
+
+    # Exactly one screenshot should have been captured for the single action.
+    screenshot_count = sum(1 for call in logging_computer.calls if call[0] == "screenshot")
+    assert screenshot_count == 1