From d601cf0528ddb57ea34fe41fab5bd0c1439abf62 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Thu, 12 Mar 2026 13:30:29 -0400 Subject: [PATCH 1/3] fix: Make sure parallel agent calls get correct child spans in claude agent sdk --- .../wrappers/claude_agent_sdk/_wrapper.py | 23 +- ...led_subagents_preserve_task_parenting.json | 1462 +++++++++++++++++ .../wrappers/claude_agent_sdk/test_wrapper.py | 63 + 3 files changed, 1546 insertions(+), 2 deletions(-) create mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_five_parallel_bundled_subagents_preserve_task_parenting.json diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py index 4e418d2..9eb6b12 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py @@ -235,6 +235,7 @@ async def wrapped_handler(args: Any) -> Any: class ToolSpanTracker: def __init__(self): self._active_spans: dict[str, _ActiveToolSpan] = {} + self._pending_task_link_tool_use_ids: set[str] = set() def start_tool_spans(self, message: Any, llm_span_export: str | None) -> None: if llm_span_export is None or not hasattr(message, "content"): @@ -278,6 +279,8 @@ def start_tool_spans(self, message: Any, llm_span_export: str | None) -> None: display_name=parsed_tool_name.display_name, input=getattr(block, "input", None), ) + if parsed_tool_name.display_name == "Agent": + self._pending_task_link_tool_use_ids.add(tool_use_id) def finish_tool_spans(self, message: Any) -> None: if not hasattr(message, "content"): @@ -303,6 +306,16 @@ def cleanup(self, end_time: float | None = None, exclude_tool_use_ids: frozenset def has_active_spans(self) -> bool: return bool(self._active_spans) + @property + def pending_task_link_tool_use_ids(self) -> frozenset[str]: + return frozenset(self._pending_task_link_tool_use_ids) + + def mark_task_started(self, tool_use_id: Any) -> None: + if tool_use_id is None: + return + + self._pending_task_link_tool_use_ids.discard(str(tool_use_id)) + def acquire_span_for_handler(self, tool_name: Any, args: Any) -> _ActiveToolSpan | None: parsed_tool_name = _parse_tool_name(tool_name) candidate_names = list(dict.fromkeys((parsed_tool_name.raw_name, parsed_tool_name.display_name, str(tool_name)))) @@ -323,6 +336,7 @@ def acquire_span_for_handler(self, tool_name: Any, args: Any) -> _ActiveToolSpan def _end_tool_span(self, tool_use_id: str, tool_result_block: Any | None = None, end_time: float | None = None) -> None: active_tool_span = self._active_spans.pop(tool_use_id, None) + self._pending_task_link_tool_use_ids.discard(tool_use_id) if active_tool_span is None: return @@ -491,7 +505,9 @@ def process(self, message: Any) -> None: self._active_task_order.append(task_id) tool_use_id = getattr(message, "tool_use_id", None) if tool_use_id is not None: - self._task_span_by_tool_use_id[str(tool_use_id)] = task_span + tool_use_id = str(tool_use_id) + self._task_span_by_tool_use_id[tool_use_id] = task_span + self._tool_tracker.mark_task_started(tool_use_id) else: update: dict[str, Any] = {} metadata = self._metadata(message) @@ -693,9 +709,12 @@ async def receive_response(self) -> AsyncGenerator[Any, None]: if message_type == MessageClassName.ASSISTANT: if llm_tracker.current_span and tool_tracker.has_active_spans: + active_subagent_tool_use_ids = ( + task_event_span_tracker.active_tool_use_ids | tool_tracker.pending_task_link_tool_use_ids + ) tool_tracker.cleanup( end_time=llm_tracker.get_next_start_time(), - exclude_tool_use_ids=task_event_span_tracker.active_tool_use_ids, + exclude_tool_use_ids=active_subagent_tool_use_ids, ) llm_parent_export = task_event_span_tracker.parent_export_for_message( message, diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_five_parallel_bundled_subagents_preserve_task_parenting.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_five_parallel_bundled_subagents_preserve_task_parenting.json new file mode 100644 index 0000000..adc327a --- /dev/null +++ b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_five_parallel_bundled_subagents_preserve_task_parenting.json @@ -0,0 +1,1462 @@ +{ + "cassette_name": "tmp_probe_parallel_subagents_first_response", + "events": [ + { + "op": "write", + "payload": { + "kind": "json", + "value": { + "request": { + "hooks": null, + "subtype": "initialize" + }, + "request_id": "req_1_a60a2c6f", + "type": "control_request" + } + } + }, + { + "op": "read", + "payload": { + "response": { + "request_id": "req_1_a60a2c6f", + "response": { + "account": { + "apiKeySource": "ANTHROPIC_API_KEY", + "tokenSource": "none" + }, + "agents": [], + "available_output_styles": [], + "commands": [], + "models": [] + }, + "subtype": "success" + }, + "type": "control_response" + } + }, + { + "op": "write", + "payload": { + "kind": "json", + "value": { + "message": { + "content": "Use exactly five bundled general-purpose subagents, one for each file note_0.txt through note_4.txt. In your first assistant response, emit all five Agent tool calls before waiting for any subagent result. Do not emit explanatory text before or between the Agent tool calls if the tool API allows it. Each delegated subagent must use Read on exactly its assigned file and return only label= | owner=. After all five finish, reply with exactly five lines in order 0 through 4. Do not answer directly without using all five subagents.", + "role": "user" + }, + "parent_tool_use_id": null, + "session_id": "default", + "type": "user" + } + } + }, + { + "op": "read", + "payload": { + "agents": [ + "general-purpose", + "statusline-setup", + "Explore", + "Plan" + ], + "apiKeySource": "ANTHROPIC_API_KEY", + "claude_code_version": "2.1.71", + "cwd": "", + "fast_mode_state": "off", + "mcp_servers": [], + "model": "claude-haiku-4-5-20251001", + "output_style": "default", + "permissionMode": "bypassPermissions", + "plugins": [], + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "skills": [ + "keybindings-help", + "debug", + "simplify", + "batch", + "loop", + "claude-api" + ], + "slash_commands": [ + "keybindings-help", + "debug", + "simplify", + "batch", + "loop", + "claude-api", + "compact", + "context", + "cost", + "heapdump", + "init", + "pr-comments", + "release-notes", + "review", + "security-review", + "insights" + ], + "subtype": "init", + "tools": [ + "Task", + "TaskOutput", + "Bash", + "Glob", + "Grep", + "ExitPlanMode", + "Read", + "Edit", + "Write", + "NotebookEdit", + "WebFetch", + "TodoWrite", + "WebSearch", + "TaskStop", + "AskUserQuestion", + "Skill", + "EnterPlanMode", + "EnterWorktree", + "CronCreate", + "CronDelete", + "CronList", + "ToolSearch" + ], + "type": "system", + "uuid": "386d0286-29dc-4c76-9e9b-ce3716e9aa5a" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "signature": "", + "thinking": "The user is asking me to:\n1. Use exactly five bundled general-purpose subagents\n2. Each subagent should work on one file: note_0.txt through note_4.txt\n3. In my first response, emit all five Agent tool calls before waiting for results\n4. Each subagent must use Read on exactly its assigned file\n5. Each subagent should return only: label= | owner=\n6. After all five finish, reply with exactly five lines in order 0 through 4\n7. Do not answer directly without using all five subagents\n\nSo I need to make 5 Agent calls, each targeting a specific note file. Let me structure this:\n\nAgent 1: Read note_0.txt and extract label and owner\nAgent 2: Read note_1.txt and extract label and owner\nAgent 3: Read note_2.txt and extract label and owner\nAgent 4: Read note_3.txt and extract label and owner\nAgent 5: Read note_4.txt and extract label and owner\n\nI'll use general-purpose agents (the default type) and give them clear instructions to read their assigned file and return the label and owner information.", + "type": "thinking" + } + ], + "context_management": null, + "id": "msg_01Sc72rRptbdVxqBdjPGruB2", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1000 + }, + "cache_creation_input_tokens": 1000, + "cache_read_input_tokens": 17022, + "inference_geo": "not_available", + "input_tokens": 10, + "output_tokens": 7, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "assistant", + "uuid": "af74b44f-440e-489d-a52b-963a45adedad" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_01Xj9tJkKzBuditLw7rRPosq", + "input": { + "description": "Read note_0.txt and extract label and owner", + "prompt": "Read the file note_0.txt. Extract the label and owner fields. Return ONLY: label= | owner=" + }, + "name": "Agent", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_01Sc72rRptbdVxqBdjPGruB2", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1000 + }, + "cache_creation_input_tokens": 1000, + "cache_read_input_tokens": 17022, + "inference_geo": "not_available", + "input_tokens": 10, + "output_tokens": 7, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "assistant", + "uuid": "f7b10aca-c1e6-4baf-8873-cfbceae09284" + } + }, + { + "op": "read", + "payload": { + "description": "Read note_0.txt and extract label and owner", + "prompt": "Read the file note_0.txt. Extract the label and owner fields. Return ONLY: label= | owner=", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "subtype": "task_started", + "task_id": "ad1f8a983ffc73bdf", + "task_type": "local_agent", + "tool_use_id": "toolu_01Xj9tJkKzBuditLw7rRPosq", + "type": "system", + "uuid": "8663e478-49fa-43c9-87b2-8581c41e1fa5" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "text": "Read the file note_0.txt. Extract the label and owner fields. Return ONLY: label= | owner=", + "type": "text" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_01Xj9tJkKzBuditLw7rRPosq", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "user", + "uuid": "b197670f-c8cc-4f88-8c4f-ae763a6a3b61" + } + }, + { + "op": "read", + "payload": { + "description": "Reading note_0.txt", + "last_tool_name": "Read", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "subtype": "task_progress", + "task_id": "ad1f8a983ffc73bdf", + "tool_use_id": "toolu_01Xj9tJkKzBuditLw7rRPosq", + "type": "system", + "usage": { + "duration_ms": 902, + "tool_uses": 1, + "total_tokens": 13131 + }, + "uuid": "3e9a5204-4ec9-494c-8d8c-304e40efe6f0" + } + }, + { + "op": "read", + "payload": { + "output_file": "", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "status": "completed", + "subtype": "task_notification", + "summary": "Read note_0.txt and extract label and owner", + "task_id": "ad1f8a983ffc73bdf", + "tool_use_id": "toolu_01Xj9tJkKzBuditLw7rRPosq", + "type": "system", + "usage": { + "duration_ms": 1406, + "tool_uses": 1, + "total_tokens": 13315 + }, + "uuid": "7690ba4f-aa9e-4983-9bd8-79f21b7508c4" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_019H6kS4UhbbncSp7tT4wxBH", + "input": { + "file_path": "/private/tmp/subagent_race_probe2/note_0.txt" + }, + "name": "Read", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_01AryHBcgqQWJDawFJvzxfph", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1027 + }, + "cache_creation_input_tokens": 1027, + "cache_read_input_tokens": 12075, + "inference_geo": "not_available", + "input_tokens": 3, + "output_tokens": 26, + "service_tier": "standard" + } + }, + "parent_tool_use_id": "toolu_01Xj9tJkKzBuditLw7rRPosq", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "assistant", + "uuid": "f7ddbd4a-792b-407a-b4a2-dd5d52e357d6" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": " 1\u2192label=0\n 2\u2192owner=owner_0\n 3\u2192\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n", + "tool_use_id": "toolu_019H6kS4UhbbncSp7tT4wxBH", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_01Xj9tJkKzBuditLw7rRPosq", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "user", + "uuid": "627daaef-10d9-4ae2-8498-437f13274e54" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": [ + { + "text": "label=0 | owner=owner_0", + "type": "text" + }, + { + "text": "agentId: ad1f8a983ffc73bdf (for resuming to continue this agent's work if needed)\ntotal_tokens: 13301\ntool_uses: 1\nduration_ms: 1408", + "type": "text" + } + ], + "tool_use_id": "toolu_01Xj9tJkKzBuditLw7rRPosq", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": null, + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "tool_use_result": { + "agentId": "ad1f8a983ffc73bdf", + "content": [ + { + "text": "label=0 | owner=owner_0", + "type": "text" + } + ], + "prompt": "Read the file note_0.txt. Extract the label and owner fields. Return ONLY: label= | owner=", + "status": "completed", + "totalDurationMs": 1408, + "totalTokens": 13301, + "totalToolUseCount": 1, + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 180 + }, + "cache_creation_input_tokens": 180, + "cache_read_input_tokens": 13102, + "inference_geo": "", + "input_tokens": 6, + "iterations": [], + "output_tokens": 13, + "server_tool_use": { + "web_fetch_requests": 0, + "web_search_requests": 0 + }, + "service_tier": "standard", + "speed": "standard" + } + }, + "type": "user", + "uuid": "a6d1e4b6-be7e-4245-9a1f-0f06f02bfad3" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_01XzUTsHESrS1PiP41Vgerfp", + "input": { + "description": "Read note_1.txt and extract label and owner", + "prompt": "Read the file note_1.txt. Extract the label and owner fields. Return ONLY: label= | owner=" + }, + "name": "Agent", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_01Sc72rRptbdVxqBdjPGruB2", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1000 + }, + "cache_creation_input_tokens": 1000, + "cache_read_input_tokens": 17022, + "inference_geo": "not_available", + "input_tokens": 10, + "output_tokens": 7, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "assistant", + "uuid": "d6356e99-89c2-490b-8d83-14bba3887dde" + } + }, + { + "op": "read", + "payload": { + "description": "Read note_1.txt and extract label and owner", + "prompt": "Read the file note_1.txt. Extract the label and owner fields. Return ONLY: label= | owner=", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "subtype": "task_started", + "task_id": "a19892d9a9f5d2cc4", + "task_type": "local_agent", + "tool_use_id": "toolu_01XzUTsHESrS1PiP41Vgerfp", + "type": "system", + "uuid": "97e178ee-4490-4b1f-ab7f-61dc7246f416" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "text": "Read the file note_1.txt. Extract the label and owner fields. Return ONLY: label= | owner=", + "type": "text" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_01XzUTsHESrS1PiP41Vgerfp", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "user", + "uuid": "d0d34a77-9d90-48df-be11-d370431daf98" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_01DSPK5DSYpLCEceaBRUgKxY", + "input": { + "description": "Read note_2.txt and extract label and owner", + "prompt": "Read the file note_2.txt. Extract the label and owner fields. Return ONLY: label= | owner=" + }, + "name": "Agent", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_01Sc72rRptbdVxqBdjPGruB2", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1000 + }, + "cache_creation_input_tokens": 1000, + "cache_read_input_tokens": 17022, + "inference_geo": "not_available", + "input_tokens": 10, + "output_tokens": 7, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "assistant", + "uuid": "664b0c8d-8c37-48dc-b277-314902a62cfd" + } + }, + { + "op": "read", + "payload": { + "description": "Read note_2.txt and extract label and owner", + "prompt": "Read the file note_2.txt. Extract the label and owner fields. Return ONLY: label= | owner=", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "subtype": "task_started", + "task_id": "ac11fd4cb65cdc656", + "task_type": "local_agent", + "tool_use_id": "toolu_01DSPK5DSYpLCEceaBRUgKxY", + "type": "system", + "uuid": "42de25ae-a422-449e-ab75-3e608ce4ddba" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "text": "Read the file note_2.txt. Extract the label and owner fields. Return ONLY: label= | owner=", + "type": "text" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_01DSPK5DSYpLCEceaBRUgKxY", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "user", + "uuid": "60bf4d8b-d09d-4a2d-8055-72e3efc8039c" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_01CdY7YzTvfe9YVrvD2i6E5t", + "input": { + "description": "Read note_3.txt and extract label and owner", + "prompt": "Read the file note_3.txt. Extract the label and owner fields. Return ONLY: label= | owner=" + }, + "name": "Agent", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_01Sc72rRptbdVxqBdjPGruB2", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1000 + }, + "cache_creation_input_tokens": 1000, + "cache_read_input_tokens": 17022, + "inference_geo": "not_available", + "input_tokens": 10, + "output_tokens": 7, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "assistant", + "uuid": "11236044-6aa3-469a-acc1-9709eda16166" + } + }, + { + "op": "read", + "payload": { + "description": "Read note_3.txt and extract label and owner", + "prompt": "Read the file note_3.txt. Extract the label and owner fields. Return ONLY: label= | owner=", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "subtype": "task_started", + "task_id": "a58a9eef9410e27b4", + "task_type": "local_agent", + "tool_use_id": "toolu_01CdY7YzTvfe9YVrvD2i6E5t", + "type": "system", + "uuid": "fff75675-cf99-49e3-8c30-88041952843e" + } + }, + { + "op": "read", + "payload": { + "description": "Reading note_1.txt", + "last_tool_name": "Read", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "subtype": "task_progress", + "task_id": "a19892d9a9f5d2cc4", + "tool_use_id": "toolu_01XzUTsHESrS1PiP41Vgerfp", + "type": "system", + "usage": { + "duration_ms": 755, + "tool_uses": 1, + "total_tokens": 13121 + }, + "uuid": "a7f69ee3-0349-45f9-92dc-acccc3218650" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_01TsL6rvzpMxDnptY1PCsWNq", + "input": { + "file_path": "/private/tmp/subagent_race_probe2/note_1.txt" + }, + "name": "Read", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_01RyJhHjmbqgwH8vN7wQxp8D", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 429 + }, + "cache_creation_input_tokens": 429, + "cache_read_input_tokens": 12673, + "inference_geo": "not_available", + "input_tokens": 3, + "output_tokens": 8, + "service_tier": "standard" + } + }, + "parent_tool_use_id": "toolu_01XzUTsHESrS1PiP41Vgerfp", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "assistant", + "uuid": "d5e7c408-724c-4ac2-a2ed-338def1e1942" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "text": "Read the file note_3.txt. Extract the label and owner fields. Return ONLY: label= | owner=", + "type": "text" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_01CdY7YzTvfe9YVrvD2i6E5t", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "user", + "uuid": "7488781d-cae7-4ce9-a117-f9a7bcd42263" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": " 1\u2192label=1\n 2\u2192owner=owner_1\n 3\u2192\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n", + "tool_use_id": "toolu_01TsL6rvzpMxDnptY1PCsWNq", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_01XzUTsHESrS1PiP41Vgerfp", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "user", + "uuid": "30fefc70-9cdc-414f-aed4-4d77808c1e17" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_018uoqWFdym3abM2H8dfHt8i", + "input": { + "description": "Read note_4.txt and extract label and owner", + "prompt": "Read the file note_4.txt. Extract the label and owner fields. Return ONLY: label= | owner=" + }, + "name": "Agent", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_01Sc72rRptbdVxqBdjPGruB2", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1000 + }, + "cache_creation_input_tokens": 1000, + "cache_read_input_tokens": 17022, + "inference_geo": "not_available", + "input_tokens": 10, + "output_tokens": 7, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "assistant", + "uuid": "d73c4a6c-975a-46bf-b4b2-26d5004a96fe" + } + }, + { + "op": "read", + "payload": { + "description": "Read note_4.txt and extract label and owner", + "prompt": "Read the file note_4.txt. Extract the label and owner fields. Return ONLY: label= | owner=", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "subtype": "task_started", + "task_id": "a9ee54c15b896f2ae", + "task_type": "local_agent", + "tool_use_id": "toolu_018uoqWFdym3abM2H8dfHt8i", + "type": "system", + "uuid": "2933e00d-cc7f-425b-9281-56f67ea8ffef" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "text": "Read the file note_4.txt. Extract the label and owner fields. Return ONLY: label= | owner=", + "type": "text" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_018uoqWFdym3abM2H8dfHt8i", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "user", + "uuid": "446ad8a1-b29f-484a-b478-3eab360f59f2" + } + }, + { + "op": "read", + "payload": { + "output_file": "", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "status": "completed", + "subtype": "task_notification", + "summary": "Read note_1.txt and extract label and owner", + "task_id": "a19892d9a9f5d2cc4", + "tool_use_id": "toolu_01XzUTsHESrS1PiP41Vgerfp", + "type": "system", + "usage": { + "duration_ms": 1339, + "tool_uses": 1, + "total_tokens": 13323 + }, + "uuid": "09153b8d-0cad-49fc-8262-2d0e5e92df7f" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": [ + { + "text": "label=1 | owner=owner_1", + "type": "text" + }, + { + "text": "agentId: a19892d9a9f5d2cc4 (for resuming to continue this agent's work if needed)\ntotal_tokens: 13319\ntool_uses: 1\nduration_ms: 1340", + "type": "text" + } + ], + "tool_use_id": "toolu_01XzUTsHESrS1PiP41Vgerfp", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": null, + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "tool_use_result": { + "agentId": "a19892d9a9f5d2cc4", + "content": [ + { + "text": "label=1 | owner=owner_1", + "type": "text" + } + ], + "prompt": "Read the file note_1.txt. Extract the label and owner fields. Return ONLY: label= | owner=", + "status": "completed", + "totalDurationMs": 1340, + "totalTokens": 13319, + "totalToolUseCount": 1, + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 198 + }, + "cache_creation_input_tokens": 198, + "cache_read_input_tokens": 13102, + "inference_geo": "", + "input_tokens": 6, + "iterations": [], + "output_tokens": 13, + "server_tool_use": { + "web_fetch_requests": 0, + "web_search_requests": 0 + }, + "service_tier": "standard", + "speed": "standard" + } + }, + "type": "user", + "uuid": "226fcf48-67b3-482b-91bf-b7d929d531c9" + } + }, + { + "op": "read", + "payload": { + "description": "Reading note_3.txt", + "last_tool_name": "Read", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "subtype": "task_progress", + "task_id": "a58a9eef9410e27b4", + "tool_use_id": "toolu_01CdY7YzTvfe9YVrvD2i6E5t", + "type": "system", + "usage": { + "duration_ms": 998, + "tool_uses": 1, + "total_tokens": 13121 + }, + "uuid": "957b36fe-b5ae-43eb-a7bf-89cfa9d3c6af" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_015wydqqnmHy1fTbVfFyx98G", + "input": { + "file_path": "/private/tmp/subagent_race_probe2/note_3.txt" + }, + "name": "Read", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_01JwwnKLTn4jrp8rrQuLuQtQ", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 429 + }, + "cache_creation_input_tokens": 429, + "cache_read_input_tokens": 12673, + "inference_geo": "not_available", + "input_tokens": 3, + "output_tokens": 8, + "service_tier": "standard" + } + }, + "parent_tool_use_id": "toolu_01CdY7YzTvfe9YVrvD2i6E5t", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "assistant", + "uuid": "76d94ea0-e91a-4e4f-a0f9-57fce0c386e3" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": " 1\u2192label=3\n 2\u2192owner=owner_3\n 3\u2192\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n", + "tool_use_id": "toolu_015wydqqnmHy1fTbVfFyx98G", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_01CdY7YzTvfe9YVrvD2i6E5t", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "user", + "uuid": "6cdffd72-5ca6-4065-a40d-d5f4d4dcfcf5" + } + }, + { + "op": "read", + "payload": { + "description": "Reading note_2.txt", + "last_tool_name": "Read", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "subtype": "task_progress", + "task_id": "ac11fd4cb65cdc656", + "tool_use_id": "toolu_01DSPK5DSYpLCEceaBRUgKxY", + "type": "system", + "usage": { + "duration_ms": 1592, + "tool_uses": 1, + "total_tokens": 13121 + }, + "uuid": "4b7d19ee-f99c-48f3-82a4-57c6ebc7f6e3" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_01FLZeCMxHvx1o3DrwzwbRQz", + "input": { + "file_path": "/private/tmp/subagent_race_probe2/note_2.txt" + }, + "name": "Read", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_01SZ5DB7cwY75gc7GdDHwnQD", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 429 + }, + "cache_creation_input_tokens": 429, + "cache_read_input_tokens": 12673, + "inference_geo": "not_available", + "input_tokens": 3, + "output_tokens": 8, + "service_tier": "standard" + } + }, + "parent_tool_use_id": "toolu_01DSPK5DSYpLCEceaBRUgKxY", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "assistant", + "uuid": "6bf818e5-76aa-41fc-ae73-647fa5cdd84c" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": " 1\u2192label=2\n 2\u2192owner=owner_2\n 3\u2192\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n", + "tool_use_id": "toolu_01FLZeCMxHvx1o3DrwzwbRQz", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_01DSPK5DSYpLCEceaBRUgKxY", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "user", + "uuid": "661e7019-6272-466f-8f0d-0e8d1db28ade" + } + }, + { + "op": "read", + "payload": { + "description": "Reading note_4.txt", + "last_tool_name": "Read", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "subtype": "task_progress", + "task_id": "a9ee54c15b896f2ae", + "tool_use_id": "toolu_018uoqWFdym3abM2H8dfHt8i", + "type": "system", + "usage": { + "duration_ms": 976, + "tool_uses": 1, + "total_tokens": 13115 + }, + "uuid": "1a80f169-5fb0-4a0c-8a9d-310f4edbab32" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_01PsDS96RUcvjtDzV9YGZwfW", + "input": { + "file_path": "/private/tmp/subagent_race_probe2/note_4.txt" + }, + "name": "Read", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_01P7UWLgzFjVN28uZ48sVXmB", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 429 + }, + "cache_creation_input_tokens": 429, + "cache_read_input_tokens": 12673, + "inference_geo": "not_available", + "input_tokens": 3, + "output_tokens": 5, + "service_tier": "standard" + } + }, + "parent_tool_use_id": "toolu_018uoqWFdym3abM2H8dfHt8i", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "assistant", + "uuid": "65c5fe68-a56c-45fb-a8e7-56b0292cc116" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": " 1\u2192label=4\n 2\u2192owner=owner_4\n 3\u2192\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n", + "tool_use_id": "toolu_01PsDS96RUcvjtDzV9YGZwfW", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_018uoqWFdym3abM2H8dfHt8i", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "user", + "uuid": "9206339b-e3f6-4edd-86ff-141f31f3eec9" + } + }, + { + "op": "read", + "payload": { + "output_file": "", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "status": "completed", + "subtype": "task_notification", + "summary": "Read note_3.txt and extract label and owner", + "task_id": "a58a9eef9410e27b4", + "tool_use_id": "toolu_01CdY7YzTvfe9YVrvD2i6E5t", + "type": "system", + "usage": { + "duration_ms": 1622, + "tool_uses": 1, + "total_tokens": 13318 + }, + "uuid": "476faa14-cf7c-4d98-8c0c-0fa749914157" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": [ + { + "text": "label=3 | owner=owner_3", + "type": "text" + }, + { + "text": "agentId: a58a9eef9410e27b4 (for resuming to continue this agent's work if needed)\ntotal_tokens: 13314\ntool_uses: 1\nduration_ms: 1622", + "type": "text" + } + ], + "tool_use_id": "toolu_01CdY7YzTvfe9YVrvD2i6E5t", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": null, + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "tool_use_result": { + "agentId": "a58a9eef9410e27b4", + "content": [ + { + "text": "label=3 | owner=owner_3", + "type": "text" + } + ], + "prompt": "Read the file note_3.txt. Extract the label and owner fields. Return ONLY: label= | owner=", + "status": "completed", + "totalDurationMs": 1622, + "totalTokens": 13314, + "totalToolUseCount": 1, + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 193 + }, + "cache_creation_input_tokens": 193, + "cache_read_input_tokens": 13102, + "inference_geo": "", + "input_tokens": 6, + "iterations": [], + "output_tokens": 13, + "server_tool_use": { + "web_fetch_requests": 0, + "web_search_requests": 0 + }, + "service_tier": "standard", + "speed": "standard" + } + }, + "type": "user", + "uuid": "c8104327-ddc0-42d2-be22-403b1d6f2f7b" + } + }, + { + "op": "read", + "payload": { + "output_file": "", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "status": "completed", + "subtype": "task_notification", + "summary": "Read note_4.txt and extract label and owner", + "task_id": "a9ee54c15b896f2ae", + "tool_use_id": "toolu_018uoqWFdym3abM2H8dfHt8i", + "type": "system", + "usage": { + "duration_ms": 1653, + "tool_uses": 1, + "total_tokens": 13317 + }, + "uuid": "f6c874b2-fc43-43f7-b89b-7435dcf420ce" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": [ + { + "text": "label=4 | owner=owner_4", + "type": "text" + }, + { + "text": "agentId: a9ee54c15b896f2ae (for resuming to continue this agent's work if needed)\ntotal_tokens: 13319\ntool_uses: 1\nduration_ms: 1653", + "type": "text" + } + ], + "tool_use_id": "toolu_018uoqWFdym3abM2H8dfHt8i", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": null, + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "tool_use_result": { + "agentId": "a9ee54c15b896f2ae", + "content": [ + { + "text": "label=4 | owner=owner_4", + "type": "text" + } + ], + "prompt": "Read the file note_4.txt. Extract the label and owner fields. Return ONLY: label= | owner=", + "status": "completed", + "totalDurationMs": 1653, + "totalTokens": 13319, + "totalToolUseCount": 1, + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 198 + }, + "cache_creation_input_tokens": 198, + "cache_read_input_tokens": 13102, + "inference_geo": "", + "input_tokens": 6, + "iterations": [], + "output_tokens": 13, + "server_tool_use": { + "web_fetch_requests": 0, + "web_search_requests": 0 + }, + "service_tier": "standard", + "speed": "standard" + } + }, + "type": "user", + "uuid": "b97cdee7-a0a3-4ca8-a02b-38429cde5d3b" + } + }, + { + "op": "read", + "payload": { + "output_file": "", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "status": "completed", + "subtype": "task_notification", + "summary": "Read note_2.txt and extract label and owner", + "task_id": "ac11fd4cb65cdc656", + "tool_use_id": "toolu_01DSPK5DSYpLCEceaBRUgKxY", + "type": "system", + "usage": { + "duration_ms": 3041, + "tool_uses": 1, + "total_tokens": 13318 + }, + "uuid": "a24f4247-e4cf-4f09-8bdf-71fb259a0a5d" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": [ + { + "text": "label=2 | owner=owner_2", + "type": "text" + }, + { + "text": "agentId: ac11fd4cb65cdc656 (for resuming to continue this agent's work if needed)\ntotal_tokens: 13314\ntool_uses: 1\nduration_ms: 3042", + "type": "text" + } + ], + "tool_use_id": "toolu_01DSPK5DSYpLCEceaBRUgKxY", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": null, + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "tool_use_result": { + "agentId": "ac11fd4cb65cdc656", + "content": [ + { + "text": "label=2 | owner=owner_2", + "type": "text" + } + ], + "prompt": "Read the file note_2.txt. Extract the label and owner fields. Return ONLY: label= | owner=", + "status": "completed", + "totalDurationMs": 3042, + "totalTokens": 13314, + "totalToolUseCount": 1, + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 193 + }, + "cache_creation_input_tokens": 193, + "cache_read_input_tokens": 13102, + "inference_geo": "", + "input_tokens": 6, + "iterations": [], + "output_tokens": 13, + "server_tool_use": { + "web_fetch_requests": 0, + "web_search_requests": 0 + }, + "service_tier": "standard", + "speed": "standard" + } + }, + "type": "user", + "uuid": "855e4253-9b12-46b1-91db-6221c8a72d43" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "signature": "", + "thinking": "Perfect! All five subagents have completed and returned their results. Now I need to format the response as exactly five lines in order 0 through 4:\n\nlabel=0 | owner=owner_0\nlabel=1 | owner=owner_1\nlabel=2 | owner=owner_2\nlabel=3 | owner=owner_3\nlabel=4 | owner=owner_4", + "type": "thinking" + } + ], + "context_management": null, + "id": "msg_01WToj9fQGSUDHf5FoE4Lqgg", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1044 + }, + "cache_creation_input_tokens": 1044, + "cache_read_input_tokens": 18022, + "inference_geo": "not_available", + "input_tokens": 208, + "output_tokens": 3, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "assistant", + "uuid": "61dfc657-e94e-48d4-af7a-4fc2f69f32f1" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "text": "label=0 | owner=owner_0\nlabel=1 | owner=owner_1\nlabel=2 | owner=owner_2\nlabel=3 | owner=owner_3\nlabel=4 | owner=owner_4", + "type": "text" + } + ], + "context_management": null, + "id": "msg_01WToj9fQGSUDHf5FoE4Lqgg", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1044 + }, + "cache_creation_input_tokens": 1044, + "cache_read_input_tokens": 18022, + "inference_geo": "not_available", + "input_tokens": 208, + "output_tokens": 3, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "type": "assistant", + "uuid": "ba75e896-9472-4669-b3a7-7ebe1dd788d3" + } + }, + { + "op": "read", + "payload": { + "duration_api_ms": 29320, + "duration_ms": 22765, + "fast_mode_state": "off", + "is_error": false, + "modelUsage": { + "claude-haiku-4-5-20251001": { + "cacheCreationInputTokens": 5749, + "cacheReadInputTokens": 163321, + "contextWindow": 200000, + "costUSD": 0.030711350000000002, + "inputTokens": 263, + "maxOutputTokens": 32000, + "outputTokens": 1386, + "webSearchRequests": 0 + } + }, + "num_turns": 6, + "permission_denials": [], + "result": "label=0 | owner=owner_0\nlabel=1 | owner=owner_1\nlabel=2 | owner=owner_2\nlabel=3 | owner=owner_3\nlabel=4 | owner=owner_4", + "session_id": "f4a8a689-b641-460e-9747-cc68f3069fdd", + "stop_reason": "end_turn", + "subtype": "success", + "total_cost_usd": 0.030711350000000002, + "type": "result", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 2044 + }, + "cache_creation_input_tokens": 2044, + "cache_read_input_tokens": 35044, + "inference_geo": "", + "input_tokens": 218, + "iterations": [], + "output_tokens": 899, + "server_tool_use": { + "web_fetch_requests": 0, + "web_search_requests": 0 + }, + "service_tier": "standard", + "speed": "standard" + }, + "uuid": "9db3bcc2-fe3d-4aa1-a092-14395be6ce86" + } + } + ], + "sdk_version": "0.1.48" +} diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py index 2e8e49c..34bb830 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py @@ -460,6 +460,69 @@ async def test_multiple_bundled_subagents_keep_outer_orchestration_separate(memo ), "Expected delegated tool spans to nest under delegated LLM spans" +@pytest.mark.skipif(not CLAUDE_SDK_AVAILABLE, reason="Claude Agent SDK not installed") +@pytest.mark.asyncio +async def test_five_parallel_bundled_subagents_preserve_task_parenting(memory_logger, tmp_path): + assert not memory_logger.pop() + if not _sdk_version_at_least("0.1.48"): + pytest.skip("Bundled subagent task events were not observed on older Claude Agent SDK versions") + + workspace = tmp_path / "subagent_parenting_workspace" + workspace.mkdir() + for i in range(5): + (workspace / f"note_{i}.txt").write_text(f"label={i}\nowner=owner_{i}\n", encoding="utf-8") + + with _patched_claude_sdk(wrap_client=True): + options = claude_agent_sdk.ClaudeAgentOptions( + model=TEST_MODEL, + cwd=workspace, + permission_mode="bypassPermissions", + max_turns=20, + ) + transport = make_cassette_transport( + cassette_name="test_five_parallel_bundled_subagents_preserve_task_parenting", + prompt="", + options=options, + ) + + async with claude_agent_sdk.ClaudeSDKClient(options=options, transport=transport) as client: + await client.query( + "Use exactly five bundled general-purpose subagents, one for each file note_0.txt through note_4.txt. " + "In your first assistant response, emit all five Agent tool calls before waiting for any subagent result. " + "Do not emit explanatory text before or between the Agent tool calls if the tool API allows it. " + "Each delegated subagent must use Read on exactly its assigned file and return only label= | owner=. " + "After all five finish, reply with exactly five lines in order 0 through 4. " + "Do not answer directly without using all five subagents." + ) + async for message in client.receive_response(): + if type(message).__name__ == "ResultMessage": + break + + spans = memory_logger.pop() + task_spans = _find_spans_by_type(spans, SpanTypeAttribute.TASK) + tool_spans = _find_spans_by_type(spans, SpanTypeAttribute.TOOL) + + root_task_span = _find_span_by_name(task_spans, "Claude Agent") + subagent_spans = [span for span in task_spans if span["span_id"] != root_task_span["span_id"]] + assert len(subagent_spans) == 5, f"Expected 5 delegated task spans, got {len(subagent_spans)}" + + agent_tool_spans_by_id = { + span.get("metadata", {}).get("gen_ai.tool.call.id"): span + for span in tool_spans + if span["span_attributes"]["name"] == "Agent" + } + assert len(agent_tool_spans_by_id) == 5, f"Expected 5 Agent tool spans, got {len(agent_tool_spans_by_id)}" + + for subagent_span in subagent_spans: + tool_use_id = subagent_span.get("metadata", {}).get("tool_use_id") + assert tool_use_id, f"Expected task span metadata to include tool_use_id: {subagent_span}" + agent_tool_span = agent_tool_spans_by_id.get(tool_use_id) + assert agent_tool_span is not None, f"Missing Agent tool span for tool_use_id={tool_use_id}" + assert agent_tool_span["span_id"] in subagent_span["span_parents"] + assert root_task_span["span_id"] not in subagent_span["span_parents"] + assert agent_tool_span["metrics"]["end"] >= subagent_span["metrics"]["end"] + + @pytest.mark.asyncio async def test_delegated_subagent_llm_and_tool_spans_nest_under_task_span(memory_logger): assert not memory_logger.pop() From 241a0f97f38696f5315867d8ecb9440c4a8c64e0 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Thu, 12 Mar 2026 13:50:06 -0400 Subject: [PATCH 2/3] catch asyncio.CancelledError --- .../wrappers/claude_agent_sdk/_wrapper.py | 9 ++- .../wrappers/claude_agent_sdk/test_wrapper.py | 72 +++++++++++++++++++ 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py index 9eb6b12..79a396a 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py @@ -1,3 +1,4 @@ +import asyncio import dataclasses import logging import threading @@ -763,8 +764,12 @@ async def receive_response(self) -> AsyncGenerator[Any, None]: task_events.append(_serialize_system_message(message)) yield message - except Exception: - raise + except asyncio.CancelledError: + current_task = asyncio.current_task() + if current_task is not None and current_task.cancelling(): + raise + if final_results: + span.log(output=final_results[-1]) else: if final_results: span.log(output=final_results[-1]) diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py index 34bb830..c614b16 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py @@ -1120,6 +1120,21 @@ async def __aexit__(self, *args): return None +class FakeCancelledClaudeSDKClient(FakeClaudeSDKClient): + """Simulates the real error: messages are yielded, then CancelledError on stream close. + + The real error comes from anyio's MemoryObjectReceiveStream when the Claude + subprocess connection closes — the internal ``await receive_event.wait()`` + raises ``asyncio.CancelledError`` even though the asyncio Task itself is + *not* being cancelled. + """ + + async def receive_response(self): + for message in self.messages: + yield message + raise asyncio.CancelledError + + def _find_spans_by_type(spans: list[dict[str, Any]], span_type: str) -> list[dict[str, Any]]: return [span for span in spans if span.get("span_attributes", {}).get("type") == span_type] @@ -1150,6 +1165,63 @@ def _clear_tool_span_tracker() -> None: delattr(_thread_local, "tool_span_tracker") +@pytest.mark.asyncio +async def test_receive_response_suppresses_unexpected_cancelled_error_empty_stream(memory_logger): + """CancelledError on an empty stream is suppressed without error.""" + assert not memory_logger.pop() + + wrapped_client_class = _create_client_wrapper_class(FakeCancelledClaudeSDKClient) + client = wrapped_client_class() + # No messages — CancelledError fires immediately on iteration. + + await client.query("Delegate this task.") + received = [] + async for message in client.receive_response(): + received.append(message) + + assert received == [] + + spans = memory_logger.pop() + task_spans = _find_spans_by_type(spans, SpanTypeAttribute.TASK) + assert len(task_spans) == 1 + assert task_spans[0]["span_attributes"]["name"] == "Claude Agent" + assert task_spans[0].get("error") is None + + +@pytest.mark.asyncio +async def test_receive_response_suppresses_cancelled_error_after_messages(memory_logger): + """CancelledError after real messages still logs output and doesn't propagate.""" + assert not memory_logger.pop() + + wrapped_client_class = _create_client_wrapper_class(FakeCancelledClaudeSDKClient) + client = wrapped_client_class() + client._WrappedClaudeSDKClient__client.messages = [ # type: ignore[attr-defined] + AssistantMessage(content=[TextBlock("The answer is 42.")]), + ResultMessage(), + ] + + await client.query("What is the meaning of life?") + received = [] + async for message in client.receive_response(): + received.append(message) + + # All messages yielded before the CancelledError should be received. + assert len(received) == 2 + + spans = memory_logger.pop() + task_spans = _find_spans_by_type(spans, SpanTypeAttribute.TASK) + assert len(task_spans) == 1 + task_span = task_spans[0] + assert task_span["span_attributes"]["name"] == "Claude Agent" + assert task_span.get("error") is None + # Output should still be logged despite the CancelledError at stream close. + assert task_span.get("output") is not None + assert task_span["output"]["role"] == "assistant" + + llm_spans = _find_spans_by_type(spans, SpanTypeAttribute.LLM) + assert len(llm_spans) == 1 + + @pytest.mark.parametrize( "tool_name,expected", [ From c1ddc82ea9baec6de32e2400b2a3366f298fe6d0 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Thu, 12 Mar 2026 15:44:09 -0400 Subject: [PATCH 3/3] be more strict at handling cancelled errors --- .../wrappers/claude_agent_sdk/_wrapper.py | 9 ++- .../wrappers/claude_agent_sdk/test_wrapper.py | 77 +++++++++++++++++++ 2 files changed, 83 insertions(+), 3 deletions(-) diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py index 79a396a..43e5d8f 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py @@ -765,9 +765,12 @@ async def receive_response(self) -> AsyncGenerator[Any, None]: yield message except asyncio.CancelledError: - current_task = asyncio.current_task() - if current_task is not None and current_task.cancelling(): - raise + # The CancelledError may come from the subprocess transport + # (e.g., anyio internal cleanup when subagents complete) rather + # than a genuine external cancellation. We suppress it here so + # the response stream ends cleanly. If the caller genuinely + # cancelled the task, they still have pending cancellation + # requests that will fire at their next await point. if final_results: span.log(output=final_results[-1]) else: diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py index c614b16..9204968 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py @@ -1222,6 +1222,83 @@ async def test_receive_response_suppresses_cancelled_error_after_messages(memory assert len(llm_spans) == 1 +class FakeCancelledMidStreamClaudeSDKClient(FakeClaudeSDKClient): + """CancelledError fires *between* messages, simulating cancellation mid-stream.""" + + async def receive_response(self): + yield self.messages[0] + raise asyncio.CancelledError + + +@pytest.mark.asyncio +async def test_receive_response_suppresses_cancelled_error_mid_stream(memory_logger): + """CancelledError between messages still yields partial results and logs output.""" + assert not memory_logger.pop() + + wrapped_client_class = _create_client_wrapper_class(FakeCancelledMidStreamClaudeSDKClient) + client = wrapped_client_class() + client._WrappedClaudeSDKClient__client.messages = [ # type: ignore[attr-defined] + AssistantMessage(content=[TextBlock("Partial answer.")]), + # Second message never arrives — CancelledError fires instead. + AssistantMessage(content=[TextBlock("This should not be received.")]), + ] + + await client.query("Tell me something.") + received = [] + async for message in client.receive_response(): + received.append(message) + + # Only the first message should be received. + assert len(received) == 1 + + spans = memory_logger.pop() + task_spans = _find_spans_by_type(spans, SpanTypeAttribute.TASK) + assert len(task_spans) == 1 + task_span = task_spans[0] + assert task_span.get("error") is None + assert task_span.get("output") is not None + assert task_span["output"]["role"] == "assistant" + + +@pytest.mark.asyncio +async def test_genuine_task_cancel_propagates_after_receive_response(memory_logger): + """When the asyncio Task is genuinely cancelled, CancelledError propagates + at the caller's next await — not swallowed forever. + + This verifies that suppressing CancelledError inside the generator does not + permanently disarm a real ``task.cancel()`` on Python 3.12+ (where the + cancellation counter is decremented when the exception is caught). On + Python < 3.12 the behaviour is the same because the task cancel flag is + sticky until the CancelledError propagates. + """ + assert not memory_logger.pop() + + wrapped_client_class = _create_client_wrapper_class(FakeCancelledClaudeSDKClient) + client = wrapped_client_class() + client._WrappedClaudeSDKClient__client.messages = [ # type: ignore[attr-defined] + AssistantMessage(content=[TextBlock("Hello.")]), + ResultMessage(), + ] + + await client.query("Hi") + + async def _drain_and_sleep(): + """Consume the stream, then await something else.""" + async for _ in client.receive_response(): + pass + # This is the "next await" after the generator ends. + await asyncio.sleep(0) + + task = asyncio.ensure_future(_drain_and_sleep()) + # Let the task start and begin iterating. + await asyncio.sleep(0) + # Genuinely cancel the task from outside. + task.cancel() + + with pytest.raises(asyncio.CancelledError): + await task + + @pytest.mark.parametrize( "tool_name,expected", [