From 5e737de554770b465b2530bb9bb62da258a8f873 Mon Sep 17 00:00:00 2001
From: Matt Perpick
Date: Thu, 5 Feb 2026 17:02:49 -0500
Subject: [PATCH] Fix CI: propagate background logger across thread boundaries for openai-agents >= 0.8

openai-agents 0.8.0 runs sync @function_tool functions via
asyncio.to_thread(), which copies ContextVar context but not
threading.local() state. The background logger override
(_override_bg_logger) was threading.local(), so spans created in tool
worker threads logged to the wrong (real HTTP) logger instead of the
test memory logger -- causing test_agents_tool_openai_nested_spans to
fail.

Changes:
- Change _override_bg_logger from threading.local() to ContextVar so it
  propagates via asyncio.to_thread() while remaining isolated per-context
- Call setup_threads() in BraintrustTracingProcessor.__init__() to
  propagate ContextVars across ThreadPoolExecutor worker threads
- Add test_openai_agents nox session with version matrix
  (latest, 0.8.0, 0.7.0)

Co-Authored-By: Claude Opus 4.6
---
 py/noxfile.py                        | 11 +++++++++++
 py/src/braintrust/logger.py          | 20 +++++++++++---------
 py/src/braintrust/wrappers/openai.py |  2 ++
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/py/noxfile.py b/py/noxfile.py
index 33d5644e8..baf9c48f2 100644
--- a/py/noxfile.py
+++ b/py/noxfile.py
@@ -61,6 +61,7 @@
 # CLI bundling started in 0.1.10 - older versions require external Claude Code installation
 CLAUDE_AGENT_SDK_VERSIONS = (LATEST, "0.1.10")
 AGNO_VERSIONS = (LATEST, "2.1.0")
+OPENAI_AGENTS_VERSIONS = (LATEST, "0.8.0", "0.7.0")
 # pydantic_ai 1.x requires Python >= 3.10
 # Two test suites with different version requirements:
 # 1. wrap_openai approach: works with older versions (0.1.9+)
@@ -156,6 +157,16 @@ def test_openai(session, version):
     _run_core_tests(session)
 
 
+@nox.session()
+@nox.parametrize("version", OPENAI_AGENTS_VERSIONS, ids=OPENAI_AGENTS_VERSIONS)
+def test_openai_agents(session, version):
+    _install_test_deps(session)
+    _install(session, "openai")
+    _install(session, "openai-agents", version)
+    _run_tests(session, f"{WRAPPER_DIR}/test_openai.py")
+    _run_core_tests(session)
+
+
 @nox.session()
 def test_openrouter(session):
     """Test wrap_openai with OpenRouter. Requires OPENROUTER_API_KEY env var."""
diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py
index 10ccd3cc3..38ca2e09b 100644
--- a/py/src/braintrust/logger.py
+++ b/py/src/braintrust/logger.py
@@ -407,10 +407,12 @@ def default_get_api_conn():
         self._id_generator = None
 
         # For unit-testing, tests may wish to temporarily override the global
-        # logger with a custom one. We allow this but keep the override variable
-        # thread-local to prevent the possibility that tests running on
-        # different threads unintentionally use the same override.
-        self._override_bg_logger = threading.local()
+        # logger with a custom one. We use a ContextVar so that the override
+        # propagates across thread boundaries (e.g. asyncio.to_thread) while
+        # still being isolated per-context (preventing test cross-talk).
+        self._override_bg_logger: contextvars.ContextVar[_BackgroundLogger | None] = contextvars.ContextVar(
+            "_override_bg_logger", default=None
+        )
 
         self.reset_login_info()
 
@@ -587,7 +589,7 @@ def user_info(self) -> Mapping[str, Any]:
         return self._user_info
 
     def global_bg_logger(self) -> "_BackgroundLogger":
-        return getattr(self._override_bg_logger, "logger", None) or self._global_bg_logger.get()
+        return self._override_bg_logger.get() or self._global_bg_logger.get()
 
     # Should only be called by the login function.
     def login_replace_api_conn(self, api_conn: "HTTPConnection"):
@@ -1436,21 +1438,21 @@ def _internal_get_global_state() -> BraintrustState:
 @contextlib.contextmanager
 def _internal_with_custom_background_logger():
     custom_logger = _HTTPBackgroundLogger(LazyValue(lambda: _state.api_conn(), use_mutex=True))
-    _state._override_bg_logger.logger = custom_logger
+    token = _state._override_bg_logger.set(custom_logger)
     try:
         yield custom_logger
     finally:
-        _state._override_bg_logger.logger = None
+        _state._override_bg_logger.reset(token)
 
 
 @contextlib.contextmanager
 def _internal_with_memory_background_logger():
     memory_logger = _MemoryBackgroundLogger()
-    _state._override_bg_logger.logger = memory_logger
+    token = _state._override_bg_logger.set(memory_logger)
     try:
         yield memory_logger
     finally:
-        _state._override_bg_logger.logger = None
+        _state._override_bg_logger.reset(token)
 
 
 @dataclasses.dataclass
diff --git a/py/src/braintrust/wrappers/openai.py b/py/src/braintrust/wrappers/openai.py
index 697dbc643..9fcd18fe1 100644
--- a/py/src/braintrust/wrappers/openai.py
+++ b/py/src/braintrust/wrappers/openai.py
@@ -8,6 +8,7 @@
 import braintrust
 from agents import tracing
 from braintrust.logger import NOOP_SPAN
+from braintrust.wrappers.threads import setup_threads
 
 
 def _span_type(span: tracing.Span[Any]) -> braintrust.SpanTypeAttribute:
@@ -62,6 +63,7 @@ class BraintrustTracingProcessor(tracing.TracingProcessor):
     """
 
     def __init__(self, logger: braintrust.Span | braintrust.Experiment | braintrust.Logger | None = None):
+        setup_threads()  # Propagate ContextVars across worker threads (agents SDK >= 0.8)
         self._logger = logger
         self._spans: dict[str, braintrust.Span] = {}
         self._first_input: dict[str, Any] = {}