diff --git a/py/examples/auto_instrument.py b/py/examples/auto_instrument.py new file mode 100644 index 000000000..33fa278a8 --- /dev/null +++ b/py/examples/auto_instrument.py @@ -0,0 +1,69 @@ +""" +Example: Auto-instrumentation with Braintrust + +This example demonstrates one-line auto-instrumentation for multiple AI libraries. +Run with: python examples/auto_instrument.py + +Supported integrations: +- OpenAI +- Anthropic +- LiteLLM +- Pydantic AI +- Google GenAI +- Agno +- Claude Agent SDK +- DSPy +""" + +import braintrust + +# One-line instrumentation - call this BEFORE importing AI libraries +# This patches all supported libraries automatically +results = braintrust.auto_instrument() + +# Show what was instrumented +print("Instrumentation results:") +for lib, success in results.items(): + status = "yes" if success else "no (not installed)" + print(f" {lib}: {status}") +print() + +# Initialize Braintrust logging +logger = braintrust.init_logger(project="auto-instrument-demo") + +# Now import and use AI libraries normally - all calls are traced! 
+# IMPORTANT: Import AI libraries AFTER calling auto_instrument() +import anthropic +import openai + +# Create clients - they're automatically wrapped +openai_client = openai.OpenAI() +anthropic_client = anthropic.Anthropic() + +# Wrap in a manual span to get a link +with braintrust.start_span(name="auto_instrument_example") as span: + # OpenAI call - automatically traced as child span + print("Calling OpenAI...") + openai_response = openai_client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Say hello in 3 words"}], + ) + print(f" OpenAI: {openai_response.choices[0].message.content}") + + # Anthropic call - automatically traced as child span + print("Calling Anthropic...") + anthropic_response = anthropic_client.messages.create( + model="claude-3-5-haiku-latest", + max_tokens=100, + messages=[{"role": "user", "content": "Say goodbye in 3 words"}], + ) + print(f" Anthropic: {anthropic_response.content[0].text}") + + span.log( + output={ + "openai": openai_response.choices[0].message.content, + "anthropic": anthropic_response.content[0].text, + } + ) + +print(f"\nView trace: {span.link()}") diff --git a/py/noxfile.py b/py/noxfile.py index 44f8cda04..33d5644e8 100644 --- a/py/noxfile.py +++ b/py/noxfile.py @@ -122,6 +122,7 @@ def test_claude_agent_sdk(session, version): def test_agno(session, version): _install_test_deps(session) _install(session, "agno", version) + _install(session, "openai") # Required for agno.models.openai _run_tests(session, f"{WRAPPER_DIR}/test_agno.py") _run_core_tests(session) diff --git a/py/setup.py b/py/setup.py index cd95c2570..9d563e3a0 100644 --- a/py/setup.py +++ b/py/setup.py @@ -54,7 +54,7 @@ ], package_dir={"": "src"}, packages=setuptools.find_packages(where="src"), - package_data={"braintrust": ["py.typed"]}, + package_data={"braintrust": ["py.typed", "wrappers/cassettes/*.yaml"]}, python_requires=">=3.10.0", entry_points={"console_scripts": ["braintrust = braintrust.cli.__main__:main"]}, 
install_requires=install_requires, diff --git a/py/src/braintrust/__init__.py b/py/src/braintrust/__init__.py index 50954c620..46019a3ba 100644 --- a/py/src/braintrust/__init__.py +++ b/py/src/braintrust/__init__.py @@ -50,6 +50,10 @@ def is_equal(expected, output): """ from .audit import * +from .auto import ( + auto_instrument, # noqa: F401 # type: ignore[reportUnusedImport] + auto_uninstrument, # noqa: F401 # type: ignore[reportUnusedImport] +) from .framework import * from .framework2 import * from .functions.invoke import * @@ -62,6 +66,8 @@ def is_equal(expected, output): _internal_with_custom_background_logger, # noqa: F401 # type: ignore[reportUnusedImport] ) from .oai import ( + patch_openai, # noqa: F401 # type: ignore[reportUnusedImport] + unpatch_openai, # noqa: F401 # type: ignore[reportUnusedImport] wrap_openai, # noqa: F401 # type: ignore[reportUnusedImport] ) from .util import ( @@ -69,11 +75,23 @@ def is_equal(expected, output): MarkAsyncWrapper, # noqa: F401 # type: ignore[reportUnusedImport] ) from .wrappers.anthropic import ( + patch_anthropic, # noqa: F401 # type: ignore[reportUnusedImport] + unpatch_anthropic, # noqa: F401 # type: ignore[reportUnusedImport] wrap_anthropic, # noqa: F401 # type: ignore[reportUnusedImport] ) from .wrappers.litellm import ( + patch_litellm, # noqa: F401 # type: ignore[reportUnusedImport] + unpatch_litellm, # noqa: F401 # type: ignore[reportUnusedImport] wrap_litellm, # noqa: F401 # type: ignore[reportUnusedImport] ) + +try: + from .wrappers.dspy import ( + patch_dspy, # noqa: F401 # type: ignore[reportUnusedImport] + unpatch_dspy, # noqa: F401 # type: ignore[reportUnusedImport] + ) +except ImportError: + pass # dspy not installed from .wrappers.pydantic_ai import ( setup_pydantic_ai, # noqa: F401 # type: ignore[reportUnusedImport] ) diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py new file mode 100644 index 000000000..370677c69 --- /dev/null +++ b/py/src/braintrust/auto.py @@ -0,0 +1,259 @@ +""" 
+Auto-instrumentation for AI/ML libraries. + +Provides one-line instrumentation for supported libraries. +""" + +from __future__ import annotations + +import logging +from contextlib import contextmanager + +__all__ = ["auto_instrument", "auto_uninstrument"] + +logger = logging.getLogger(__name__) + + +@contextmanager +def _try_patch(): + """Context manager that suppresses ImportError and logs other exceptions.""" + try: + yield + except ImportError: + pass + except Exception: + logger.exception("Failed to instrument") + + +def auto_instrument( + *, + openai: bool = True, + anthropic: bool = True, + litellm: bool = True, + pydantic_ai: bool = True, + google_genai: bool = True, + agno: bool = True, + claude_agent_sdk: bool = True, + dspy: bool = True, +) -> dict[str, bool]: + """ + Auto-instrument supported AI/ML libraries for Braintrust tracing. + + Safe to call multiple times - already instrumented libraries are skipped. + + Note on import order: If you use `from openai import OpenAI` style imports, + call auto_instrument() first. If you use `import openai` style imports, + order doesn't matter since attribute lookup happens dynamically. + + Args: + openai: Enable OpenAI instrumentation (default: True) + anthropic: Enable Anthropic instrumentation (default: True) + litellm: Enable LiteLLM instrumentation (default: True) + pydantic_ai: Enable Pydantic AI instrumentation (default: True) + google_genai: Enable Google GenAI instrumentation (default: True) + agno: Enable Agno instrumentation (default: True) + claude_agent_sdk: Enable Claude Agent SDK instrumentation (default: True) + dspy: Enable DSPy instrumentation (default: True) + + Returns: + Dict mapping integration name to whether it was successfully instrumented. 
+ + Example: + ```python + import braintrust + braintrust.auto_instrument() + + # OpenAI + import openai + client = openai.OpenAI() + client.chat.completions.create(model="gpt-4o-mini", messages=[...]) + + # Anthropic + import anthropic + client = anthropic.Anthropic() + client.messages.create(model="claude-sonnet-4-20250514", messages=[...]) + + # LiteLLM + import litellm + litellm.completion(model="gpt-4o-mini", messages=[...]) + + # DSPy + import dspy + lm = dspy.LM("openai/gpt-4o-mini") + dspy.configure(lm=lm) + + # Pydantic AI + from pydantic_ai import Agent + agent = Agent("openai:gpt-4o-mini") + result = agent.run_sync("Hello!") + + # Google GenAI + from google.genai import Client + client = Client() + client.models.generate_content(model="gemini-2.0-flash", contents="Hello!") + ``` + """ + results = {} + + if openai: + results["openai"] = _instrument_openai() + if anthropic: + results["anthropic"] = _instrument_anthropic() + if litellm: + results["litellm"] = _instrument_litellm() + if pydantic_ai: + results["pydantic_ai"] = _instrument_pydantic_ai() + if google_genai: + results["google_genai"] = _instrument_google_genai() + if agno: + results["agno"] = _instrument_agno() + if claude_agent_sdk: + results["claude_agent_sdk"] = _instrument_claude_agent_sdk() + if dspy: + results["dspy"] = _instrument_dspy() + + return results + + +def _instrument_openai() -> bool: + with _try_patch(): + from braintrust.oai import patch_openai + + return patch_openai() + return False + + +def _instrument_anthropic() -> bool: + with _try_patch(): + from braintrust.wrappers.anthropic import patch_anthropic + + return patch_anthropic() + return False + + +def _instrument_litellm() -> bool: + with _try_patch(): + from braintrust.wrappers.litellm import patch_litellm + + return patch_litellm() + return False + + +def _instrument_pydantic_ai() -> bool: + with _try_patch(): + from braintrust.wrappers.pydantic_ai import setup_pydantic_ai + + return setup_pydantic_ai() + return False + 
+ +def _instrument_google_genai() -> bool: + with _try_patch(): + from braintrust.wrappers.google_genai import setup_genai + + return setup_genai() + return False + + +def _instrument_agno() -> bool: + with _try_patch(): + from braintrust.wrappers.agno import setup_agno + + return setup_agno() + return False + + +def _instrument_claude_agent_sdk() -> bool: + with _try_patch(): + from braintrust.wrappers.claude_agent_sdk import setup_claude_agent_sdk + + return setup_claude_agent_sdk() + return False + + +def _instrument_dspy() -> bool: + with _try_patch(): + from braintrust.wrappers.dspy import patch_dspy + + return patch_dspy() + return False + + +def auto_uninstrument( + *, + openai: bool = True, + anthropic: bool = True, + litellm: bool = True, + dspy: bool = True, +) -> dict[str, bool]: + """ + Remove auto-instrumentation from supported AI/ML libraries. + + This undoes the patching done by auto_instrument() for libraries that + support unpatching (OpenAI, Anthropic, LiteLLM, DSPy). + + Note: Some libraries (Pydantic AI, Google GenAI, Agno, Claude Agent SDK) + use setup-style instrumentation that cannot be reversed. + + Args: + openai: Disable OpenAI instrumentation (default: True) + anthropic: Disable Anthropic instrumentation (default: True) + litellm: Disable LiteLLM instrumentation (default: True) + dspy: Disable DSPy instrumentation (default: True) + + Returns: + Dict mapping integration name to whether it was successfully uninstrumented. + + Example: + ```python + import braintrust + + braintrust.auto_instrument() + # ... use traced clients ... 
+ braintrust.auto_uninstrument() # Restore original behavior + ``` + """ + results = {} + + if openai: + results["openai"] = _uninstrument_openai() + if anthropic: + results["anthropic"] = _uninstrument_anthropic() + if litellm: + results["litellm"] = _uninstrument_litellm() + if dspy: + results["dspy"] = _uninstrument_dspy() + + return results + + +def _uninstrument_openai() -> bool: + with _try_patch(): + from braintrust.oai import unpatch_openai + + return unpatch_openai() + return False + + +def _uninstrument_anthropic() -> bool: + with _try_patch(): + from braintrust.wrappers.anthropic import unpatch_anthropic + + return unpatch_anthropic() + return False + + +def _uninstrument_litellm() -> bool: + with _try_patch(): + from braintrust.wrappers.litellm import unpatch_litellm + + return unpatch_litellm() + return False + + +def _uninstrument_dspy() -> bool: + with _try_patch(): + from braintrust.wrappers.dspy import unpatch_dspy + + return unpatch_dspy() + return False diff --git a/py/src/braintrust/conftest.py b/py/src/braintrust/conftest.py index 772c4d505..0af194eb7 100644 --- a/py/src/braintrust/conftest.py +++ b/py/src/braintrust/conftest.py @@ -3,6 +3,18 @@ import pytest +@pytest.fixture(scope="module") +def vcr_config(): + """Global VCR config to filter sensitive headers from all cassettes.""" + return { + "filter_headers": [ + "authorization", + "x-api-key", + "openai-organization", + ] + } + + @pytest.fixture(autouse=True) def override_app_url_for_tests(): """ diff --git a/py/src/braintrust/oai.py b/py/src/braintrust/oai.py index 13ef79553..d8de85e70 100644 --- a/py/src/braintrust/oai.py +++ b/py/src/braintrust/oai.py @@ -986,3 +986,94 @@ def _is_not_given(value: Any) -> bool: return type_name == "NotGiven" except Exception: return False + + +def patch_openai() -> bool: + """ + Patch OpenAI to add Braintrust tracing globally. + + After calling this, all new OpenAI() and AsyncOpenAI() clients + will automatically have tracing enabled. 
+ + Returns: + True if OpenAI was patched (or already patched), False if OpenAI is not installed. + + Example: + ```python + import braintrust + braintrust.patch_openai() + + import openai + client = openai.OpenAI() + # All calls are now traced! + ``` + """ + try: + import openai + + if hasattr(openai, "_braintrust_wrapped"): + return True # Already patched + + # Store originals for unpatch + openai._braintrust_original_OpenAI = openai.OpenAI + openai._braintrust_original_AsyncOpenAI = openai.AsyncOpenAI + + # Create patched classes + class PatchedOpenAI(openai._braintrust_original_OpenAI): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + _apply_openai_wrapper(self) + + class PatchedAsyncOpenAI(openai._braintrust_original_AsyncOpenAI): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + _apply_openai_wrapper(self) + + # Replace classes + openai.OpenAI = PatchedOpenAI + openai.AsyncOpenAI = PatchedAsyncOpenAI + openai._braintrust_wrapped = True + return True + + except ImportError: + return False + + +def unpatch_openai() -> bool: + """ + Restore OpenAI to its original state, removing Braintrust tracing. + + Returns: + True if OpenAI was unpatched (or wasn't patched), False if OpenAI is not installed. + + Example: + ```python + import braintrust + braintrust.patch_openai() + # ... use traced clients ... 
+ braintrust.unpatch_openai() # Restore original behavior + ``` + """ + try: + import openai + + if hasattr(openai, "_braintrust_wrapped"): + openai.OpenAI = openai._braintrust_original_OpenAI + openai.AsyncOpenAI = openai._braintrust_original_AsyncOpenAI + + delattr(openai, "_braintrust_wrapped") + delattr(openai, "_braintrust_original_OpenAI") + delattr(openai, "_braintrust_original_AsyncOpenAI") + + return True + + except ImportError: + return False + + +def _apply_openai_wrapper(client): + """Apply tracing wrapper to an OpenAI client instance in-place.""" + wrapped = wrap_openai(client) + for attr in ("chat", "responses", "embeddings", "moderations", "beta"): + if hasattr(wrapped, attr): + setattr(client, attr, getattr(wrapped, attr)) diff --git a/py/src/braintrust/wrappers/agno/__init__.py b/py/src/braintrust/wrappers/agno/__init__.py index 86324047e..6e695eca1 100644 --- a/py/src/braintrust/wrappers/agno/__init__.py +++ b/py/src/braintrust/wrappers/agno/__init__.py @@ -62,7 +62,6 @@ def setup_agno( models.base.Model = wrap_model(models.base.Model) # pyright: ignore[reportUnknownMemberType] tools.function.FunctionCall = wrap_function_call(tools.function.FunctionCall) # pyright: ignore[reportUnknownMemberType] return True - except ImportError as e: - logger.error(f"Failed to import Agno: {e}") - logger.error("Agno is not installed. Please install it with: pip install agno") + except ImportError: + # Not installed - this is expected when using auto_instrument() return False diff --git a/py/src/braintrust/wrappers/anthropic.py b/py/src/braintrust/wrappers/anthropic.py index 9573b92f4..3839f5da2 100644 --- a/py/src/braintrust/wrappers/anthropic.py +++ b/py/src/braintrust/wrappers/anthropic.py @@ -358,3 +358,102 @@ def wrap_anthropic(client): def wrap_anthropic_client(client): return wrap_anthropic(client) + + +def patch_anthropic() -> bool: + """ + Patch Anthropic to add Braintrust tracing globally. 
+ + After calling this, all new Anthropic() and AsyncAnthropic() clients + will automatically have tracing enabled. + + Returns: + True if Anthropic was patched (or already patched), False if Anthropic is not installed. + + Example: + ```python + import braintrust + braintrust.patch_anthropic() + + import anthropic + client = anthropic.Anthropic() + # All calls are now traced! + ``` + """ + try: + import anthropic + + if hasattr(anthropic, "_braintrust_wrapped"): + return True # Already patched + + # Store originals for unpatch + anthropic._braintrust_original_Anthropic = anthropic.Anthropic + anthropic._braintrust_original_AsyncAnthropic = anthropic.AsyncAnthropic + + # Create patched classes + class PatchedAnthropic(anthropic._braintrust_original_Anthropic): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + _apply_anthropic_wrapper(self) + + class PatchedAsyncAnthropic(anthropic._braintrust_original_AsyncAnthropic): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + _apply_async_anthropic_wrapper(self) + + # Replace classes + anthropic.Anthropic = PatchedAnthropic + anthropic.AsyncAnthropic = PatchedAsyncAnthropic + anthropic._braintrust_wrapped = True + return True + + except ImportError: + return False + + +def unpatch_anthropic() -> bool: + """ + Restore Anthropic to its original state, removing Braintrust tracing. + + Returns: + True if Anthropic was unpatched (or wasn't patched), False if Anthropic is not installed. + + Example: + ```python + import braintrust + braintrust.patch_anthropic() + # ... use traced clients ... 
+ braintrust.unpatch_anthropic() # Restore original behavior + ``` + """ + try: + import anthropic + + if hasattr(anthropic, "_braintrust_wrapped"): + anthropic.Anthropic = anthropic._braintrust_original_Anthropic + anthropic.AsyncAnthropic = anthropic._braintrust_original_AsyncAnthropic + + delattr(anthropic, "_braintrust_wrapped") + delattr(anthropic, "_braintrust_original_Anthropic") + delattr(anthropic, "_braintrust_original_AsyncAnthropic") + + return True + + except ImportError: + return False + + +def _apply_anthropic_wrapper(client): + """Apply tracing wrapper to an Anthropic client instance in-place.""" + wrapped = wrap_anthropic(client) + client.messages = wrapped.messages + if hasattr(wrapped, "beta"): + client.beta = wrapped.beta + + +def _apply_async_anthropic_wrapper(client): + """Apply tracing wrapper to an AsyncAnthropic client instance in-place.""" + wrapped = wrap_anthropic(client) + client.messages = wrapped.messages + if hasattr(wrapped, "beta"): + client.beta = wrapped.beta diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_agno.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_agno.py new file mode 100644 index 000000000..a57fa8e94 --- /dev/null +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_agno.py @@ -0,0 +1,32 @@ +"""Test auto_instrument for Agno (no uninstrument available).""" + +from braintrust.auto import auto_instrument +from braintrust.wrappers.test_utils import autoinstrument_test_context + +# 1. Instrument +results = auto_instrument() +assert results.get("agno") == True + +# 2. Idempotent +results2 = auto_instrument() +assert results2.get("agno") == True + +# 3. Make API call and verify span +with autoinstrument_test_context("test_auto_agno") as memory_logger: + from agno.agent import Agent + from agno.models.openai import OpenAIChat + + agent = Agent( + name="Test Agent", + model=OpenAIChat(id="gpt-4o-mini"), + instructions="You are a helpful assistant. 
Be brief.", + ) + + response = agent.run("Say hi") + assert response + assert response.content + + spans = memory_logger.pop() + assert len(spans) >= 1, f"Expected at least 1 span, got {len(spans)}" + +print("SUCCESS") diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_anthropic.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_anthropic.py new file mode 100644 index 000000000..fd2aebb14 --- /dev/null +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_anthropic.py @@ -0,0 +1,53 @@ +"""Test auto_instrument/auto_uninstrument for Anthropic.""" + +import anthropic +from braintrust.auto import auto_instrument, auto_uninstrument +from braintrust.wrappers.test_utils import autoinstrument_test_context + +# 1. Verify not patched initially +assert not hasattr(anthropic, "_braintrust_wrapped") + +# 2. Instrument +results = auto_instrument() +assert results.get("anthropic") == True +assert hasattr(anthropic, "_braintrust_wrapped") + +# 3. Idempotent +results2 = auto_instrument() +assert results2.get("anthropic") == True + +# 4. Make API call and verify span +with autoinstrument_test_context("test_auto_anthropic") as memory_logger: + client = anthropic.Anthropic() + response = client.messages.create( + model="claude-3-5-haiku-20241022", + max_tokens=100, + messages=[{"role": "user", "content": "Say hi"}], + ) + assert response.content[0].text + + spans = memory_logger.pop() + assert len(spans) == 1, f"Expected 1 span, got {len(spans)}" + span = spans[0] + assert span["metadata"]["provider"] == "anthropic" + assert "claude" in span["metadata"]["model"] + +# 5. Uninstrument +results3 = auto_uninstrument() +assert results3.get("anthropic") == True +assert not hasattr(anthropic, "_braintrust_wrapped") + +# 6. 
Verify no spans after uninstrument +with autoinstrument_test_context("test_auto_anthropic_uninstrumented") as memory_logger: + client = anthropic.Anthropic() + response = client.messages.create( + model="claude-3-5-haiku-20241022", + max_tokens=100, + messages=[{"role": "user", "content": "Say hi again"}], + ) + assert response.content[0].text + + spans = memory_logger.pop() + assert len(spans) == 0, f"Expected 0 spans after uninstrument, got {len(spans)}" + +print("SUCCESS") diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_claude_agent_sdk.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_claude_agent_sdk.py new file mode 100644 index 000000000..e6d0112b0 --- /dev/null +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_claude_agent_sdk.py @@ -0,0 +1,35 @@ +"""Test auto_instrument for Claude Agent SDK (no uninstrument available).""" + +from braintrust.auto import auto_instrument +from braintrust.wrappers.test_utils import autoinstrument_test_context + +# 1. Instrument +results = auto_instrument() +assert results.get("claude_agent_sdk") == True + +# 2. Idempotent +results2 = auto_instrument() +assert results2.get("claude_agent_sdk") == True + +# 3. 
Make API call and verify span +with autoinstrument_test_context("test_auto_claude_agent_sdk") as memory_logger: + import claude_agent_sdk + + options = claude_agent_sdk.ClaudeAgentOptions(model="claude-3-5-haiku-20241022") + + async def run_agent(): + async with claude_agent_sdk.ClaudeSDKClient(options=options) as client: + await client.query("Say hi") + async for message in client.receive_response(): + if type(message).__name__ == "ResultMessage": + return message + return None + + import asyncio + result = asyncio.run(run_agent()) + assert result is not None + + spans = memory_logger.pop() + assert len(spans) >= 1, f"Expected at least 1 span, got {len(spans)}" + +print("SUCCESS") diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_dspy.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_dspy.py new file mode 100644 index 000000000..8ded4172e --- /dev/null +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_dspy.py @@ -0,0 +1,36 @@ +"""Test auto_instrument/auto_uninstrument for DSPy. + +Note: This test focuses on patching behavior only. Span verification for DSPy +is done in test_dspy.py::test_dspy_callback which uses pytest-vcr (supports httpx). +The standalone VCR in test_utils doesn't capture httpx used by litellm/dspy. +""" + +import dspy +from braintrust.auto import auto_instrument, auto_uninstrument +from braintrust.wrappers.dspy import BraintrustDSpyCallback + +# 1. Verify not patched initially +assert not hasattr(dspy, "_braintrust_wrapped") + +# 2. Instrument +results = auto_instrument() +assert results.get("dspy") == True +assert hasattr(dspy, "_braintrust_wrapped") + +# 3. Idempotent +results2 = auto_instrument() +assert results2.get("dspy") == True + +# 4. 
Verify callback is added when configure() is called +dspy.configure(lm=None) +from dspy.dsp.utils.settings import settings + +has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in settings.callbacks) +assert has_bt_callback, f"Expected BraintrustDSpyCallback in callbacks after configure()" + +# 5. Uninstrument +results3 = auto_uninstrument() +assert results3.get("dspy") == True +assert not hasattr(dspy, "_braintrust_wrapped") + +print("SUCCESS") diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_google_genai.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_google_genai.py new file mode 100644 index 000000000..4645ae0db --- /dev/null +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_google_genai.py @@ -0,0 +1,32 @@ +"""Test auto_instrument for Google GenAI (no uninstrument available).""" + +from braintrust.auto import auto_instrument +from braintrust.wrappers.test_utils import autoinstrument_test_context + +# 1. Instrument +results = auto_instrument() +assert results.get("google_genai") == True + +# 2. Idempotent +results2 = auto_instrument() +assert results2.get("google_genai") == True + +# 3. 
Make API call and verify span +with autoinstrument_test_context("test_auto_google_genai") as memory_logger: + from google.genai import types + from google.genai.client import Client + + client = Client() + response = client.models.generate_content( + model="gemini-2.0-flash-001", + contents="Say hi", + config=types.GenerateContentConfig(max_output_tokens=100), + ) + assert response.text + + spans = memory_logger.pop() + assert len(spans) == 1, f"Expected 1 span, got {len(spans)}" + span = spans[0] + assert "gemini" in span["metadata"]["model"] + +print("SUCCESS") diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_litellm.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_litellm.py new file mode 100644 index 000000000..c489097dc --- /dev/null +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_litellm.py @@ -0,0 +1,48 @@ +"""Test auto_instrument/auto_uninstrument for LiteLLM.""" + +import litellm +from braintrust.auto import auto_instrument, auto_uninstrument +from braintrust.wrappers.test_utils import autoinstrument_test_context + +# 1. Verify not patched initially +assert not hasattr(litellm, "_braintrust_wrapped") + +# 2. Instrument +results = auto_instrument() +assert results.get("litellm") == True +assert hasattr(litellm, "_braintrust_wrapped") + +# 3. Idempotent +results2 = auto_instrument() +assert results2.get("litellm") == True + +# 4. Make API call and verify span +with autoinstrument_test_context("test_auto_litellm") as memory_logger: + response = litellm.completion( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Say hi"}], + ) + assert response.choices[0].message.content + + spans = memory_logger.pop() + assert len(spans) == 1, f"Expected 1 span, got {len(spans)}" + span = spans[0] + assert span["metadata"]["provider"] == "litellm" + +# 5. Uninstrument +results3 = auto_uninstrument() +assert results3.get("litellm") == True +assert not hasattr(litellm, "_braintrust_wrapped") + +# 6. 
Verify no spans after uninstrument +with autoinstrument_test_context("test_auto_litellm_uninstrumented") as memory_logger: + response = litellm.completion( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Say hi again"}], + ) + assert response.choices[0].message.content + + spans = memory_logger.pop() + assert len(spans) == 0, f"Expected 0 spans after uninstrument, got {len(spans)}" + +print("SUCCESS") diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_openai.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_openai.py new file mode 100644 index 000000000..3b2154569 --- /dev/null +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_openai.py @@ -0,0 +1,51 @@ +"""Test auto_instrument/auto_uninstrument for OpenAI.""" + +import openai +from braintrust.auto import auto_instrument, auto_uninstrument +from braintrust.wrappers.test_utils import autoinstrument_test_context + +# 1. Verify not patched initially +assert not hasattr(openai, "_braintrust_wrapped") + +# 2. Instrument +results = auto_instrument() +assert results.get("openai") == True +assert hasattr(openai, "_braintrust_wrapped") + +# 3. Idempotent +results2 = auto_instrument() +assert results2.get("openai") == True + +# 4. Make API call and verify span +with autoinstrument_test_context("test_auto_openai") as memory_logger: + client = openai.OpenAI() + response = client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Say hi"}], + ) + assert response.choices[0].message.content + + spans = memory_logger.pop() + assert len(spans) == 1, f"Expected 1 span, got {len(spans)}" + span = spans[0] + assert span["metadata"]["provider"] == "openai" + assert "gpt-4o-mini" in span["metadata"]["model"] + +# 5. Uninstrument +results3 = auto_uninstrument() +assert results3.get("openai") == True +assert not hasattr(openai, "_braintrust_wrapped") + +# 6. 
Verify no spans after uninstrument +with autoinstrument_test_context("test_auto_openai_uninstrumented") as memory_logger: + client = openai.OpenAI() + response = client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Say hi again"}], + ) + assert response.choices[0].message.content + + spans = memory_logger.pop() + assert len(spans) == 0, f"Expected 0 spans after uninstrument, got {len(spans)}" + +print("SUCCESS") diff --git a/py/src/braintrust/wrappers/auto_test_scripts/test_auto_pydantic_ai.py b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_pydantic_ai.py new file mode 100644 index 000000000..c6b874845 --- /dev/null +++ b/py/src/braintrust/wrappers/auto_test_scripts/test_auto_pydantic_ai.py @@ -0,0 +1,35 @@ +"""Test auto_instrument for Pydantic AI (no uninstrument available).""" + +from braintrust.auto import auto_instrument +from braintrust.wrappers.test_utils import autoinstrument_test_context + +# 1. Instrument +results = auto_instrument() +assert results.get("pydantic_ai") == True + +# 2. Idempotent +results2 = auto_instrument() +assert results2.get("pydantic_ai") == True + +# 3. 
Make API call and verify span +with autoinstrument_test_context("test_auto_pydantic_ai") as memory_logger: + from pydantic_ai import Agent + from pydantic_ai.models.openai import OpenAIChatModel + from pydantic_ai.settings import ModelSettings + + agent = Agent( + OpenAIChatModel("gpt-4o-mini"), + model_settings=ModelSettings(max_tokens=100), + ) + + import asyncio + result = asyncio.run(agent.run("Say hi")) + assert result.output + + spans = memory_logger.pop() + assert len(spans) >= 1, f"Expected at least 1 span, got {len(spans)}" + # Find the agent_run span + agent_spans = [s for s in spans if "agent_run" in s["span_attributes"]["name"]] + assert len(agent_spans) >= 1, f"Expected agent_run span, got {[s['span_attributes']['name'] for s in spans]}" + +print("SUCCESS") diff --git a/py/src/braintrust/wrappers/cassettes/TestPatchAnthropicAsyncSpans.test_patch_anthropic_async_creates_spans.yaml b/py/src/braintrust/wrappers/cassettes/TestPatchAnthropicAsyncSpans.test_patch_anthropic_async_creates_spans.yaml new file mode 100644 index 000000000..ac5b8caa6 --- /dev/null +++ b/py/src/braintrust/wrappers/cassettes/TestPatchAnthropicAsyncSpans.test_patch_anthropic_async_creates_spans.yaml @@ -0,0 +1,107 @@ +interactions: +- request: + body: '{"max_tokens":100,"messages":[{"role":"user","content":"Say hi async"}],"model":"claude-3-5-haiku-latest"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '106' + Content-Type: + - application/json + Host: + - api.anthropic.com + User-Agent: + - PatchedAsyncAnthropic/Python 0.76.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 0.76.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + anthropic-version: + - '2023-06-01' + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-timeout: 
+ - '600' + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAA/3VRTW8TMRD9K8YXQEqk7NIKtBduKAcuSKCqJcix7GltxTuz2GPaVZT/zjj9blVf + bL335s288V6P5CHpQbtkq4flp+XpMti4q8t+1Z90q77XCx29CMZyZVZdf/795ny6oPWP67N48WXq + fo3ffnrR8DxBU0Ep9goEyJQaYEuJhS2yQI6QQV7D7/29nuGmMcdr0GvI8L4oi8qWGV3IhFSLuqzo + OBIqJlXsrEIcNrjB7XY7zRxInke58nDZeBPih4+iUHIycM2oNnodFQexf7fRx8pmcEY1eTVTVSnu + QI3QGngYCQtny6ACXTfI2ZSkOpbbsR7m+aoPfxa6ME0mgy2EEgHQm9ZS3xEF/lZAJ1mxprTQ9bie + Ya8jTpUN0w6w6KFbyXqsC2CcWDVz81zwwAvt3+Lua5s/TAFGyDaZ0/G1/pHtwkv2sNBU+Sl08lnS + QP4XHRiOkCVn+1Nvs9eHw3/q92SIRQIAAA== + headers: + CF-RAY: + - 9be009abe8aa4e4d-EWR + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 14 Jan 2026 20:56:04 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 27796668-7351-40ac-acc4-024aee8995a5 + anthropic-ratelimit-input-tokens-limit: + - '5000000' + anthropic-ratelimit-input-tokens-remaining: + - '5000000' + anthropic-ratelimit-input-tokens-reset: + - '2026-01-14T20:56:03Z' + anthropic-ratelimit-output-tokens-limit: + - '1000000' + anthropic-ratelimit-output-tokens-remaining: + - '1000000' + anthropic-ratelimit-output-tokens-reset: + - '2026-01-14T20:56:04Z' + anthropic-ratelimit-requests-limit: + - '10000' + anthropic-ratelimit-requests-remaining: + - '9999' + anthropic-ratelimit-requests-reset: + - '2026-01-14T20:56:03Z' + anthropic-ratelimit-tokens-limit: + - '6000000' + anthropic-ratelimit-tokens-remaining: + - '6000000' + anthropic-ratelimit-tokens-reset: + - '2026-01-14T20:56:03Z' + cf-cache-status: + - DYNAMIC + request-id: + - req_011CX7wMPtPdemQvSMv8LCMY + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '1286' + status: + code: 200 + message: OK +version: 1 diff --git 
a/py/src/braintrust/wrappers/cassettes/TestPatchAnthropicSpans.test_patch_anthropic_creates_spans.yaml b/py/src/braintrust/wrappers/cassettes/TestPatchAnthropicSpans.test_patch_anthropic_creates_spans.yaml new file mode 100644 index 000000000..b1f532c29 --- /dev/null +++ b/py/src/braintrust/wrappers/cassettes/TestPatchAnthropicSpans.test_patch_anthropic_creates_spans.yaml @@ -0,0 +1,105 @@ +interactions: +- request: + body: '{"max_tokens":100,"messages":[{"role":"user","content":"Say hi"}],"model":"claude-3-5-haiku-latest"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '100' + Content-Type: + - application/json + Host: + - api.anthropic.com + User-Agent: + - PatchedAnthropic/Python 0.76.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 0.76.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + anthropic-version: + - '2023-06-01' + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-timeout: + - '600' + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAA/3WQT0/DMAzFv0rxuZXabjusFyTgsMMucEGAUBQS00RLky5xYFXV7046UfFPnGy9 + 3/OT7RE6J9FAA8LwKLFYFZtCcX2IRV3W66qsa8hBy2ToQsvKar8V69Xu5ebx9Xhd9VcP96e7W7VP + Hhp6nF0YAm8xCd6ZWeAh6EDcUpKEs4Spa57GxU94msm5NLDTGSn0eJHt3HvGPWaDi5l02rYZOcmH + S5iecwjkeuaRB2fTEFrJKHoLnyDgMaIVKd1GY3KI54WaEbTtIzFyB7QBmm3ahwuFTKQk0s6yn7xc + eMLyP7bMzvHYK+zQc8M23V//F63Ubzrl4CJ9l6o6HYP+TQtkpNGnM+cnSu4lTNMHasIXYLYBAAA= + headers: + CF-RAY: + - 9bde85cdffba42f2-EWR + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 14 Jan 2026 16:31:16 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 27796668-7351-40ac-acc4-024aee8995a5 
+ anthropic-ratelimit-input-tokens-limit: + - '5000000' + anthropic-ratelimit-input-tokens-remaining: + - '5000000' + anthropic-ratelimit-input-tokens-reset: + - '2026-01-14T16:31:16Z' + anthropic-ratelimit-output-tokens-limit: + - '1000000' + anthropic-ratelimit-output-tokens-remaining: + - '1000000' + anthropic-ratelimit-output-tokens-reset: + - '2026-01-14T16:31:16Z' + anthropic-ratelimit-requests-limit: + - '10000' + anthropic-ratelimit-requests-remaining: + - '9999' + anthropic-ratelimit-requests-reset: + - '2026-01-14T16:31:16Z' + anthropic-ratelimit-tokens-limit: + - '6000000' + anthropic-ratelimit-tokens-remaining: + - '6000000' + anthropic-ratelimit-tokens-reset: + - '2026-01-14T16:31:16Z' + cf-cache-status: + - DYNAMIC + request-id: + - req_011CX7bAJ22sddhVB8Eb2N2U + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '731' + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/TestPatchOpenAIAsyncSpans.test_patch_openai_async_creates_spans.yaml b/py/src/braintrust/wrappers/cassettes/TestPatchOpenAIAsyncSpans.test_patch_openai_async_creates_spans.yaml new file mode 100644 index 000000000..b1ce141d9 --- /dev/null +++ b/py/src/braintrust/wrappers/cassettes/TestPatchOpenAIAsyncSpans.test_patch_openai_async_creates_spans.yaml @@ -0,0 +1,112 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Say hi async"}],"model":"gpt-4o-mini"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '77' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - PatchedAsyncOpenAI/Python 2.15.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.15.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + 
X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4ySwW6cMBCG7zyF6/NSAUELu5eqSiMlr9AoQl57ALfGY9lD21W07x4Zdhe2TaRe + OMw3//D/43lNGONa8T3jshckB2fS+2Nx/1QVpdJ1/u1hyA62ol5V4vvDjr7yTVTg4QdIuqg+Sxyc + AdJoZyw9CII4Na+2dVmU+baYwIAKTJR1jtIS00FbnRZZUaZZleb1Wd2jlhD4nj0njDH2On2jT6vg + D9+zbHOpDBCC6IDvr02McY8mVrgIQQcSlvhmgRItgZ2sP4Ix+Ik94m8mhWVPbBawI46MUInjl7XQ + QzsGEc3b0ZgVENYiiRh+svxyJqerSYOd83gIf0l5q60OfeNBBLTRUCB0fKKnhLGXaRnjTT7uPA6O + GsKfMP0uP++CL0+wwN2ZEZIwK82lfjOsUUBCm7DaJZdC9qAW5bJ4MSqNK5CsIv/r5b3Zc2xtu/8Z + vwApwRGoxnlQWt7mXdo8xPv8qO264skwD+B/aQkNafDxGRS0YjTz1fBwDARD02rbgXdez6fTuqbY + 3d1lYreta56ckjcAAAD//wMAcecg90gDAAA= + headers: + CF-RAY: + - 9be009a69b7bc953-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 14 Jan 2026 20:56:03 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=B4Tkg3SnpDC1kPZyqRMwNFDkjxxXdNwEYILIz1sZ2vg-1768424163-1.0.1.1-3hI_Dc.bZ7eOoXFnJGbbo3CYX_Ymg8L7.RsTw7UKobrTx7_WCJTzdLC_7mihNSoRbCKTa.wQaTn9kzidpz7YxJVU6GsW4iXN8BirBkqsk4M; + path=/; expires=Wed, 14-Jan-26 21:26:03 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=G5fjKK1baClvzyT8pEAmWLEIkYd89xShJDBbQCQ_NIo-1768424163079-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '404' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '661' + x-openai-proxy-wasm: + - v0.1 + 
x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_90096f2452fb405da5a26f82019e4ff4 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/TestPatchOpenAISpans.test_patch_openai_creates_spans.yaml b/py/src/braintrust/wrappers/cassettes/TestPatchOpenAISpans.test_patch_openai_creates_spans.yaml new file mode 100644 index 000000000..2d569eea7 --- /dev/null +++ b/py/src/braintrust/wrappers/cassettes/TestPatchOpenAISpans.test_patch_openai_creates_spans.yaml @@ -0,0 +1,112 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Say hi"}],"model":"gpt-4o-mini"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '71' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - PatchedOpenAI/Python 2.15.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.15.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xSy27bMBC86yu2PFuF7cqJ4ksPPdQJ2kMvLYoiEGhyJbGhuAS5Sm0E/veCUmIp + jwK96LCzM5oZ7kMGIIwWWxCqlaw6b/NPh+OH+CPusLtb3pQ/v17X3777w+bzl/u2vBGLxKD9b1T8 + xHqvqPMW2ZAbYRVQMibV1eVFWSzL9UUxAB1ptInWeM4LyjvjTL5erot8eZmvykd2S0ZhFFv4lQEA + PAzf5NNpPIgtLBdPkw5jlA2K7XkJQASyaSJkjCaydCwWE6jIMbrB+s68gx39ASUdXMO4DUfqgUnL + 48c5K2DdR5mcu97aGSCdI5Yp+eD39hE5nR1aanygfXxBFbVxJrZVQBnJJTeRyYsBPWUAt0MT/bNw + 
wgfqPFdMdzj87mpUE1P9rzEmlnYar8rFG1qVRpbGxlmPQknVop6YU+my14ZmQDZL/NrLW9pjauOa + /5GfAKXQM+rKB9RGPc87rQVMt/mvtXPDg2ERMdwbhRUbDOkVNNayt+PFiHiMjF1VG9dg8MGMZ1P7 + ShWbcrPf7K+UyE7ZXwAAAP//AwCuvReaRAMAAA== + headers: + CF-RAY: + - 9bde85857d3a8191-IAD + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Wed, 14 Jan 2026 16:31:05 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=WC8qx7XxkvNS5wuwlfQhz1YXRyMLnO27mFg6CLFQsBA-1768408265-1.0.1.1-CYX0R1gm_JLPDaW7XX1DXFIeIJcSzOBeQt8GvdCwva0SF6kpS7rj822yvepm_lCRzCmfG4LdKQMzdL6iJwxli_Hn5FpXoHbcVT65fyweftE; + path=/; expires=Wed, 14-Jan-26 17:01:05 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=wyzjAd4Rg15mG6l4SO3Ptarr8MXdf2XE6SjjYZWnlPc-1768408265199-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '525' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '547' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_3a43c0bbc3274bf4b365b54caa285598 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_auto_agno.yaml b/py/src/braintrust/wrappers/cassettes/test_auto_agno.yaml new file mode 100644 index 000000000..338d68c22 --- /dev/null +++ b/py/src/braintrust/wrappers/cassettes/test_auto_agno.yaml @@ -0,0 +1,147 @@ 
+interactions: +- request: + body: '{"messages":[{"role":"developer","content":"You are a helpful assistant. + Be brief."},{"role":"user","content":"Say hi"}],"model":"gpt-4o-mini"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '143' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - PatchedOpenAI/Python 2.15.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.15.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFJBbtswELzrFVuerUKWjdjyJZcWSBG0QC9F0SIQaHIls6G4LLlqawT+ + e0HJsZQ0BXLRYWdnNDPchwxAGC12INRBsuq8zd8Vunj/8RPef9k3n5tN87W3+ue3Zh1u+TaKRWLQ + /gcqfmS9VdR5i2zIjbAKKBmT6nJzVRXrsqy2A9CRRptored8TXlnnMnLolznxSZfbs/sAxmFUezg + ewYA8DB8k0+n8Y/YQbF4nHQYo2xR7C5LACKQTRMhYzSRpWOxmEBFjtEN1m/MG7ih36Ckgw8wbsOR + emDS8ng9ZwVs+iiTc9dbOwOkc8QyJR/83p2R08WhpdYH2sdnVNEYZ+KhDigjueQmMnkxoKcM4G5o + on8STvhAneea6R6H35XlKCem/iewOmNMLO00Xi0XL4jVGlkaG2dFCiXVAfXEnFqXvTY0A7JZ5H+9 + vKQ9xjaufY38BCiFnlHXPqA26mneaS1gOs7/rV0qHgyLiOGXUVizwZCeQWMjezuejIjHyNjVjXEt + Bh/MeDeNr8tqtSpkdbXdiuyU/QUAAP//AwBBs78WRQMAAA== + headers: + CF-RAY: + - 9c1afcdd3f36b231-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 22 Jan 2026 00:38:19 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=Jw95ZRGfTr6qO8YVvMCpB1aMAiti.HWb9WM0o.EAG4M-1769042299-1.0.1.1-F0ol4YtLGC1.t2DHb1Hj435gvyQ_nGNudwYUErS.pg4aWKbU4O68f4wJthw2GUCv2BYU7cC4ZcIA0B6TvaUN7VYsBM5OS7Ccc46cnb7zQ9Y; + path=/; expires=Thu, 22-Jan-26 01:08:19 GMT; domain=.api.openai.com; HttpOnly; + Secure; 
SameSite=None + - _cfuvid=gxrFvllhyUbQeecWVXMHkFhdg_IAJ7CO467JJDSyVA8-1769042299331-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '438' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '490' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999985' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_c68836a69b1549819fb6a5eecfd10be7 + status: + code: 200 + message: OK +- request: + body: '{"session_id":"3ed01154-18cc-4648-b766-73f60e3e08c2","run_id":"4ebf7a0f-31fa-4a69-9500-f3f3f21d350d","data":{"agent_id":"test-agent","db_type":null,"model_provider":"OpenAI","model_name":"OpenAIChat","model_id":"gpt-4o-mini","parser_model":null,"output_model":null,"has_tools":true,"has_memory":false,"has_learnings":false,"has_culture":false,"has_reasoning":false,"has_knowledge":false,"has_input_schema":false,"has_output_schema":false,"has_team":false},"sdk_version":"2.4.1","type":"agent"}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '493' + Content-Type: + - application/json + Host: + - os-api.agno.com + user-agent: + - agno/2.4.1 + method: POST + uri: https://os-api.agno.com/telemetry/runs + response: + body: + string: '{"message":"Run creation acknowledged: 4ebf7a0f-31fa-4a69-9500-f3f3f21d350d","status":"success"}' + headers: + content-length: + - '96' + 
content-type: + - application/json + date: + - Thu, 22 Jan 2026 00:38:19 GMT + server: + - uvicorn + status: + code: 201 + message: null +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_auto_anthropic.yaml b/py/src/braintrust/wrappers/cassettes/test_auto_anthropic.yaml new file mode 100644 index 000000000..73ee01336 --- /dev/null +++ b/py/src/braintrust/wrappers/cassettes/test_auto_anthropic.yaml @@ -0,0 +1,105 @@ +interactions: +- request: + body: '{"max_tokens":100,"messages":[{"role":"user","content":"Say hi"}],"model":"claude-3-5-haiku-20241022"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '102' + Content-Type: + - application/json + Host: + - api.anthropic.com + User-Agent: + - PatchedAnthropic/Python 0.76.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 0.76.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + anthropic-version: + - '2023-06-01' + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-timeout: + - '600' + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAA/3WQT0vEMBDFv0qdcwtt1x7sRQSFBS9eBFeREJOhjdtmajJxt5R+d9PF4j88zfB+ + bx4zM0FPGjuoQXUyaMw2WZW10uxDVubleZGXJaRgdDT0vhF58bB5fTzcq92Ojrf9XfEy3FxdV030 + 8Djg4kLvZYNRcNQtgvTeeJaWo6TIMsaufppWP+NxIadSw9Yk3KLDs2RLh0Q6TEYKiSZjm4RJy/ES + 5ucUPNMgHEpPNg6h1YKDs/AJPL4FtCqm29B1KYTTQvUExg6BBdMerYf6Iu4jVYtCxSQ2ZMVPnq88 + Yv0fW2eXeBxa7NHJTlT9X/8XLdrfdE6BAn+XijIeg+7dKBRs0MUzlydq6TTM8wcGNCA4tgEAAA== + headers: + CF-RAY: + - 9c1afefa18588183-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 22 Jan 2026 00:39:45 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + 
anthropic-organization-id: + - 27796668-7351-40ac-acc4-024aee8995a5 + anthropic-ratelimit-input-tokens-limit: + - '5000000' + anthropic-ratelimit-input-tokens-remaining: + - '5000000' + anthropic-ratelimit-input-tokens-reset: + - '2026-01-22T00:39:45Z' + anthropic-ratelimit-output-tokens-limit: + - '1000000' + anthropic-ratelimit-output-tokens-remaining: + - '1000000' + anthropic-ratelimit-output-tokens-reset: + - '2026-01-22T00:39:45Z' + anthropic-ratelimit-requests-limit: + - '10000' + anthropic-ratelimit-requests-remaining: + - '9999' + anthropic-ratelimit-requests-reset: + - '2026-01-22T00:39:45Z' + anthropic-ratelimit-tokens-limit: + - '6000000' + anthropic-ratelimit-tokens-remaining: + - '6000000' + anthropic-ratelimit-tokens-reset: + - '2026-01-22T00:39:45Z' + cf-cache-status: + - DYNAMIC + request-id: + - req_011CXMV5BfWXZENVYoJwiZfW + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '859' + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_auto_anthropic_uninstrumented.yaml b/py/src/braintrust/wrappers/cassettes/test_auto_anthropic_uninstrumented.yaml new file mode 100644 index 000000000..a39ade154 --- /dev/null +++ b/py/src/braintrust/wrappers/cassettes/test_auto_anthropic_uninstrumented.yaml @@ -0,0 +1,105 @@ +interactions: +- request: + body: '{"max_tokens":100,"messages":[{"role":"user","content":"Say hi again"}],"model":"claude-3-5-haiku-20241022"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '108' + Content-Type: + - application/json + Host: + - api.anthropic.com + User-Agent: + - Anthropic/Python 0.76.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 0.76.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + 
anthropic-version: + - '2023-06-01' + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + x-stainless-timeout: + - '600' + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: !!binary | + H4sIAAAAAAAA/3WQT0vEMBDFv0qdcwtNddHtxYMsLP45eRQJIRnasGlSk4nrUvrdnS6usoqnhPd7 + 83gzEwzBoIMWtFPZYHVZrape2V2umrq5EnXTQAnWsGFInazFRjwMUe/v18/q6XF9d3Otu42w7KHD + iIsLU1IdshCDWwSVkk2kPLGkgyfkX/synfyEHws5Pi1sbUE9RrwotmFfqIjFIeTCBOu7goJRh1uY + X0tIFEYZUaXgeQi9kZSjhy+Q8C2j15zus3Ml5GOhdgLrx0ySwg59glbUXEjpHqXmKLLBy3PDN2ds + /mOn2SUfxx4HjMrJ1fDX/0NF/5vOJYRMZ+0a3gbju9UoyWLkPZcrGhUNzPMnGJnrOLcBAAA= + headers: + CF-RAY: + - 9c1aff00ffb1b917-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 22 Jan 2026 00:39:46 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - 27796668-7351-40ac-acc4-024aee8995a5 + anthropic-ratelimit-input-tokens-limit: + - '5000000' + anthropic-ratelimit-input-tokens-remaining: + - '5000000' + anthropic-ratelimit-input-tokens-reset: + - '2026-01-22T00:39:46Z' + anthropic-ratelimit-output-tokens-limit: + - '1000000' + anthropic-ratelimit-output-tokens-remaining: + - '1000000' + anthropic-ratelimit-output-tokens-reset: + - '2026-01-22T00:39:46Z' + anthropic-ratelimit-requests-limit: + - '10000' + anthropic-ratelimit-requests-remaining: + - '9999' + anthropic-ratelimit-requests-reset: + - '2026-01-22T00:39:46Z' + anthropic-ratelimit-tokens-limit: + - '6000000' + anthropic-ratelimit-tokens-remaining: + - '6000000' + anthropic-ratelimit-tokens-reset: + - '2026-01-22T00:39:46Z' + cf-cache-status: + - DYNAMIC + request-id: + - req_011CXMV5GPmdzjhwfgKZKNxf + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '449' + status: + code: 200 + message: OK +version: 1 diff --git 
a/py/src/braintrust/wrappers/cassettes/test_auto_google_genai.yaml b/py/src/braintrust/wrappers/cassettes/test_auto_google_genai.yaml new file mode 100644 index 000000000..edeb2b682 --- /dev/null +++ b/py/src/braintrust/wrappers/cassettes/test_auto_google_genai.yaml @@ -0,0 +1,62 @@ +interactions: +- request: + body: '{"contents": [{"parts": [{"text": "Say hi"}], "role": "user"}], "generationConfig": + {"maxOutputTokens": 100}}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '109' + Content-Type: + - application/json + Host: + - generativelanguage.googleapis.com + user-agent: + - google-genai-sdk/1.60.0 gl-python/3.13.3 + x-goog-api-client: + - google-genai-sdk/1.60.0 gl-python/3.13.3 + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-001:generateContent + response: + body: + string: !!binary | + H4sIAAAAAAAC/61Ry07DMBC85yuMz03lpA8eF4QKUgtUVBDxRpUh28TCtSPboURV/x0naVoHrvhg + rXZmd3Zn1x5C+IOKmMXUgMYn6MVmEFpXf4lJYUAYCzQpm8yoMntu/dZObCkGvssiPGbIpKDgAI3l + ClklNEEp8AwVMkdGxrQ4fRXYqd3s4rfOXlFJDmW7pYyBN/RNQ8ALJphOb4FqKUraXXQzwzuUfiXX + MsmUfC+H9kmXBIMjQsKgPwyHg8Ow1z/2GulKFOeaJjAFQ60rdLc7ti2WmYnkJ4iRzCtXwlrF8bAF + B8EWN9JQ3oZ6nT9d9bnVZNz11rHdLk85M0W5YXTxGGHHINMaqjHIc3z8PeI/aQVBW8zb3qU+1T0o + zeqbJLC0V/LDLvEXnOrUJySoumIFOpNCwyQuebNBVtDnq4fLp3k6z6czrUaKnCXY23g/hhi4Mq4C + AAA= + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Thu, 22 Jan 2026 21:44:31 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=430 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_auto_litellm.yaml 
b/py/src/braintrust/wrappers/cassettes/test_auto_litellm.yaml new file mode 100644 index 000000000..3bf357149 --- /dev/null +++ b/py/src/braintrust/wrappers/cassettes/test_auto_litellm.yaml @@ -0,0 +1,112 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Say hi"}],"model":"gpt-4o-mini"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '71' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - OpenAI/Python 2.15.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.15.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFLLbtswELzrK7Y8W4X8qGP5UiAI0PRSoGiAACkCgSZXElOKy5Krpm7g + fy8k2ZbctEAvOuzsjGaG+5IACKPFFoSqJavG2/QmU/uH3Yfq88Ove97cPT1f46f75Zq/f6ncnZh1 + DNo9oeIT662ixltkQ26AVUDJ2KnOr9Z5tlrM83c90JBG29Eqz+mK0sY4ky6yxSrNrtL55siuySiM + YgtfEwCAl/7b+XQaf4otZLPTpMEYZYVie14CEIFsNxEyRhNZOhazEVTkGF1v/dYA1xjwDdzSMyjp + 4CMMHNhTC0xa7t9PuQHLNsrOv2utnQDSOWLZ5e9dPx6Rw9mnpcoH2sU/qKI0zsS6CCgjuc5TZPKi + Rw8JwGPfR3sRUfhAjeeC6Rv2v8sHNTE+wojNj1UJJpZ2Mj+RLsQKjSyNjZM6hZKqRj0yx+5lqw1N + gGQS+bWZv2kPsY2r/kd+BJRCz6gLH1AbdRl4XAvYnei/1s4V94ZFxPDDKCzYYOieQWMpWzscjoj7 + yNgUpXEVBh/McD2lLxb5cpnJfL3ZiOSQ/AYAAP//AwBh+pUaSwMAAA== + headers: + CF-RAY: + - 9c1afa59ad603c7d-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 22 Jan 2026 00:36:35 GMT + Server: + - cloudflare + Set-Cookie: + - 
__cf_bm=PlzuDRlMMRbycTVjQsnyVm0JzX1xPYSyfWiyV5.ss4o-1769042195-1.0.1.1-GskZbYzH2xdkjFqL_95fGPrEWYDuIHME.G7z1.ZxpgFhV2FYfEYQX7YnLTDCsB4X57NZ52umXVrUpyC8I3FJEa4mT_NvYMPW8qHYS6VMdNA; + path=/; expires=Thu, 22-Jan-26 01:06:35 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=MkSLqw_rIIh52wU3OUETJXSQhjelNm048divbvks86A-1769042195907-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '345' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '368' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999997' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_a0a74895c71242c28a1312c77cc466eb + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_auto_litellm_uninstrumented.yaml b/py/src/braintrust/wrappers/cassettes/test_auto_litellm_uninstrumented.yaml new file mode 100644 index 000000000..c9500c9df --- /dev/null +++ b/py/src/braintrust/wrappers/cassettes/test_auto_litellm_uninstrumented.yaml @@ -0,0 +1,109 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Say hi again"}],"model":"gpt-4o-mini"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '77' + Content-Type: + - application/json + Cookie: + - 
__cf_bm=PlzuDRlMMRbycTVjQsnyVm0JzX1xPYSyfWiyV5.ss4o-1769042195-1.0.1.1-GskZbYzH2xdkjFqL_95fGPrEWYDuIHME.G7z1.ZxpgFhV2FYfEYQX7YnLTDCsB4X57NZ52umXVrUpyC8I3FJEa4mT_NvYMPW8qHYS6VMdNA; + _cfuvid=MkSLqw_rIIh52wU3OUETJXSQhjelNm048divbvks86A-1769042195907-0.0.1.1-604800000 + Host: + - api.openai.com + User-Agent: + - OpenAI/Python 2.15.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.15.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFLBbtswDL37Kzid48F2AzfJpWixQzrs2sMwFAYr07Y2WRQkeWtQ5N8H + 2UnsbB2wiw58fNR7j3xLAISqxQ6E7DDI3ur0UyYP2H0tn7DMPzemf+0eH/zT/Rf/wPtMrCKDX76T + DGfWR8m91RQUmwmWjjBQnJrflttsXeTbcgR6rklHWmtDuua0V0alRVas0+w2zTcndsdKkhc7+JYA + ALyNb9RpanoVO8hW50pP3mNLYndpAhCOdawI9F75gCaI1QxKNoHMKH2vAFtU5gPs+RdINPAIEwcO + PEDgGg93S66jZvAY9ZtB6wWAxnDA6H9U/XxCjhedmlvr+MX/QRWNMsp3lSP0bKImH9iKET0mAM9j + HsOVRWEd9zZUgX/Q+F1+ikPMW3gHDBxQz/XiXL+aVtUUUGm/yFNIlB3VM3MOH4da8QJIFp7/FvPe + 7Mm3Mu3/jJ8BKckGqivrqFby2vDc5ije6L/aLhmPgoUn91NJqoIiF/dQU4ODni5H+IMP1FeNMi05 + 69R0Po2tiu3NTYbbcrMRyTH5DQAA//8DABsEhdxMAwAA + headers: + CF-RAY: + - 9c1afa5d2d4e9ddb-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 22 Jan 2026 00:36:36 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '291' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + 
openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '488' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_8ef7f0497c4f4f14af150e4b1886cacb + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_auto_openai.yaml b/py/src/braintrust/wrappers/cassettes/test_auto_openai.yaml new file mode 100644 index 000000000..fa8f6cca9 --- /dev/null +++ b/py/src/braintrust/wrappers/cassettes/test_auto_openai.yaml @@ -0,0 +1,112 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Say hi"}],"model":"gpt-4o-mini"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '71' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - PatchedOpenAI/Python 2.15.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.15.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xSTY/TMBC951cMPjcoXZpu2wsX0O7CgRsSQqvItSeJWcdj7AlQVv3vyEnbpHxI + XHKYN+/lved5zgCE0WIHQrWSVedt/qZQ/tVdQbX8+HD38/2H9VPfvftafnrb1xsnFolB+y+o+Mx6 + qajzFtnQCVYBJWNSXd6ut8VquS7KAehIo020xnO+orwzzuQ3xc0qL27z5ebEbskojGIHnzMAgOfh + m3w6jT/EDorFedJhjLJBsbssAYhANk2EjNFElo7FYgIVOUY3WL83wC0GfAH39B2UdPAAIwcO1AOT + lofXc27Auo8y+Xe9tTNAOkcsU/7B9eMJOV58Wmp8oH38jSpq40xsq4AykkueIpMXA3rMAB6HPvqr + 
iMIH6jxXTE84/G47qonpESZseapKMLG0s/mZdCVWaWRpbJzVKZRULeqJOXUve21oBmSzyH+a+Zv2 + GNu45n/kJ0Ap9Iy68gG1UdeBp7WA6UT/tXapeDAsIoZvRmHFBkN6Bo217O14OCIeImNX1cY1GHww + 4/XUvlKrclPuy/1WieyY/QIAAP//AwCD9W0XSwMAAA== + headers: + CF-RAY: + - 9c1aebf06c2bdf9a-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 22 Jan 2026 00:26:46 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=3BmepJS69LHQgVvyFWNjigRlQzFvRa9D27MfU_V1GW0-1769041606-1.0.1.1-DoMWqhIcGyYxaxjFWjcV4tR47V69QGDpBRdKxV_H6ljJ.oOmgsyMCJ26sIf6OFlSFbBKcDcWPXjN8qq.t3Ug7JVypduDXLaQCkFHFsBEA7o; + path=/; expires=Thu, 22-Jan-26 00:56:46 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=bOlyGVzoZ0UIzrB2Mfoafdr89NZcKOJbtyu6_aNBhmc-1769041606331-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '510' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '736' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_7a9c72bf01b245829c035470f37565dc + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_auto_openai_uninstrumented.yaml b/py/src/braintrust/wrappers/cassettes/test_auto_openai_uninstrumented.yaml new file mode 100644 index 000000000..52371d33d --- /dev/null +++ 
b/py/src/braintrust/wrappers/cassettes/test_auto_openai_uninstrumented.yaml @@ -0,0 +1,110 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Say hi again"}],"model":"gpt-4o-mini"}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '77' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - OpenAI/Python 2.15.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.15.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFJBbtswELzrFVuerUJWDMf2JSiSQ4Kgh6JF2qIIBJpcyUwpLk2umhqB + /15Isi25TYBedNjZGc0M9yUBEEaLFQi1kaxqb9ObTG2/f3z4ev8p337T2fXTh4fn+5v19vrL57uF + mLQMWj+h4iPrvaLaW2RDrodVQMnYqk4v58tsNp0vFx1Qk0bb0irP6YzS2jiT5lk+S7PLdHoQVxsy + CqNYwY8EAOCl+7Y+ncbfYgXZ5DipMUZZoVidlgBEINtOhIzRRJaOxWQAFTlG11m/NSAradw7uKVn + UNLBHfQc2FEDTFrursbcgGUTZevfNdaOAOkcsWzzd64fD8j+5NNS5QOt419UURpn4qYIKCO51lNk + 8qJD9wnAY9dHcxZR+EC154LpJ3a/mx7qEMMrvAIysbTDPD/Oz9QKjSyNjaM+hZJqg3pgDuXLRhsa + Acko879mXtPucxtX/Y/8ACiFnlEXPqA26jzwsBawvdG31k4dd4ZFxPDLKCzYYGjfQWMpG9tfjoi7 + yFgXpXEVBh9Mfz6lL/LlxUUml/PFQiT75A8AAAD//wMAqij8WUwDAAA= + headers: + CF-RAY: + - 9c1aee302e3c7ad0-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 22 Jan 2026 00:28:18 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=73R0EcFQEkPbDyJMN0a7VB2r.UkhdNfi.yyTi5ejH28-1769041698-1.0.1.1-fmgaiU4zraDm3rPyEYma0tuNIowoxzKY2k9GhELaNuF3UPmPH2c2dGmdmQjQ1G9Qhh3YtVgVac_QZTjjlviiJuJMv2dNP54pK_iOh2QPOuM; + path=/; expires=Thu, 22-Jan-26 00:58:18 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - 
_cfuvid=hM3HzvCGjMHdQQ.KCIbn2EmpdT8WznQNFHLFEyfvGbY-1769041698408-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '400' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '656' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_2c19458b1431495a867f2ae2a80ce3cb + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_auto_pydantic_ai.yaml b/py/src/braintrust/wrappers/cassettes/test_auto_pydantic_ai.yaml new file mode 100644 index 000000000..32c50c9d2 --- /dev/null +++ b/py/src/braintrust/wrappers/cassettes/test_auto_pydantic_ai.yaml @@ -0,0 +1,112 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Say hi"}],"model":"gpt-4o-mini","max_completion_tokens":100,"stream":false}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '114' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - pydantic-ai/1.44.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.15.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + 
x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFLLbtswELzrK7Y8W4Vsq4ntSw8tmvTUBuihDwQCQ64kJhSXIFdtjcD/ + XlCyLbkPoBcddnZGM8N9zgCE0WIHQrWSVedt/rbQhW8/lF/iHX69a8q9evOx+fzp6fGdu7kRi8Sg + h0dUfGK9VNR5i2zIjbAKKBmT6vL6aluUq/X61QB0pNEmWuM5LynvjDP5qliVeXGdLzdHdktGYRQ7 + +JYBADwP3+TTafwpdlAsTpMOY5QNit15CUAEsmkiZIwmsnQsFhOoyDG6wfqtAW4x4Au4pR+gpIP3 + MHJgTz0wabl/PecGrPsok3/XWzsDpHPEMuUfXN8fkcPZp6XGB3qIv1FFbZyJbRVQRnLJU2TyYkAP + GcD90Ed/EVH4QJ3niukJh99tRzUxPcKELY9VCSaWdjY/kS7EKo0sjY2zOoWSqkU9MafuZa8NzYBs + FvlPM3/THmMb1/yP/AQohZ5RVz6gNuoy8LQWMJ3ov9bOFQ+GRcTw3Sis2GBIz6Cxlr0dD0fEfWTs + qtq4BoMPZrye2ler7XpdyO3VZiOyQ/YLAAD//wMAbBhxq0sDAAA= + headers: + CF-RAY: + - 9c1afdbedfc4cf0a-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 22 Jan 2026 00:38:55 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=.v4HHKHusX6vziKsYW5cyVzZRrAsCxp4XT463GaX0yQ-1769042335-1.0.1.1-InjFtjx7UOJ8ivwZeShYpDg8mc4QGt.4kpoe9GlkrPwH7LBqBZxH.e.oLUSXSkyh_t0ETNUXh6C5G5zGSAXLYT6oNyc6cef0jwB2ADi_S.w; + path=/; expires=Thu, 22-Jan-26 01:08:55 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=bHlkcNsEuGGe.AQuXN6zbPWK8MJ2dKBjLFcSS263aVQ-1769042335390-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '395' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '412' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + 
x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999997' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_0cc555f0b9354a85a3b0f965716d99de + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py b/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py index e6cc9445f..870ec0e16 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py @@ -105,7 +105,6 @@ def setup_claude_agent_sdk( setattr(module, "tool", wrapped_tool_fn) return True - except ImportError as e: - logger.error(f"Failed to import Claude Agent SDK: {e}") - logger.error("claude-agent-sdk is not installed. Please install it with: pip install claude-agent-sdk") + except ImportError: + # Not installed - this is expected when using auto_instrument() return False diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py index c646381fd..db2fd7295 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py @@ -23,6 +23,7 @@ _create_client_wrapper_class, _create_tool_wrapper_class, ) +from braintrust.wrappers.test_utils import verify_autoinstrument_script PROJECT_NAME = "test-claude-agent-sdk" TEST_MODEL = "claude-haiku-4-5-20251001" @@ -283,3 +284,11 @@ async def _multi_message_generator(): """Generator yielding multiple messages.""" yield _make_message("Part 1") yield _make_message("Part 2") + + +class TestAutoInstrumentClaudeAgentSDK: + """Tests for auto_instrument() with Claude Agent SDK.""" + + def test_auto_instrument_claude_agent_sdk(self): + """Test auto_instrument patches Claude Agent SDK and creates spans.""" + verify_autoinstrument_script("test_auto_claude_agent_sdk.py") diff --git 
a/py/src/braintrust/wrappers/dspy.py b/py/src/braintrust/wrappers/dspy.py index 1c6a45b83..cead5727a 100644 --- a/py/src/braintrust/wrappers/dspy.py +++ b/py/src/braintrust/wrappers/dspy.py @@ -60,6 +60,8 @@ except ImportError: raise ImportError("DSPy is not installed. Please install it with: pip install dspy") +__all__ = ["BraintrustDSpyCallback", "patch_dspy", "unpatch_dspy"] + class BraintrustDSpyCallback(BaseCallback): """Callback handler that logs DSPy execution traces to Braintrust. @@ -412,4 +414,81 @@ def on_evaluate_end( span.end() -__all__ = ["BraintrustDSpyCallback"] +def patch_dspy() -> bool: + """ + Patch DSPy to automatically add Braintrust tracing callback. + + After calling this, all calls to dspy.configure() will automatically + include the BraintrustDSpyCallback. + + Returns: + True if DSPy was patched (or already patched), False if DSPy is not installed. + + Example: + ```python + import braintrust + braintrust.patch_dspy() + + import dspy + lm = dspy.LM("openai/gpt-4o-mini") + dspy.configure(lm=lm) # BraintrustDSpyCallback auto-added! + ``` + """ + try: + import dspy + + if hasattr(dspy, "_braintrust_wrapped"): + return True # Already patched + + dspy._braintrust_original_configure = dspy.configure + + def patched_configure(*args, callbacks=None, **kwargs): + # Auto-add BraintrustDSpyCallback if not already present + if callbacks is None: + callbacks = [] + else: + callbacks = list(callbacks) + + # Check if already has Braintrust callback + has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in callbacks) + if not has_bt_callback: + callbacks.append(BraintrustDSpyCallback()) + + return dspy._braintrust_original_configure(*args, callbacks=callbacks, **kwargs) + + dspy.configure = patched_configure + dspy._braintrust_wrapped = True + return True + + except ImportError: + return False + + +def unpatch_dspy() -> bool: + """ + Restore DSPy to its original state, removing automatic Braintrust callback. 
+ + Returns: + True if DSPy was unpatched (or wasn't patched), False if DSPy is not installed. + + Example: + ```python + import braintrust + braintrust.patch_dspy() + # ... use auto-traced DSPy ... + braintrust.unpatch_dspy() # Restore original behavior + ``` + """ + try: + import dspy + + if hasattr(dspy, "_braintrust_wrapped"): + dspy.configure = dspy._braintrust_original_configure + + delattr(dspy, "_braintrust_wrapped") + delattr(dspy, "_braintrust_original_configure") + + return True + + except ImportError: + return False diff --git a/py/src/braintrust/wrappers/google_genai/__init__.py b/py/src/braintrust/wrappers/google_genai/__init__.py index d5a3d053e..f80db1503 100644 --- a/py/src/braintrust/wrappers/google_genai/__init__.py +++ b/py/src/braintrust/wrappers/google_genai/__init__.py @@ -15,7 +15,13 @@ def setup_genai( api_key: str | None = None, project_id: str | None = None, project_name: str | None = None, -): +) -> bool: + """ + Setup Braintrust integration with Google GenAI. + + Returns: + True if setup was successful, False if google-genai is not installed. + """ span = current_span() if span == NOOP_SPAN: init_logger(project=project_name, api_key=api_key, project_id=project_id) @@ -27,11 +33,8 @@ def setup_genai( genai.Client = wrap_client(genai.Client) models.Models = wrap_models(models.Models) models.AsyncModels = wrap_async_models(models.AsyncModels) - pass - except ImportError as e: - logger.error( - f"Failed to import Google ADK agents: {e}. Google ADK is not installed. 
Please install it with: pip install google-adk" - ) + return True + except ImportError: return False diff --git a/py/src/braintrust/wrappers/litellm.py b/py/src/braintrust/wrappers/litellm.py index fe94a87b1..49675c228 100644 --- a/py/src/braintrust/wrappers/litellm.py +++ b/py/src/braintrust/wrappers/litellm.py @@ -631,13 +631,16 @@ def serialize_response_format(response_format: Any) -> Any: return response_format -def patch_litellm(): +def patch_litellm() -> bool: """ Patch LiteLLM to add Braintrust tracing. This wraps litellm.completion and litellm.acompletion to automatically create Braintrust spans with detailed token metrics, timing, and costs. + Returns: + True if LiteLLM was patched (or already patched), False if LiteLLM is not installed. + Example: ```python import braintrust @@ -669,17 +672,21 @@ def patch_litellm(): litellm.responses = wrapped.responses litellm.aresponses = wrapped.aresponses litellm._braintrust_wrapped = True + return True except ImportError: - pass # litellm not available + return False -def unpatch_litellm(): +def unpatch_litellm() -> bool: """ Restore LiteLLM to its original state, removing Braintrust tracing. This undoes the patching done by patch_litellm(), restoring the original completion, acompletion, responses, and aresponses functions. + Returns: + True if LiteLLM was unpatched (or wasn't patched), False if LiteLLM is not installed. 
+ Example: ```python import braintrust @@ -704,5 +711,6 @@ def unpatch_litellm(): delattr(litellm, "_braintrust_original_acompletion") delattr(litellm, "_braintrust_original_responses") delattr(litellm, "_braintrust_original_aresponses") + return True except ImportError: - pass # litellm not available + return False diff --git a/py/src/braintrust/wrappers/pydantic_ai.py b/py/src/braintrust/wrappers/pydantic_ai.py index f8f7bd92f..4a1fde32a 100644 --- a/py/src/braintrust/wrappers/pydantic_ai.py +++ b/py/src/braintrust/wrappers/pydantic_ai.py @@ -51,9 +51,8 @@ def setup_pydantic_ai( wrap_model_classes() return True - except ImportError as e: - logger.error(f"Failed to import Pydantic AI: {e}") - logger.error("Pydantic AI is not installed. Please install it with: pip install pydantic-ai-slim") + except ImportError: + # Not installed - this is expected when using auto_instrument() return False diff --git a/py/src/braintrust/wrappers/test_agno.py b/py/src/braintrust/wrappers/test_agno.py index 4b84a4b50..4fd3d8455 100644 --- a/py/src/braintrust/wrappers/test_agno.py +++ b/py/src/braintrust/wrappers/test_agno.py @@ -8,6 +8,7 @@ from braintrust import logger from braintrust.test_helpers import init_test_logger from braintrust.wrappers.agno import setup_agno +from braintrust.wrappers.test_utils import verify_autoinstrument_script TEST_ORG_ID = "test-org-123" PROJECT_NAME = "test-agno-app" @@ -94,3 +95,11 @@ def test_agno_simple_agent_execution(memory_logger): assert llm_span["metrics"]["prompt_tokens"] == 38 assert llm_span["metrics"]["completion_tokens"] == 4 assert llm_span["metrics"]["tokens"] == 42 + + +class TestAutoInstrumentAgno: + """Tests for auto_instrument() with Agno.""" + + def test_auto_instrument_agno(self): + """Test auto_instrument patches Agno and creates spans.""" + verify_autoinstrument_script("test_auto_agno.py") diff --git a/py/src/braintrust/wrappers/test_anthropic.py b/py/src/braintrust/wrappers/test_anthropic.py index cf5fd45c3..4fcc6d804 100644 
--- a/py/src/braintrust/wrappers/test_anthropic.py +++ b/py/src/braintrust/wrappers/test_anthropic.py @@ -9,6 +9,7 @@ from braintrust import logger from braintrust.test_helpers import init_test_logger from braintrust.wrappers.anthropic import wrap_anthropic +from braintrust.wrappers.test_utils import run_in_subprocess, verify_autoinstrument_script TEST_ORG_ID = "test-org-123" PROJECT_NAME = "test-anthropic-app" @@ -481,3 +482,185 @@ async def test_anthropic_beta_messages_streaming_async(memory_logger): assert metrics["prompt_tokens"] == usage.input_tokens assert metrics["completion_tokens"] == usage.output_tokens assert metrics["tokens"] == usage.input_tokens + usage.output_tokens + + +class TestPatchAnthropic: + """Tests for patch_anthropic() / unpatch_anthropic().""" + + def test_patch_anthropic_sets_wrapped_flag(self): + """patch_anthropic() should set _braintrust_wrapped on anthropic module.""" + result = run_in_subprocess(""" + from braintrust.wrappers.anthropic import patch_anthropic + import anthropic + + assert not hasattr(anthropic, "_braintrust_wrapped") + patch_anthropic() + assert hasattr(anthropic, "_braintrust_wrapped") + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_patch_anthropic_wraps_new_clients(self): + """After patch_anthropic(), new Anthropic() clients should be wrapped.""" + result = run_in_subprocess(""" + from braintrust.wrappers.anthropic import patch_anthropic + patch_anthropic() + + import anthropic + client = anthropic.Anthropic(api_key="test-key") + + # Check that messages is wrapped + messages_type = type(client.messages).__name__ + print(f"messages_type={messages_type}") + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_unpatch_anthropic_restores_original(self): + """unpatch_anthropic() should restore original classes.""" + result = run_in_subprocess(""" + import 
anthropic + from braintrust.wrappers.anthropic import patch_anthropic, unpatch_anthropic + + original_class = anthropic.Anthropic + + patch_anthropic() + patched_class = anthropic.Anthropic + assert patched_class is not original_class + + unpatch_anthropic() + restored_class = anthropic.Anthropic + assert restored_class is original_class + assert not hasattr(anthropic, "_braintrust_wrapped") + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_patch_anthropic_idempotent(self): + """Multiple patch_anthropic() calls should be safe.""" + result = run_in_subprocess(""" + from braintrust.wrappers.anthropic import patch_anthropic + import anthropic + + patch_anthropic() + first_class = anthropic.Anthropic + + patch_anthropic() # Second call + second_class = anthropic.Anthropic + + assert first_class is second_class + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_patch_anthropic_creates_spans(self): + """patch_anthropic() should create spans when making API calls.""" + result = run_in_subprocess(""" + from braintrust.wrappers.anthropic import patch_anthropic + from braintrust.test_helpers import init_test_logger + from braintrust import logger + + # Set up memory logger + init_test_logger("test-auto") + with logger._internal_with_memory_background_logger() as memory_logger: + patch_anthropic() + + import anthropic + client = anthropic.Anthropic() + + # Make a call within a span context + import braintrust + with braintrust.start_span(name="test") as span: + try: + # This will fail without API key, but span should still be created + client.messages.create( + model="claude-3-5-haiku-latest", + max_tokens=100, + messages=[{"role": "user", "content": "hi"}], + ) + except Exception: + pass # Expected without API key + + # Check that spans were logged + spans = memory_logger.pop() + # Should have at least the 
parent span + assert len(spans) >= 1, f"Expected spans, got {spans}" + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + +class TestPatchAnthropicSpans: + """VCR-based tests verifying that patch_anthropic() produces spans.""" + + @pytest.mark.vcr + def test_patch_anthropic_creates_spans(self, memory_logger): + """patch_anthropic() should create spans when making API calls.""" + from braintrust.wrappers.anthropic import patch_anthropic, unpatch_anthropic + + assert not memory_logger.pop() + + patch_anthropic() + try: + client = anthropic.Anthropic() + response = client.messages.create( + model="claude-3-5-haiku-latest", + max_tokens=100, + messages=[{"role": "user", "content": "Say hi"}], + ) + assert response.content[0].text + + # Verify span was created + spans = memory_logger.pop() + assert len(spans) == 1 + span = spans[0] + assert span["metadata"]["provider"] == "anthropic" + assert "claude" in span["metadata"]["model"] + assert span["input"] + finally: + unpatch_anthropic() + + +class TestPatchAnthropicAsyncSpans: + """VCR-based tests verifying that patch_anthropic() produces spans for async clients.""" + + @pytest.mark.vcr + @pytest.mark.asyncio + async def test_patch_anthropic_async_creates_spans(self, memory_logger): + """patch_anthropic() should create spans for async API calls.""" + from braintrust.wrappers.anthropic import patch_anthropic, unpatch_anthropic + + assert not memory_logger.pop() + + patch_anthropic() + try: + client = anthropic.AsyncAnthropic() + response = await client.messages.create( + model="claude-3-5-haiku-latest", + max_tokens=100, + messages=[{"role": "user", "content": "Say hi async"}], + ) + assert response.content[0].text + + # Verify span was created + spans = memory_logger.pop() + assert len(spans) == 1 + span = spans[0] + assert span["metadata"]["provider"] == "anthropic" + assert "claude" in span["metadata"]["model"] + assert span["input"] + finally: + 
unpatch_anthropic() + + +class TestAutoInstrumentAnthropic: + """Tests for auto_instrument() with Anthropic.""" + + def test_auto_instrument_anthropic(self): + """Test auto_instrument patches Anthropic, creates spans, and uninstrument works.""" + verify_autoinstrument_script("test_auto_anthropic.py") diff --git a/py/src/braintrust/wrappers/test_dspy.py b/py/src/braintrust/wrappers/test_dspy.py index e5661d074..1652b423f 100644 --- a/py/src/braintrust/wrappers/test_dspy.py +++ b/py/src/braintrust/wrappers/test_dspy.py @@ -7,6 +7,7 @@ from braintrust import logger from braintrust.test_helpers import init_test_logger from braintrust.wrappers.dspy import BraintrustDSpyCallback +from braintrust.wrappers.test_utils import run_in_subprocess, verify_autoinstrument_script PROJECT_NAME = "test-dspy-app" MODEL = "openai/gpt-4o-mini" @@ -58,3 +59,159 @@ def test_dspy_callback(memory_logger): # Verify span parenting (LM span should have parent) assert lm_span.get("span_parents") # LM span should have parent + + +class TestPatchDSPy: + """Tests for patch_dspy() / unpatch_dspy().""" + + def test_patch_dspy_sets_wrapped_flag(self): + """patch_dspy() should set _braintrust_wrapped on dspy module.""" + result = run_in_subprocess(""" + dspy = __import__("dspy") + from braintrust.wrappers.dspy import patch_dspy + + assert not hasattr(dspy, "_braintrust_wrapped") + patch_dspy() + assert hasattr(dspy, "_braintrust_wrapped") + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_patch_dspy_wraps_configure(self): + """After patch_dspy(), dspy.configure() should auto-add BraintrustDSpyCallback.""" + result = run_in_subprocess(""" + from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback + patch_dspy() + + import dspy + + # Configure without explicitly adding callback + dspy.configure(lm=None) + + # Check that BraintrustDSpyCallback was auto-added + from dspy.dsp.utils.settings import settings + 
callbacks = settings.callbacks + has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in callbacks) + assert has_bt_callback, f"Expected BraintrustDSpyCallback in {callbacks}" + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_patch_dspy_preserves_existing_callbacks(self): + """patch_dspy() should preserve user-provided callbacks.""" + result = run_in_subprocess(""" + from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback + patch_dspy() + + import dspy + from dspy.utils.callback import BaseCallback + + class MyCallback(BaseCallback): + pass + + my_callback = MyCallback() + dspy.configure(lm=None, callbacks=[my_callback]) + + from dspy.dsp.utils.settings import settings + callbacks = settings.callbacks + + # Should have both callbacks + has_my_callback = any(cb is my_callback for cb in callbacks) + has_bt_callback = any(isinstance(cb, BraintrustDSpyCallback) for cb in callbacks) + + assert has_my_callback, "User callback should be preserved" + assert has_bt_callback, "BraintrustDSpyCallback should be added" + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_patch_dspy_does_not_duplicate_callback(self): + """patch_dspy() should not add duplicate BraintrustDSpyCallback.""" + result = run_in_subprocess(""" + from braintrust.wrappers.dspy import patch_dspy, BraintrustDSpyCallback + patch_dspy() + + import dspy + + # User explicitly adds BraintrustDSpyCallback + bt_callback = BraintrustDSpyCallback() + dspy.configure(lm=None, callbacks=[bt_callback]) + + from dspy.dsp.utils.settings import settings + callbacks = settings.callbacks + + # Should only have one BraintrustDSpyCallback + bt_callbacks = [cb for cb in callbacks if isinstance(cb, BraintrustDSpyCallback)] + assert len(bt_callbacks) == 1, f"Expected 1 BraintrustDSpyCallback, got {len(bt_callbacks)}" + print("SUCCESS") 
+ """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_unpatch_dspy_restores_original(self): + """unpatch_dspy() should restore original configure function.""" + result = run_in_subprocess(""" + import dspy + from braintrust.wrappers.dspy import patch_dspy, unpatch_dspy + + original_configure = dspy.configure + + patch_dspy() + patched_configure = dspy.configure + assert patched_configure is not original_configure + + unpatch_dspy() + restored_configure = dspy.configure + assert restored_configure is original_configure + assert not hasattr(dspy, "_braintrust_wrapped") + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_patch_dspy_idempotent(self): + """Multiple patch_dspy() calls should be safe.""" + result = run_in_subprocess(""" + from braintrust.wrappers.dspy import patch_dspy + import dspy + + patch_dspy() + first_configure = dspy.configure + + patch_dspy() # Second call + second_configure = dspy.configure + + assert first_configure is second_configure + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_unpatch_dspy_idempotent(self): + """Multiple unpatch_dspy() calls should be safe.""" + result = run_in_subprocess(""" + from braintrust.wrappers.dspy import patch_dspy, unpatch_dspy + import dspy + + original_configure = dspy.configure + + patch_dspy() + unpatch_dspy() + unpatch_dspy() # Second call - should be no-op + + assert dspy.configure is original_configure + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + +class TestAutoInstrumentDSPy: + """Tests for auto_instrument() with DSPy.""" + + def test_auto_instrument_dspy(self): + """Test auto_instrument patches DSPy, creates spans, and uninstrument works.""" + verify_autoinstrument_script("test_auto_dspy.py") 
diff --git a/py/src/braintrust/wrappers/test_google_genai.py b/py/src/braintrust/wrappers/test_google_genai.py index 27a65370f..02fc21f55 100644 --- a/py/src/braintrust/wrappers/test_google_genai.py +++ b/py/src/braintrust/wrappers/test_google_genai.py @@ -6,6 +6,7 @@ from braintrust import logger from braintrust.test_helpers import init_test_logger from braintrust.wrappers.google_genai import setup_genai +from braintrust.wrappers.test_utils import verify_autoinstrument_script from google.genai import types from google.genai.client import Client @@ -637,3 +638,11 @@ class TestModel(BaseModel): # Attachment should be preserved assert copied["context_file"] is attachment + + +class TestAutoInstrumentGoogleGenAI: + """Tests for auto_instrument() with Google GenAI.""" + + def test_auto_instrument_google_genai(self): + """Test auto_instrument patches Google GenAI and creates spans.""" + verify_autoinstrument_script("test_auto_google_genai.py") diff --git a/py/src/braintrust/wrappers/test_litellm.py b/py/src/braintrust/wrappers/test_litellm.py index 6bda8c1c0..3471dc1bf 100644 --- a/py/src/braintrust/wrappers/test_litellm.py +++ b/py/src/braintrust/wrappers/test_litellm.py @@ -6,7 +6,7 @@ from braintrust import logger from braintrust.test_helpers import assert_dict_matches, init_test_logger from braintrust.wrappers.litellm import wrap_litellm -from braintrust.wrappers.test_utils import assert_metrics_are_valid +from braintrust.wrappers.test_utils import assert_metrics_are_valid, verify_autoinstrument_script TEST_ORG_ID = "test-org-litellm-py-tracing" PROJECT_NAME = "test-project-litellm-py-tracing" @@ -765,3 +765,11 @@ async def test_patch_litellm_aresponses(memory_logger): assert TEST_PROMPT in str(span["input"]) finally: unpatch_litellm() + + +class TestAutoInstrumentLiteLLM: + """Tests for auto_instrument() with LiteLLM.""" + + def test_auto_instrument_litellm(self): + """Test auto_instrument patches LiteLLM, creates spans, and uninstrument works.""" + 
verify_autoinstrument_script("test_auto_litellm.py") diff --git a/py/src/braintrust/wrappers/test_openai.py b/py/src/braintrust/wrappers/test_openai.py index a693bd3a5..0d1e5f6ca 100644 --- a/py/src/braintrust/wrappers/test_openai.py +++ b/py/src/braintrust/wrappers/test_openai.py @@ -6,7 +6,7 @@ import pytest from braintrust import logger, wrap_openai from braintrust.test_helpers import assert_dict_matches, init_test_logger -from braintrust.wrappers.test_utils import assert_metrics_are_valid +from braintrust.wrappers.test_utils import assert_metrics_are_valid, run_in_subprocess, verify_autoinstrument_script from openai import AsyncOpenAI from openai._types import NOT_GIVEN from pydantic import BaseModel @@ -1681,3 +1681,329 @@ def export(self): spans = memory_logger.pop() root_span = spans[0] assert root_span["metadata"]["conversation_id"] == "test-12345", "Should log trace metadata" + + +class TestPatchOpenAI: + """Tests for patch_openai() / unpatch_openai().""" + + def test_patch_openai_sets_wrapped_flag(self): + """patch_openai() should set _braintrust_wrapped on openai module.""" + result = run_in_subprocess(""" + from braintrust.oai import patch_openai + import openai + + assert not hasattr(openai, "_braintrust_wrapped") + patch_openai() + assert hasattr(openai, "_braintrust_wrapped") + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_patch_openai_wraps_new_clients(self): + """After patch_openai(), new OpenAI() clients should be wrapped.""" + result = run_in_subprocess(""" + from braintrust.oai import patch_openai + patch_openai() + + import openai + client = openai.OpenAI(api_key="test-key") + + # Check that chat completions is wrapped (our wrapper adds tracing) + # The wrapper replaces client.chat with a wrapped version + chat_type = type(client.chat).__name__ + print(f"chat_type={chat_type}") + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: 
{result.stderr}" + assert "SUCCESS" in result.stdout + + def test_patch_openai_creates_spans(self): + """patch_openai() should create spans when making API calls.""" + result = run_in_subprocess(""" + from braintrust.oai import patch_openai + from braintrust.test_helpers import init_test_logger + from braintrust import logger + + # Set up memory logger + init_test_logger("test-auto") + with logger._internal_with_memory_background_logger() as memory_logger: + patch_openai() + + import openai + client = openai.OpenAI() + + # Make a call within a span context + import braintrust + with braintrust.start_span(name="test") as span: + try: + # This will fail without API key, but span should still be created + client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "hi"}], + ) + except Exception: + pass # Expected without API key + + # Check that spans were logged + spans = memory_logger.pop() + # Should have at least the parent span + assert len(spans) >= 1, f"Expected spans, got {spans}" + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_patch_openai_before_import(self): + """patch_openai() should work when called before importing openai.""" + result = run_in_subprocess(""" + from braintrust.oai import patch_openai + + # Patch BEFORE importing openai + patch_openai() + + import openai + assert hasattr(openai, "_braintrust_wrapped") + + client = openai.OpenAI(api_key="test-key") + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_patch_openai_after_import(self): + """patch_openai() should work when called after importing openai.""" + result = run_in_subprocess(""" + import openai + from braintrust.oai import patch_openai + + # Patch AFTER importing openai + patch_openai() + + assert hasattr(openai, "_braintrust_wrapped") + + client = openai.OpenAI(api_key="test-key") + 
print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_unpatch_openai_restores_original(self): + """unpatch_openai() should restore original classes.""" + result = run_in_subprocess(""" + import openai + from braintrust.oai import patch_openai, unpatch_openai + + original_class = openai.OpenAI + + patch_openai() + patched_class = openai.OpenAI + assert patched_class is not original_class + + unpatch_openai() + restored_class = openai.OpenAI + assert restored_class is original_class + assert not hasattr(openai, "_braintrust_wrapped") + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_patch_openai_idempotent(self): + """Multiple patch_openai() calls should be safe.""" + result = run_in_subprocess(""" + from braintrust.oai import patch_openai, unpatch_openai + import openai + + patch_openai() + first_class = openai.OpenAI + + patch_openai() # Second call + second_class = openai.OpenAI + + assert first_class is second_class + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_unpatch_openai_idempotent(self): + """Multiple unpatch_openai() calls should be safe.""" + result = run_in_subprocess(""" + from braintrust.oai import patch_openai, unpatch_openai + import openai + + original_class = openai.OpenAI + + patch_openai() + unpatch_openai() + unpatch_openai() # Second call - should be no-op + + assert openai.OpenAI is original_class + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_patch_openai_chains_with_other_patches(self): + """patch_openai() should chain with other libraries that patch OpenAI.""" + result = run_in_subprocess(""" + import openai + + # Simulate another library (like Datadog) patching OpenAI first + other_library_init_called = 
[] + + class OtherLibraryOpenAI(openai.OpenAI): + def __init__(self, *args, **kwargs): + other_library_init_called.append(True) + super().__init__(*args, **kwargs) + + openai.OpenAI = OtherLibraryOpenAI + + # Now apply our patch - should subclass OtherLibraryOpenAI + from braintrust.oai import patch_openai + patch_openai() + + # Create a client - both patches should run + client = openai.OpenAI(api_key="test-key") + + # Verify other library's __init__ was called (chaining works) + assert len(other_library_init_called) == 1, "Other library's patch should have run" + + # Verify our patch was applied (client has wrapped chat) + assert hasattr(client, "chat"), "Client should have chat attribute" + + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_unpatch_openai_restores_to_previous_patch(self): + """unpatch_openai() should restore to previous patch, not original.""" + result = run_in_subprocess(""" + import openai + + original_class = openai.OpenAI + + # Simulate another library patching first + class OtherLibraryOpenAI(openai.OpenAI): + pass + + openai.OpenAI = OtherLibraryOpenAI + + # Apply our patch + from braintrust.oai import patch_openai, unpatch_openai + patch_openai() + + # Unpatch - should restore to OtherLibraryOpenAI, not original + unpatch_openai() + + assert openai.OpenAI is OtherLibraryOpenAI, "Should restore to previous patch" + assert openai.OpenAI is not original_class, "Should not restore to original" + + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + def test_patch_openai_chains_async_client(self): + """patch_openai() should chain with other libraries for AsyncOpenAI too.""" + result = run_in_subprocess(""" + import openai + + # Simulate another library patching AsyncOpenAI first + other_library_init_called = [] + + class OtherLibraryAsyncOpenAI(openai.AsyncOpenAI): + def __init__(self, 
*args, **kwargs): + other_library_init_called.append(True) + super().__init__(*args, **kwargs) + + openai.AsyncOpenAI = OtherLibraryAsyncOpenAI + + # Now apply our patch + from braintrust.oai import patch_openai + patch_openai() + + # Create an async client - both patches should run + client = openai.AsyncOpenAI(api_key="test-key") + + # Verify other library's __init__ was called + assert len(other_library_init_called) == 1, "Other library's patch should have run" + + # Verify our patch was applied + assert hasattr(client, "chat"), "Client should have chat attribute" + + print("SUCCESS") + """) + assert result.returncode == 0, f"Failed: {result.stderr}" + assert "SUCCESS" in result.stdout + + +class TestPatchOpenAISpans: + """VCR-based tests verifying that patch_openai() produces spans.""" + + @pytest.mark.vcr + def test_patch_openai_creates_spans(self, memory_logger): + """patch_openai() should create spans when making API calls.""" + from braintrust.oai import patch_openai, unpatch_openai + + assert not memory_logger.pop() + + patch_openai() + try: + client = openai.OpenAI() + response = client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Say hi"}], + ) + assert response.choices[0].message.content + + # Verify span was created + spans = memory_logger.pop() + assert len(spans) == 1 + span = spans[0] + assert span["metadata"]["provider"] == "openai" + assert "gpt-4o-mini" in span["metadata"]["model"] + assert span["input"] + finally: + unpatch_openai() + + +class TestPatchOpenAIAsyncSpans: + """VCR-based tests verifying that patch_openai() produces spans for async clients.""" + + @pytest.mark.vcr + @pytest.mark.asyncio + async def test_patch_openai_async_creates_spans(self, memory_logger): + """patch_openai() should create spans for async API calls.""" + from braintrust.oai import patch_openai, unpatch_openai + + assert not memory_logger.pop() + + patch_openai() + try: + client = openai.AsyncOpenAI() + response = await 
client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Say hi async"}], + ) + assert response.choices[0].message.content + + # Verify span was created + spans = memory_logger.pop() + assert len(spans) == 1 + span = spans[0] + assert span["metadata"]["provider"] == "openai" + assert "gpt-4o-mini" in span["metadata"]["model"] + assert span["input"] + finally: + unpatch_openai() + + +class TestAutoInstrumentOpenAI: + """Tests for auto_instrument() with OpenAI.""" + + def test_auto_instrument_openai(self): + """Test auto_instrument patches OpenAI, creates spans, and uninstrument works.""" + verify_autoinstrument_script("test_auto_openai.py") diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py b/py/src/braintrust/wrappers/test_pydantic_ai_integration.py index 4c8f05e13..fa79f7e6e 100644 --- a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py +++ b/py/src/braintrust/wrappers/test_pydantic_ai_integration.py @@ -9,6 +9,7 @@ from braintrust import logger, setup_pydantic_ai, traced from braintrust.span_types import SpanTypeAttribute from braintrust.test_helpers import init_test_logger +from braintrust.wrappers.test_utils import verify_autoinstrument_script from pydantic import BaseModel from pydantic_ai import Agent, ModelSettings from pydantic_ai.messages import ModelRequest, UserPromptPart @@ -2572,3 +2573,11 @@ async def test_attachment_in_result_data(memory_logger): copied = bt_safe_deep_copy(result_data) assert copied["output_file"] is ext_attachment assert copied["success"] is True + + +class TestAutoInstrumentPydanticAI: + """Tests for auto_instrument() with Pydantic AI.""" + + def test_auto_instrument_pydantic_ai(self): + """Test auto_instrument patches Pydantic AI and creates spans.""" + verify_autoinstrument_script("test_auto_pydantic_ai.py") diff --git a/py/src/braintrust/wrappers/test_utils.py b/py/src/braintrust/wrappers/test_utils.py index d4116b917..f397ffc69 100644 --- 
import os
import subprocess
import sys
import textwrap
from contextlib import contextmanager
from pathlib import Path

# Directories that live next to this module.
AUTO_TEST_SCRIPTS_DIR = Path(__file__).parent / "auto_test_scripts"
CASSETTES_DIR = Path(__file__).parent / "cassettes"


def _run_python(args: list[str], timeout: int) -> subprocess.CompletedProcess:
    """Run the current interpreter with *args*, capturing stdout/stderr as text."""
    return subprocess.run(
        [sys.executable, *args],
        capture_output=True,
        text=True,
        timeout=timeout,
    )


def run_in_subprocess(code: str, timeout: int = 30) -> subprocess.CompletedProcess:
    """Run Python code in a fresh subprocess.

    Args:
        code: Source to execute via ``python -c``. It is passed through
            ``textwrap.dedent`` first, so an indented triple-quoted literal
            can be used directly.
        timeout: Seconds to wait before ``subprocess.TimeoutExpired`` is raised.

    Returns:
        The completed process with ``returncode``, ``stdout`` and ``stderr``.
        A non-zero exit code is NOT turned into an exception; callers assert
        on the result themselves.
    """
    return _run_python(["-c", textwrap.dedent(code)], timeout)


def verify_autoinstrument_script(script_name: str, timeout: int = 30) -> subprocess.CompletedProcess:
    """Run a test script from the auto_test_scripts directory.

    Args:
        script_name: File name relative to ``AUTO_TEST_SCRIPTS_DIR``.
        timeout: Seconds to wait before ``subprocess.TimeoutExpired`` is raised.

    Returns:
        The completed process of a successful run.

    Raises:
        AssertionError: If the script exits with a non-zero code. The message
            carries the exit code plus both captured streams, so output the
            script printed before failing is not lost.
    """
    script_path = AUTO_TEST_SCRIPTS_DIR / script_name
    result = _run_python([str(script_path)], timeout)
    assert result.returncode == 0, (
        f"Script {script_name} failed (exit code {result.returncode}):\n"
        f"--- stderr ---\n{result.stderr}\n"
        f"--- stdout ---\n{result.stdout}"
    )
    return result


# NOTE: assert_metrics_are_valid(metrics, start=None, end=None) sits between
# these helpers in the real module. The patch only shows fragments of it as
# hunk context, so it is left untouched and is not reproduced here.


@contextmanager
def autoinstrument_test_context(cassette_name: str):
    """Context manager for auto_instrument tests.

    Sets up VCR and memory_logger, yields memory_logger for direct use.

    Args:
        cassette_name: Cassette file stem; the cassette is read from
            ``CASSETTES_DIR / f"{cassette_name}.yaml"``.

    Usage:
        with autoinstrument_test_context("test_auto_openai") as memory_logger:
            # make API call
            spans = memory_logger.pop()
    """
    # Imported here because only this helper needs the VCR / Braintrust logger
    # stack; the subprocess helpers above stay importable on stdlib alone.
    import vcr
    from braintrust import logger
    from braintrust.test_helpers import init_test_logger

    cassette_path = CASSETTES_DIR / f"{cassette_name}.yaml"

    init_test_logger("test-auto-instrument")

    # When the CI environment variable is set, use record mode "none";
    # otherwise "once".
    record_mode = "none" if os.environ.get("CI") else "once"

    with logger._internal_with_memory_background_logger() as memory_logger:
        memory_logger.pop()  # Clear any prior spans

        my_vcr = vcr.VCR(
            # Keep credentials out of recorded cassettes.
            filter_headers=["authorization", "api-key", "x-api-key", "x-goog-api-key"],
            record_mode=record_mode,
        )
        with my_vcr.use_cassette(str(cassette_path)):
            yield memory_logger