2 changes: 1 addition & 1 deletion integrations/langchain-js/package.json
@@ -1,6 +1,6 @@
{
"name": "@braintrust/langchain-js",
"version": "0.2.2",
"version": "0.2.3",
"description": "SDK for integrating Braintrust with LangChain.js",
"main": "./dist/index.js",
"module": "./dist/index.mjs",
158 changes: 158 additions & 0 deletions integrations/langchain-js/src/BraintrustCallbackHandler.test.ts
@@ -1,3 +1,5 @@
import { ChatAnthropic } from "@langchain/anthropic";
import { HumanMessage, SystemMessage } from "@langchain/core/messages";
import { ChatPromptTemplate, PromptTemplate } from "@langchain/core/prompts";
import { RunnableMap } from "@langchain/core/runnables";
import { tool } from "@langchain/core/tools";
@@ -1076,3 +1078,159 @@ it("should handle nested agent action with parent run id", async () => {
span_parents: [root_span_id],
});
});

it("should extract prompt caching tokens from Anthropic response", async () => {
const logs: LogsRequest[] = [];

const ANTHROPIC_RESPONSE_WITH_CACHE = {
model: "claude-sonnet-4-5-20250929",
id: "msg_01PKL7azhWzdzdXC5yojHG6V",
type: "message",
role: "assistant",
content: [
{
type: "text",
text: 'The first type of testing mentioned is **Unit Testing**, which is described as "Testing individual components or functions in isolation."',
},
],
stop_reason: "end_turn",
stop_sequence: null,
usage: {
input_tokens: 14,
cache_creation_input_tokens: 2042,
cache_read_input_tokens: 0,
cache_creation: {
ephemeral_5m_input_tokens: 0,
ephemeral_1h_input_tokens: 0,
},
output_tokens: 27,
service_tier: "standard",
},
};

const ANTHROPIC_RESPONSE_WITH_CACHE_READ = {
model: "claude-sonnet-4-5-20250929",
id: "msg_01PjHqPkFstyG2Bcqum9XKD8",
type: "message",
role: "assistant",
content: [
{
type: "text",
text: 'Based on the document:\n\n1. **First type of testing mentioned:** Unit Testing - described as "Testing individual components or functions in isolation"\n\n2. **Python testing frameworks mentioned:** \n - pytest (described as "Feature-rich testing framework")\n - unittest (described as "Built-in Python testing module")',
},
],
stop_reason: "end_turn",
stop_sequence: null,
usage: {
input_tokens: 23,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 2042,
cache_creation: {
ephemeral_5m_input_tokens: 0,
ephemeral_1h_input_tokens: 0,
},
output_tokens: 71,
service_tier: "standard",
},
};

const anthropicResponses = [
ANTHROPIC_RESPONSE_WITH_CACHE,
ANTHROPIC_RESPONSE_WITH_CACHE_READ,
];

server.use(
http.post("https://api.anthropic.com/v1/messages", ({ request }) => {
const message = anthropicResponses.shift();
return HttpResponse.json(message);
}),

http.post(/.+logs.+/, async ({ request }) => {
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions
logs.push((await request.json()) as LogsRequest);
console.log(logs.slice(-1)[0]);
return HttpResponse.json(["cache-tokens-span-id"]);
}),
);

const model = new ChatAnthropic({ model: "claude-sonnet-4-5-20250929" });

// Use a long system message to simulate cache-eligible content
const longText = `
# Comprehensive Guide to Software Testing Methods

Software testing is a critical component of the software development lifecycle.
This guide covers various testing methodologies, best practices, and tools.

## Types of Testing
- Unit Testing: Testing individual components or functions in isolation
- Integration Testing: Testing how components work together
- End-to-End Testing: Testing the entire application flow

## Python Testing Tools
- pytest: Feature-rich testing framework
- unittest: Built-in Python testing module
`.repeat(20); // Repeat to ensure enough tokens for caching

const messages = [
new SystemMessage({
content: [
{
type: "text",
text: longText,
cache_control: { type: "ephemeral" },
},
],
}),
new HumanMessage("What is the first type of testing mentioned?"),
];

// First invocation - should create cache
await model.invoke(messages, { callbacks: [handler] });

await flush();

const { spans: firstSpans } = logsToSpans(logs);
logs.length = 0; // Clear logs for next invocation

const firstLlmSpan = firstSpans.find(
(s) => s.span_attributes?.type === "llm",
);

expect(firstLlmSpan).toBeDefined();
expect(firstLlmSpan?.metrics).toMatchObject({
prompt_tokens: 14,
completion_tokens: 27,
});

console.log(firstLlmSpan?.metrics);

// After fix: should have cache creation tokens
expect(firstLlmSpan?.metrics?.prompt_cache_creation_tokens).toBe(2042);

// Second invocation - should read from cache
await model.invoke(
[
...messages,
new HumanMessage("What testing framework is mentioned for Python?"),
],
{ callbacks: [handler] },
);

await flush();

const { spans: secondSpans } = logsToSpans(logs);

const secondLlmSpan = secondSpans.find(
(s) => s.span_attributes?.type === "llm",
);

expect(secondLlmSpan).toBeDefined();
expect(secondLlmSpan?.metrics).toMatchObject({
prompt_tokens: 23,
completion_tokens: 71,
});

// After fix: should have cache read tokens
expect(secondLlmSpan?.metrics?.prompt_cached_tokens).toBe(2042);
});
3 changes: 3 additions & 0 deletions integrations/langchain-js/src/BraintrustCallbackHandler.ts
@@ -586,10 +586,13 @@ const getMetricsFromResponse = (response: LLMResult | ChatResult) => {
const message: any = generation.message;
const usageMetadata = message.usage_metadata;
if (usageMetadata && typeof usageMetadata === "object") {
const inputTokenDetails = usageMetadata.input_token_details;
const extracted = cleanObject({
total_tokens: usageMetadata.total_tokens,
prompt_tokens: usageMetadata.input_tokens,
completion_tokens: usageMetadata.output_tokens,
prompt_cache_creation_tokens: inputTokenDetails?.cache_creation,
prompt_cached_tokens: inputTokenDetails?.cache_read,
});
Object.assign(metrics, extracted);
break;
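For context on the three lines added above: LangChain surfaces Anthropic's prompt-caching counts under usage_metadata.input_token_details (cache_creation and cache_read, as the new test exercises), and the handler now forwards them to Braintrust as prompt_cache_creation_tokens and prompt_cached_tokens. The sketch below is a minimal standalone illustration of that mapping; the UsageMetadata interface, the inlined cleanObject stand-in, and the sample numbers are assumptions for illustration, not the handler's actual implementation.

```ts
// Minimal sketch of the metric mapping, under the assumptions noted above.

interface UsageMetadata {
  input_tokens: number;
  output_tokens: number;
  total_tokens: number;
  input_token_details?: {
    cache_creation?: number;
    cache_read?: number;
  };
}

// Stand-in for the handler's cleanObject helper: drop undefined values.
const cleanObject = (obj: Record<string, number | undefined>) =>
  Object.fromEntries(Object.entries(obj).filter(([, v]) => v !== undefined));

// Map LangChain usage metadata onto Braintrust metric names.
const toBraintrustMetrics = (usage: UsageMetadata) =>
  cleanObject({
    total_tokens: usage.total_tokens,
    prompt_tokens: usage.input_tokens,
    completion_tokens: usage.output_tokens,
    prompt_cache_creation_tokens: usage.input_token_details?.cache_creation,
    prompt_cached_tokens: usage.input_token_details?.cache_read,
  });

// Illustrative values modeled on the first invocation in the test (cache write):
console.log(
  toBraintrustMetrics({
    input_tokens: 14,
    output_tokens: 27,
    total_tokens: 41,
    input_token_details: { cache_creation: 2042, cache_read: 0 },
  }),
);
// => { total_tokens: 41, prompt_tokens: 14, completion_tokens: 27,
//      prompt_cache_creation_tokens: 2042, prompt_cached_tokens: 0 }
```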
5 changes: 3 additions & 2 deletions integrations/langchain-js/src/test/setup.ts
@@ -3,7 +3,7 @@ import { setupServer } from "msw/node";
import { afterAll, afterEach, beforeAll } from "vitest";

export const server = setupServer(
http.post("http://braintrust.local/api/apikey/login", () => {
http.post(/.+\/api\/apikey\/login/, () => {
return HttpResponse.json({
org_info: [
{
@@ -19,7 +19,7 @@
});
}),

http.post("http://braintrust.local/api/project/register", () => {
http.post(/.+\/api\/project\/register/, () => {
return HttpResponse.json({
project: {
id: "209220fc-d3bd-4fab-b1a4-af5827d69200",
@@ -38,6 +38,7 @@ beforeAll(() => {
// comment out specific to use bypassAndLog
process.env.BRAINTRUST_API_KEY = "braintrust-api-key";
process.env.BRAINTRUST_APP_URL = "http://braintrust.local";
process.env.BRAINTRUST_ORG_NAME = "braintrustdata.com";
process.env.OPENAI_API_KEY = "openai-api-key";

server.listen({
2 changes: 1 addition & 1 deletion integrations/langchain-py/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "braintrust-langchain"
version = "0.2.0"
version = "0.2.1"
description = "Integration for LangChain and Braintrust Tracing"
readme = "README.md"
requires-python = ">=3.10"
12 changes: 12 additions & 0 deletions integrations/langchain-py/src/braintrust_langchain/callbacks.py
@@ -633,6 +633,18 @@ def _get_metrics_from_response(response: LLMResult):
)
)

# Extract cache tokens from nested input_token_details (LangChain format)
# Maps to Braintrust's standard cache token metric names
input_token_details = usage_metadata.get("input_token_details")
if input_token_details and isinstance(input_token_details, dict):
cache_read = input_token_details.get("cache_read")
cache_creation = input_token_details.get("cache_creation")

if cache_read is not None:
metrics["prompt_cached_tokens"] = cache_read
if cache_creation is not None:
metrics["prompt_cache_creation_tokens"] = cache_creation

if not metrics or not any(metrics.values()):
llm_output: dict[str, Any] = response.llm_output or {}
metrics = llm_output.get("token_usage") or llm_output.get("estimatedTokens") or {}