From 12eb1d545b932e6a72641464ffac993482f458ff Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Thu, 5 Mar 2026 15:25:44 -0800 Subject: [PATCH 01/11] test: Add e2e test setup --- .github/workflows/e2e.yaml | 42 ++ e2e/package.json | 16 + e2e/scenarios/experiment-basic.ts | 79 +++ e2e/scenarios/logger-basic.ts | 72 +++ .../experiment-basic.test.ts.snap | 183 ++++++ .../__snapshots__/logger-basic.test.ts.snap | 182 ++++++ e2e/tests/experiment-basic.test.ts | 43 ++ e2e/tests/global-setup.ts | 14 + e2e/tests/helpers/ingestion.ts | 171 ++++++ e2e/tests/helpers/mock-braintrust-server.ts | 525 ++++++++++++++++++ e2e/tests/helpers/normalize.ts | 117 ++++ e2e/tests/helpers/run-scenario.ts | 79 +++ e2e/tests/logger-basic.test.ts | 43 ++ e2e/tsconfig.json | 14 + e2e/turbo.json | 3 + e2e/vitest.config.mts | 10 + package.json | 1 + pnpm-lock.yaml | 18 + pnpm-workspace.yaml | 1 + turbo.json | 5 + 20 files changed, 1618 insertions(+) create mode 100644 .github/workflows/e2e.yaml create mode 100644 e2e/package.json create mode 100644 e2e/scenarios/experiment-basic.ts create mode 100644 e2e/scenarios/logger-basic.ts create mode 100644 e2e/tests/__snapshots__/experiment-basic.test.ts.snap create mode 100644 e2e/tests/__snapshots__/logger-basic.test.ts.snap create mode 100644 e2e/tests/experiment-basic.test.ts create mode 100644 e2e/tests/global-setup.ts create mode 100644 e2e/tests/helpers/ingestion.ts create mode 100644 e2e/tests/helpers/mock-braintrust-server.ts create mode 100644 e2e/tests/helpers/normalize.ts create mode 100644 e2e/tests/helpers/run-scenario.ts create mode 100644 e2e/tests/logger-basic.test.ts create mode 100644 e2e/tsconfig.json create mode 100644 e2e/turbo.json create mode 100644 e2e/vitest.config.mts diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml new file mode 100644 index 000000000..1d9183a0d --- /dev/null +++ b/.github/workflows/e2e.yaml @@ -0,0 +1,42 @@ +name: e2e + +on: + pull_request: + paths: + - "e2e/**" + - "js/**" + - 
".github/workflows/e2e.yaml" + - "package.json" + - "pnpm-lock.yaml" + - "pnpm-workspace.yaml" + - "turbo.json" + push: + branches: [main] + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 20 + + env: + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-node@v4 + with: + node-version: 20 + + - uses: pnpm/action-setup@v4 + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Run e2e tests + run: pnpm test:e2e diff --git a/e2e/package.json b/e2e/package.json new file mode 100644 index 000000000..b7266cc01 --- /dev/null +++ b/e2e/package.json @@ -0,0 +1,16 @@ +{ + "name": "@braintrust/js-e2e-tests", + "version": "0.0.0", + "private": true, + "scripts": { + "test:e2e": "vitest run", + "test:e2e:update": "vitest run --update" + }, + "devDependencies": { + "@types/node": "^20.10.5", + "braintrust": "workspace:^", + "tsx": "^3.14.0", + "typescript": "5.4.4", + "vitest": "^2.1.9" + } +} diff --git a/e2e/scenarios/experiment-basic.ts b/e2e/scenarios/experiment-basic.ts new file mode 100644 index 000000000..b4d74a70d --- /dev/null +++ b/e2e/scenarios/experiment-basic.ts @@ -0,0 +1,79 @@ +import { initExperiment } from "braintrust"; + +async function main() { + const testRunId = process.env.BRAINTRUST_E2E_RUN_ID ?? 
"missing-test-run-id"; + const experiment = initExperiment("e2e-evals", { + experiment: "logger-e2e", + metadata: { + suite: "e2e", + }, + tags: ["e2e"], + }); + + await experiment.traced( + async (rootSpan) => { + rootSpan.log({ + output: { + completion: "done", + }, + expected: { + completion: "done", + }, + scores: { + pass: 1, + }, + metadata: { + scenario: "experiment-basic", + record: "sample-1", + testRunId, + }, + }); + + rootSpan.traced( + (childSpan) => { + childSpan.log({ + output: { + tool: "lookup", + status: "success", + }, + metadata: { + stage: "child", + testRunId, + }, + }); + }, + { + name: "tool-span", + event: { + input: { + tool: "lookup", + testRunId, + }, + metadata: { + testRunId, + }, + }, + }, + ); + }, + { + name: "experiment-root", + event: { + input: { + prompt: "Run an evaluation", + testRunId, + }, + metadata: { + testRunId, + }, + }, + }, + ); + + await experiment.flush(); +} + +void main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/e2e/scenarios/logger-basic.ts b/e2e/scenarios/logger-basic.ts new file mode 100644 index 000000000..c38acd36a --- /dev/null +++ b/e2e/scenarios/logger-basic.ts @@ -0,0 +1,72 @@ +import { initLogger } from "braintrust"; + +async function main() { + const testRunId = process.env.BRAINTRUST_E2E_RUN_ID ?? 
"missing-test-run-id"; + const logger = initLogger({ + projectName: "e2e-project-logs", + }); + + await logger.traced( + async (rootSpan) => { + rootSpan.log({ + output: { + answer: "4", + explanation: "basic arithmetic", + }, + metadata: { + scenario: "logger-basic", + stage: "root", + testRunId, + }, + scores: { + correct: 1, + }, + }); + + rootSpan.traced( + (childSpan) => { + childSpan.log({ + output: { + detail: "child completed", + }, + metadata: { + stage: "child", + testRunId, + }, + }); + }, + { + name: "child-span", + event: { + input: { + step: "child-work", + testRunId, + }, + metadata: { + testRunId, + }, + }, + }, + ); + }, + { + name: "root-span", + event: { + input: { + question: "What is 2 + 2?", + testRunId, + }, + metadata: { + testRunId, + }, + }, + }, + ); + + await logger.flush(); +} + +void main().catch((error) => { + console.error(error); + process.exitCode = 1; +}); diff --git a/e2e/tests/__snapshots__/experiment-basic.test.ts.snap b/e2e/tests/__snapshots__/experiment-basic.test.ts.snap new file mode 100644 index 000000000..8309a2954 --- /dev/null +++ b/e2e/tests/__snapshots__/experiment-basic.test.ts.snap @@ -0,0 +1,183 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`experiment-basic registers an experiment and sends normalized experiment logs > logs3-payloads 1`] = ` +[ + { + "api_version": 2, + "rows": [ + { + "_is_merge": false, + "context": { + "caller_filename": "/e2e/scenarios/experiment-basic.ts", + "caller_functionname": "main", + "caller_lineno": 13, + }, + "created": "", + "experiment_id": "experiment:logger-e2e", + "id": "", + "input": { + "prompt": "Run an evaluation", + "testRunId": "", + }, + "metadata": { + "testRunId": "", + }, + "metrics": { + "start": 0, + }, + "root_span_id": "", + "span_attributes": { + "exec_counter": 0, + "name": "experiment-root", + "type": "eval", + }, + "span_id": "", + }, + ], + }, + { + "api_version": 2, + "rows": [ + { + "_is_merge": true, + "expected": { + 
"completion": "done", + }, + "experiment_id": "experiment:logger-e2e", + "id": "", + "metadata": { + "record": "sample-1", + "scenario": "experiment-basic", + "testRunId": "", + }, + "metrics": { + "end": 0, + }, + "output": { + "completion": "done", + }, + "root_span_id": "", + "scores": { + "pass": 1, + }, + "span_id": "", + }, + { + "context": { + "caller_filename": "/e2e/scenarios/experiment-basic.ts", + "caller_functionname": "experiment.traced.name", + "caller_lineno": 32, + }, + "created": "", + "experiment_id": "experiment:logger-e2e", + "id": "", + "input": { + "testRunId": "", + "tool": "lookup", + }, + "metadata": { + "stage": "child", + "testRunId": "", + }, + "metrics": { + "end": 0, + "start": 0, + }, + "output": { + "status": "success", + "tool": "lookup", + }, + "root_span_id": "", + "span_attributes": { + "exec_counter": 1, + "name": "tool-span", + }, + "span_id": "", + "span_parents": [ + "", + ], + }, + ], + }, +] +`; + +exports[`experiment-basic registers an experiment and sends normalized experiment logs > root-span 1`] = ` +{ + "context": { + "caller_filename": "/e2e/scenarios/experiment-basic.ts", + "caller_functionname": "main", + "caller_lineno": 13, + }, + "created": "", + "expected": { + "completion": "done", + }, + "experiment_id": "experiment:logger-e2e", + "id": "", + "input": { + "prompt": "Run an evaluation", + "testRunId": "", + }, + "metadata": { + "record": "sample-1", + "scenario": "experiment-basic", + "testRunId": "", + }, + "metrics": { + "end": 0, + "start": 0, + }, + "output": { + "completion": "done", + }, + "root_span_id": "", + "scores": { + "pass": 1, + }, + "span_attributes": { + "exec_counter": 0, + "name": "experiment-root", + "type": "eval", + }, + "span_id": "", +} +`; + +exports[`experiment-basic registers an experiment and sends normalized experiment logs > tool-span 1`] = ` +{ + "context": { + "caller_filename": "/e2e/scenarios/experiment-basic.ts", + "caller_functionname": "experiment.traced.name", + 
"caller_lineno": 32, + }, + "created": "", + "experiment_id": "experiment:logger-e2e", + "id": "", + "input": { + "testRunId": "", + "tool": "lookup", + }, + "metadata": { + "stage": "child", + "testRunId": "", + }, + "metrics": { + "end": 0, + "start": 0, + }, + "output": { + "status": "success", + "tool": "lookup", + }, + "root_span_id": "", + "span_attributes": { + "exec_counter": 1, + "name": "tool-span", + }, + "span_id": "", + "span_parents": [ + "", + ], +} +`; diff --git a/e2e/tests/__snapshots__/logger-basic.test.ts.snap b/e2e/tests/__snapshots__/logger-basic.test.ts.snap new file mode 100644 index 000000000..bb41ce2ad --- /dev/null +++ b/e2e/tests/__snapshots__/logger-basic.test.ts.snap @@ -0,0 +1,182 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`logger-basic registers a project and sends normalized project logs > child-span 1`] = ` +{ + "context": { + "caller_filename": "/e2e/scenarios/logger-basic.ts", + "caller_functionname": "logger.traced.name", + "caller_lineno": 26, + }, + "created": "", + "id": "", + "input": { + "step": "child-work", + "testRunId": "", + }, + "log_id": "g", + "metadata": { + "stage": "child", + "testRunId": "", + }, + "metrics": { + "end": 0, + "start": 0, + }, + "output": { + "detail": "child completed", + }, + "project_id": "project:e2e-project-logs", + "root_span_id": "", + "span_attributes": { + "exec_counter": 1, + "name": "child-span", + }, + "span_id": "", + "span_parents": [ + "", + ], +} +`; + +exports[`logger-basic registers a project and sends normalized project logs > logs3-payloads 1`] = ` +[ + { + "api_version": 2, + "rows": [ + { + "_is_merge": false, + "context": { + "caller_filename": "/e2e/scenarios/logger-basic.ts", + "caller_functionname": "main", + "caller_lineno": 9, + }, + "created": "", + "id": "", + "input": { + "question": "What is 2 + 2?", + "testRunId": "", + }, + "log_id": "g", + "metadata": { + "testRunId": "", + }, + "metrics": { + "start": 0, + }, + "project_id": 
"project:e2e-project-logs", + "root_span_id": "", + "span_attributes": { + "exec_counter": 0, + "name": "root-span", + "type": "task", + }, + "span_id": "", + }, + ], + }, + { + "api_version": 2, + "rows": [ + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metadata": { + "scenario": "logger-basic", + "stage": "root", + "testRunId": "", + }, + "metrics": { + "end": 0, + }, + "output": { + "answer": "4", + "explanation": "basic arithmetic", + }, + "project_id": "project:e2e-project-logs", + "root_span_id": "", + "scores": { + "correct": 1, + }, + "span_id": "", + }, + { + "context": { + "caller_filename": "/e2e/scenarios/logger-basic.ts", + "caller_functionname": "logger.traced.name", + "caller_lineno": 26, + }, + "created": "", + "id": "", + "input": { + "step": "child-work", + "testRunId": "", + }, + "log_id": "g", + "metadata": { + "stage": "child", + "testRunId": "", + }, + "metrics": { + "end": 0, + "start": 0, + }, + "output": { + "detail": "child completed", + }, + "project_id": "project:e2e-project-logs", + "root_span_id": "", + "span_attributes": { + "exec_counter": 1, + "name": "child-span", + }, + "span_id": "", + "span_parents": [ + "", + ], + }, + ], + }, +] +`; + +exports[`logger-basic registers a project and sends normalized project logs > root-span 1`] = ` +{ + "context": { + "caller_filename": "/e2e/scenarios/logger-basic.ts", + "caller_functionname": "main", + "caller_lineno": 9, + }, + "created": "", + "id": "", + "input": { + "question": "What is 2 + 2?", + "testRunId": "", + }, + "log_id": "g", + "metadata": { + "scenario": "logger-basic", + "stage": "root", + "testRunId": "", + }, + "metrics": { + "end": 0, + "start": 0, + }, + "output": { + "answer": "4", + "explanation": "basic arithmetic", + }, + "project_id": "project:e2e-project-logs", + "root_span_id": "", + "scores": { + "correct": 1, + }, + "span_attributes": { + "exec_counter": 0, + "name": "root-span", + "type": "task", + }, + "span_id": "", +} +`; diff --git 
a/e2e/tests/experiment-basic.test.ts b/e2e/tests/experiment-basic.test.ts new file mode 100644 index 000000000..cc55460a8 --- /dev/null +++ b/e2e/tests/experiment-basic.test.ts @@ -0,0 +1,43 @@ +import { expect, test } from "vitest"; +import { + createTestRunId, + getPayloadsForRun, + getTestServerEnv, + waitForRunEvent, +} from "./helpers/ingestion"; +import { normalizeForSnapshot, type Json } from "./helpers/normalize"; +import { runScenarioOrThrow } from "./helpers/run-scenario"; + +test("experiment-basic registers an experiment and sends normalized experiment logs", async () => { + const testRunId = createTestRunId(); + const rootSpanPromise = waitForRunEvent( + testRunId, + (event) => event.span.name === "experiment-root" && event.span.ended, + ); + const toolSpanPromise = waitForRunEvent( + testRunId, + (event) => event.span.name === "tool-span" && event.span.ended, + ); + + await runScenarioOrThrow( + "scenarios/experiment-basic.ts", + getTestServerEnv(testRunId), + ); + + const [rootSpanEvent, toolSpanEvent] = await Promise.all([ + rootSpanPromise, + toolSpanPromise, + ]); + + expect(normalizeForSnapshot(rootSpanEvent.row as Json)).toMatchSnapshot( + "root-span", + ); + expect(normalizeForSnapshot(toolSpanEvent.row as Json)).toMatchSnapshot( + "tool-span", + ); + + const logs3Payloads = await getPayloadsForRun(testRunId); + expect(normalizeForSnapshot(logs3Payloads as Json)).toMatchSnapshot( + "logs3-payloads", + ); +}); diff --git a/e2e/tests/global-setup.ts b/e2e/tests/global-setup.ts new file mode 100644 index 000000000..4aaa83e33 --- /dev/null +++ b/e2e/tests/global-setup.ts @@ -0,0 +1,14 @@ +import { startMockBraintrustServer } from "./helpers/mock-braintrust-server"; + +export default async function globalSetup(context: { + provide: (key: string, value: string) => void; +}) { + const server = await startMockBraintrustServer(); + + context.provide("mockBraintrustApiKey", server.apiKey); + context.provide("mockBraintrustUrl", server.url); + + return 
async () => { + await server.close(); + }; +} diff --git a/e2e/tests/helpers/ingestion.ts b/e2e/tests/helpers/ingestion.ts new file mode 100644 index 000000000..a4de2dde7 --- /dev/null +++ b/e2e/tests/helpers/ingestion.ts @@ -0,0 +1,171 @@ +import { randomUUID } from "node:crypto"; +import { inject } from "vitest"; +import type { + CapturedLogEvent, + CapturedLogEventBatch, + CapturedLogPayload, + CapturedLogPayloadBatch, +} from "./mock-braintrust-server"; + +const MOCK_BRAINTRUST_URL_KEY = "mockBraintrustUrl"; +const MOCK_BRAINTRUST_API_KEY_KEY = "mockBraintrustApiKey"; +const DEFAULT_EVENT_TIMEOUT_MS = 5_000; +const DEFAULT_POLL_INTERVAL_MS = 50; + +export type EventPredicate = (event: CapturedLogEvent) => boolean; +export type PayloadPredicate = (payload: CapturedLogPayload) => boolean; + +export type WaitForEventOptions = { + pollIntervalMs?: number; + timeoutMs?: number; +}; + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +function injectedString(key: string): string { + return inject(key as never) as string; +} + +function controlUrl(path: string): URL { + return new URL(path, injectedString(MOCK_BRAINTRUST_URL_KEY)); +} + +async function fetchControl( + path: string, + body: Record, +): Promise { + const response = await fetch(controlUrl(path), { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }); + + if (!response.ok) { + throw new Error( + `Test server request failed: ${response.status} ${response.statusText}`, + ); + } + + return (await response.json()) as T; +} + +function hasTestRunId(value: unknown, testRunId: string): boolean { + if (Array.isArray(value)) { + return value.some((entry) => hasTestRunId(entry, testRunId)); + } + + if (!isRecord(value)) { + return false; + } + + if (value.testRunId === testRunId) { + return true; + } + + return Object.values(value).some((entry) => hasTestRunId(entry, 
testRunId)); +} + +function eventBatch(after = 0): Promise { + return fetchControl("/_mock/events", { after }); +} + +function payloadBatch(after = 0): Promise { + return fetchControl("/_mock/payloads", { after }); +} + +function delay(ms: number): Promise { + return new Promise((resolve) => { + setTimeout(resolve, ms); + }); +} + +export function createTestRunId(): string { + return `e2e-${randomUUID()}`; +} + +export function getTestServerEnv(testRunId: string): Record { + const url = injectedString(MOCK_BRAINTRUST_URL_KEY); + return { + BRAINTRUST_API_KEY: injectedString(MOCK_BRAINTRUST_API_KEY_KEY), + BRAINTRUST_API_URL: url, + BRAINTRUST_APP_URL: url, + BRAINTRUST_E2E_RUN_ID: testRunId, + }; +} + +export function isTestRunEvent( + event: CapturedLogEvent, + testRunId: string, +): boolean { + return hasTestRunId(event.row, testRunId); +} + +export function isTestRunPayload( + payload: CapturedLogPayload, + testRunId: string, +): boolean { + return payload.rows.some((row) => hasTestRunId(row, testRunId)); +} + +export async function getEvents( + predicate?: EventPredicate, +): Promise { + const { events } = await eventBatch(); + return predicate ? events.filter(predicate) : events; +} + +export async function getPayloads( + predicate?: PayloadPredicate, +): Promise { + const { payloads } = await payloadBatch(); + return predicate ? payloads.filter(predicate) : payloads; +} + +export async function getPayloadsForRun( + testRunId: string, +): Promise { + return await getPayloads((payload) => isTestRunPayload(payload, testRunId)); +} + +export async function waitForEvent( + predicate: EventPredicate, + options: WaitForEventOptions = {}, +): Promise { + const timeoutMs = options.timeoutMs ?? DEFAULT_EVENT_TIMEOUT_MS; + const pollIntervalMs = options.pollIntervalMs ?? 
DEFAULT_POLL_INTERVAL_MS; + const deadline = Date.now() + timeoutMs; + let cursor = 0; + + while (Date.now() <= deadline) { + const batch = await eventBatch(cursor); + cursor = batch.cursor; + + const match = batch.events.find(predicate); + if (match) { + return match; + } + + if (Date.now() >= deadline) { + break; + } + + await delay(Math.min(pollIntervalMs, Math.max(deadline - Date.now(), 0))); + } + + throw new Error( + `Timed out waiting for a matching event after ${timeoutMs}ms`, + ); +} + +export async function waitForRunEvent( + testRunId: string, + predicate: EventPredicate, + options: WaitForEventOptions = {}, +): Promise { + return await waitForEvent( + (event) => isTestRunEvent(event, testRunId) && predicate(event), + options, + ); +} diff --git a/e2e/tests/helpers/mock-braintrust-server.ts b/e2e/tests/helpers/mock-braintrust-server.ts new file mode 100644 index 000000000..937627924 --- /dev/null +++ b/e2e/tests/helpers/mock-braintrust-server.ts @@ -0,0 +1,525 @@ +import { createServer } from "node:http"; +import type { IncomingHttpHeaders, ServerResponse } from "node:http"; +import type { AddressInfo } from "node:net"; + +export type JsonValue = + | null + | boolean + | number + | string + | JsonValue[] + | { [key: string]: JsonValue }; + +export type JsonObject = { [key: string]: JsonValue }; + +export interface CapturedRequest { + method: string; + path: string; + query: Record; + headers: Record; + rawBody: string; + jsonBody: JsonValue | null; +} + +export type CapturedLogRow = Record; + +export type CapturedLogPayload = { + api_version: number; + rows: CapturedLogRow[]; +}; + +export type CapturedLogEvent = { + apiVersion: number; + context?: Record; + expected?: unknown; + experimentId?: string; + input?: unknown; + isMerge: boolean; + metadata?: Record; + metrics?: Record; + output?: unknown; + projectId?: string; + row: CapturedLogRow; + scores?: unknown; + span: { + ended: boolean; + id?: string; + name?: string; + parentIds: string[]; + 
rootId?: string; + started: boolean; + type?: string; + }; +}; + +export interface MockBraintrustServer { + apiKey: string; + close: () => Promise; + url: string; +} + +export type CapturedLogEventBatch = { + cursor: number; + events: CapturedLogEvent[]; +}; + +export type CapturedLogPayloadBatch = { + cursor: number; + payloads: CapturedLogPayload[]; +}; + +const CONTROL_ROUTE_PREFIX = "/_mock"; +const DEFAULT_API_KEY = "mock-braintrust-api-key"; + +type ProjectRecord = { + id: string; + name: string; +}; + +type ExperimentRecord = { + created: string; + id: string; + name: string; + projectId: string; + projectName: string; +}; + +function slugify(value: string): string { + return value + .toLowerCase() + .replace(/[^a-z0-9]+/g, "-") + .replace(/^-+|-+$/g, ""); +} + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +function normalizeHeaders( + headers: IncomingHttpHeaders, +): Record { + return Object.entries(headers).reduce>( + (normalized, [key, value]) => { + if (value === undefined) { + return normalized; + } + + normalized[key] = Array.isArray(value) ? 
value.join(", ") : value; + return normalized; + }, + {}, + ); +} + +function parseJson(rawBody: string): JsonValue | null { + if (!rawBody.trim()) { + return null; + } + + return JSON.parse(rawBody) as JsonValue; +} + +function respondJson( + response: ServerResponse, + statusCode: number, + body: unknown, +): void { + response.writeHead(statusCode, { "Content-Type": "application/json" }); + response.end(JSON.stringify(body)); +} + +function isAuthorized( + headers: Record, + apiKey: string, +): boolean { + return headers.authorization === `Bearer ${apiKey}`; +} + +function clone(value: T): T { + return structuredClone(value); +} + +function parsePayload(request: CapturedRequest): CapturedLogPayload | null { + const body = request.jsonBody; + if (!isRecord(body) || !Array.isArray(body.rows)) { + return null; + } + + return { + api_version: typeof body.api_version === "number" ? body.api_version : 0, + rows: body.rows.reduce((capturedRows, row) => { + if (isRecord(row)) { + capturedRows.push(clone(row)); + } + return capturedRows; + }, []), + }; +} + +function rowKey(row: CapturedLogRow): string { + return JSON.stringify( + [ + "org_id", + "project_id", + "experiment_id", + "dataset_id", + "prompt_session_id", + "log_id", + "id", + ].map((key) => row[key]), + ); +} + +function mergeValue(base: unknown, incoming: unknown): unknown { + if (isRecord(base) && isRecord(incoming)) { + const merged: Record = { ...base }; + for (const [key, value] of Object.entries(incoming)) { + merged[key] = key in merged ? 
mergeValue(merged[key], value) : value; + } + return merged; + } + + return incoming; +} + +function mergeRow( + existing: CapturedLogRow | undefined, + incoming: CapturedLogRow, +): CapturedLogRow { + if (!existing || !incoming._is_merge) { + return clone(incoming); + } + + const preserveNoMerge = !existing._is_merge; + const merged = mergeValue(existing, incoming) as CapturedLogRow; + if (preserveNoMerge) { + delete merged._is_merge; + } + return clone(merged); +} + +function recordField(value: unknown): Record | undefined { + return isRecord(value) ? clone(value) : undefined; +} + +function stringField(value: unknown): string | undefined { + return typeof value === "string" ? value : undefined; +} + +function arrayOfStrings(value: unknown): string[] { + if (!Array.isArray(value)) { + return []; + } + + return value.filter((entry): entry is string => typeof entry === "string"); +} + +function toCapturedLogEvent( + apiVersion: number, + row: CapturedLogRow, + rawRow: CapturedLogRow, +): CapturedLogEvent { + const spanAttributes = recordField(row.span_attributes); + const metrics = recordField(row.metrics); + + return { + apiVersion, + context: recordField(row.context), + expected: clone(row.expected), + experimentId: stringField(row.experiment_id), + input: clone(row.input), + isMerge: rawRow._is_merge === true, + metadata: recordField(row.metadata), + metrics, + output: clone(row.output), + projectId: stringField(row.project_id), + row: clone(row), + scores: clone(row.scores), + span: { + ended: typeof metrics?.end === "number", + id: stringField(row.span_id), + name: stringField(spanAttributes?.name), + parentIds: arrayOfStrings(row.span_parents), + rootId: stringField(row.root_span_id), + started: typeof metrics?.start === "number", + type: stringField(spanAttributes?.type), + }, + }; +} + +export async function startMockBraintrustServer( + apiKey = DEFAULT_API_KEY, +): Promise { + const requests: CapturedRequest[] = []; + const payloads: CapturedLogPayload[] = 
[]; + const events: CapturedLogEvent[] = []; + const mergedRows = new Map(); + const projects = new Map(); + const experiments = new Map(); + let serverUrl = ""; + + const server = createServer((req, res) => { + void (async () => { + try { + const requestUrl = new URL( + req.url ?? "/", + serverUrl || "http://127.0.0.1", + ); + const rawBody = await new Promise((resolve, reject) => { + const chunks: Buffer[] = []; + req.on("data", (chunk) => chunks.push(Buffer.from(chunk))); + req.on("end", () => resolve(Buffer.concat(chunks).toString("utf8"))); + req.on("error", reject); + }); + + const capturedRequest: CapturedRequest = { + method: req.method ?? "GET", + path: requestUrl.pathname, + query: Object.fromEntries(requestUrl.searchParams.entries()), + headers: normalizeHeaders(req.headers), + rawBody, + jsonBody: parseJson(rawBody), + }; + + if (capturedRequest.path.startsWith(CONTROL_ROUTE_PREFIX)) { + const body = isRecord(capturedRequest.jsonBody) + ? capturedRequest.jsonBody + : {}; + const after = + typeof body.after === "number" && body.after >= 0 ? 
body.after : 0; + + if ( + capturedRequest.method === "POST" && + capturedRequest.path === `${CONTROL_ROUTE_PREFIX}/events` + ) { + respondJson(res, 200, { + cursor: events.length, + events: events.slice(after), + } satisfies CapturedLogEventBatch); + return; + } + + if ( + capturedRequest.method === "POST" && + capturedRequest.path === `${CONTROL_ROUTE_PREFIX}/payloads` + ) { + respondJson(res, 200, { + cursor: payloads.length, + payloads: payloads.slice(after), + } satisfies CapturedLogPayloadBatch); + return; + } + + respondJson(res, 404, { + error: `Unhandled mock control route: ${capturedRequest.method} ${capturedRequest.path}`, + }); + return; + } + + requests.push(capturedRequest); + + if (!isAuthorized(capturedRequest.headers, apiKey)) { + respondJson(res, 401, { error: "unauthorized" }); + return; + } + + if ( + capturedRequest.method === "POST" && + capturedRequest.path === "/api/apikey/login" + ) { + respondJson(res, 200, { + org_info: [ + { + id: "org:e2e", + name: "e2e-org", + api_url: serverUrl, + proxy_url: null, + git_metadata: { collect: "none" }, + }, + ], + }); + return; + } + + if ( + capturedRequest.method === "POST" && + capturedRequest.path === "/api/project/register" + ) { + const body = (capturedRequest.jsonBody ?? {}) as { + project_name?: string; + }; + const projectName = body.project_name ?? "global"; + const project = projects.get(projectName) ?? { + id: `project:${slugify(projectName) || "global"}`, + name: projectName, + }; + projects.set(projectName, project); + + respondJson(res, 200, { project }); + return; + } + + if ( + capturedRequest.method === "GET" && + capturedRequest.path === "/api/project" + ) { + const projectId = capturedRequest.query.id ?? "project:unknown"; + const project = [...projects.values()].find( + (candidate) => candidate.id === projectId, + ) ?? 
{ + id: projectId, + name: projectId.replace(/^project:/, ""), + }; + + respondJson(res, 200, { name: project.name, project }); + return; + } + + if ( + capturedRequest.method === "POST" && + capturedRequest.path === "/api/experiment/register" + ) { + const body = (capturedRequest.jsonBody ?? {}) as { + project_name?: string; + project_id?: string; + experiment_name?: string; + }; + const projectName = body.project_name ?? body.project_id ?? "project"; + const project = projects.get(projectName) ?? { + id: + body.project_id ?? `project:${slugify(projectName) || "project"}`, + name: projectName, + }; + projects.set(project.name, project); + + const experimentName = body.experiment_name ?? "experiment"; + const experimentKey = `${project.id}:${experimentName}`; + const experiment = experiments.get(experimentKey) ?? { + id: `experiment:${slugify(experimentName) || "experiment"}`, + name: experimentName, + created: "2026-01-01T00:00:00.000Z", + projectId: project.id, + projectName: project.name, + }; + experiments.set(experimentKey, experiment); + + respondJson(res, 200, { + project, + experiment: { + id: experiment.id, + name: experiment.name, + created: experiment.created, + }, + }); + return; + } + + if ( + capturedRequest.method === "POST" && + capturedRequest.path === "/api/experiment/get" + ) { + const body = (capturedRequest.jsonBody ?? {}) as { + project_name?: string; + project_id?: string; + experiment_name?: string; + }; + const projectKey = body.project_name ?? body.project_id ?? "project"; + const project = projects.get(projectKey) ?? { + id: `project:${slugify(projectKey) || "project"}`, + name: projectKey, + }; + const experimentName = body.experiment_name ?? "experiment"; + const experiment = experiments.get( + `${project.id}:${experimentName}`, + ) ?? 
{ + id: `experiment:${slugify(experimentName) || "experiment"}`, + name: experimentName, + created: "2026-01-01T00:00:00.000Z", + projectId: project.id, + projectName: project.name, + }; + + respondJson(res, 200, [ + { + id: experiment.id, + name: experiment.name, + project_id: experiment.projectId, + created: experiment.created, + }, + ]); + return; + } + + if ( + capturedRequest.method === "POST" && + capturedRequest.path === "/api/base_experiment/get_id" + ) { + respondJson(res, 400, { error: "no base experiment" }); + return; + } + + if ( + capturedRequest.method === "GET" && + capturedRequest.path === "/experiment-comparison2" + ) { + respondJson(res, 200, { scores: {}, metrics: {} }); + return; + } + + if ( + capturedRequest.method === "GET" && + capturedRequest.path === "/version" + ) { + respondJson(res, 200, { logs3_payload_max_bytes: null }); + return; + } + + if ( + capturedRequest.method === "POST" && + capturedRequest.path === "/logs3" + ) { + const payload = parsePayload(capturedRequest); + if (payload) { + payloads.push(payload); + + for (const row of payload.rows) { + const key = rowKey(row); + const mergedRow = mergeRow(mergedRows.get(key), row); + mergedRows.set(key, mergedRow); + events.push( + toCapturedLogEvent(payload.api_version, mergedRow, row), + ); + } + } + + respondJson(res, 200, { ok: true }); + return; + } + + respondJson(res, 404, { + error: `Unhandled mock route: ${capturedRequest.method} ${capturedRequest.path}`, + }); + } catch (error) { + respondJson(res, 500, { + error: error instanceof Error ? error.message : String(error), + }); + } + })(); + }); + + serverUrl = await new Promise((resolve) => { + server.listen(0, "127.0.0.1", () => { + const address = server.address() as AddressInfo; + resolve(`http://127.0.0.1:${address.port}`); + }); + }); + + return { + apiKey, + close: () => + new Promise((resolve, reject) => { + server.close((error) => (error ? 
reject(error) : resolve())); + }), + url: serverUrl, + }; +} diff --git a/e2e/tests/helpers/normalize.ts b/e2e/tests/helpers/normalize.ts new file mode 100644 index 000000000..45240fa6b --- /dev/null +++ b/e2e/tests/helpers/normalize.ts @@ -0,0 +1,117 @@ +type Primitive = null | boolean | number | string; +export type Json = + | Primitive + | Json[] + | { + [key: string]: Json; + }; + +type TokenMaps = { + ids: Map; + runs: Map; + xacts: Map; +}; + +const ISO_DATE_REGEX = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z$/; +const UUID_REGEX = + /^[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i; +const TIME_KEYS = new Set(["created", "start", "end"]); +const SPAN_ID_KEYS = new Set(["id", "span_id", "root_span_id"]); + +function normalizeCallerFilename(value: string): string { + const e2eIndex = value.lastIndexOf("/e2e/"); + if (e2eIndex >= 0) { + return `${value.slice(e2eIndex)}`; + } + + return value; +} + +function tokenFor( + map: Map, + rawValue: string, + prefix: string, +): string { + const existing = map.get(rawValue); + if (existing) { + return existing; + } + + const token = `<${prefix}:${map.size + 1}>`; + map.set(rawValue, token); + return token; +} + +function normalizeValue( + value: Json, + tokenMaps: TokenMaps, + currentKey?: string, +): Json { + if (Array.isArray(value)) { + if (currentKey === "span_parents") { + return value.map((entry) => + typeof entry === "string" + ? 
tokenFor(tokenMaps.ids, entry, "span") + : normalizeValue(entry, tokenMaps), + ); + } + + return value.map((entry) => normalizeValue(entry, tokenMaps)); + } + + if (value && typeof value === "object") { + return Object.fromEntries( + Object.entries(value).map(([key, entry]) => [ + key, + normalizeValue(entry as Json, tokenMaps, key), + ]), + ); + } + + if (typeof value === "number") { + if (currentKey && TIME_KEYS.has(currentKey)) { + return 0; + } + return value; + } + + if (typeof value === "string") { + if (currentKey === "caller_filename") { + return normalizeCallerFilename(value); + } + + if (currentKey === "_xact_id") { + return tokenFor(tokenMaps.xacts, value, "xact"); + } + + if (currentKey === "testRunId") { + return tokenFor(tokenMaps.runs, value, "run"); + } + + if (currentKey && SPAN_ID_KEYS.has(currentKey)) { + return tokenFor(tokenMaps.ids, value, "span"); + } + + if (currentKey && TIME_KEYS.has(currentKey)) { + return ""; + } + + if (ISO_DATE_REGEX.test(value)) { + return ""; + } + + if (UUID_REGEX.test(value)) { + return tokenFor(tokenMaps.ids, value, "uuid"); + } + } + + return value; +} + +export function normalizeForSnapshot(value: Json): Json { + return normalizeValue(value, { + ids: new Map(), + runs: new Map(), + xacts: new Map(), + }); +} diff --git a/e2e/tests/helpers/run-scenario.ts b/e2e/tests/helpers/run-scenario.ts new file mode 100644 index 000000000..1b4da0299 --- /dev/null +++ b/e2e/tests/helpers/run-scenario.ts @@ -0,0 +1,79 @@ +import { spawn } from "node:child_process"; +import * as path from "node:path"; + +export interface ScenarioResult { + exitCode: number; + stdout: string; + stderr: string; +} + +const tsxCliPath = require.resolve("tsx/cli"); +const packageRoot = process.cwd(); +const DEFAULT_SCENARIO_TIMEOUT_MS = 15_000; + +export async function runScenario( + relativeScenarioPath: string, + env: Record, + timeoutMs = DEFAULT_SCENARIO_TIMEOUT_MS, +): Promise { + const scenarioPath = path.join(packageRoot, 
relativeScenarioPath); + + return await new Promise((resolve, reject) => { + const child = spawn(process.execPath, [tsxCliPath, scenarioPath], { + cwd: packageRoot, + env: { + ...process.env, + ...env, + }, + stdio: ["ignore", "pipe", "pipe"], + }); + const timeout = setTimeout(() => { + child.kill("SIGTERM"); + reject( + new Error( + `Scenario ${relativeScenarioPath} timed out after ${timeoutMs}ms`, + ), + ); + }, timeoutMs); + + let stdout = ""; + let stderr = ""; + + child.stdout.on("data", (chunk) => { + stdout += chunk.toString(); + }); + + child.stderr.on("data", (chunk) => { + stderr += chunk.toString(); + }); + + child.on("error", (error) => { + clearTimeout(timeout); + reject(error); + }); + child.on("close", (code) => { + clearTimeout(timeout); + resolve({ + exitCode: code ?? 0, + stdout, + stderr, + }); + }); + }); +} + +export async function runScenarioOrThrow( + relativeScenarioPath: string, + env: Record, + timeoutMs?: number, +): Promise { + const result = await runScenario(relativeScenarioPath, env, timeoutMs); + + if (result.exitCode !== 0) { + throw new Error( + `Scenario ${relativeScenarioPath} failed with exit code ${result.exitCode}\nSTDOUT:\n${result.stdout}\nSTDERR:\n${result.stderr}`, + ); + } + + return result; +} diff --git a/e2e/tests/logger-basic.test.ts b/e2e/tests/logger-basic.test.ts new file mode 100644 index 000000000..0cce4ed5b --- /dev/null +++ b/e2e/tests/logger-basic.test.ts @@ -0,0 +1,43 @@ +import { expect, test } from "vitest"; +import { + createTestRunId, + getPayloadsForRun, + getTestServerEnv, + waitForRunEvent, +} from "./helpers/ingestion"; +import { normalizeForSnapshot, type Json } from "./helpers/normalize"; +import { runScenarioOrThrow } from "./helpers/run-scenario"; + +test("logger-basic registers a project and sends normalized project logs", async () => { + const testRunId = createTestRunId(); + const rootSpanPromise = waitForRunEvent( + testRunId, + (event) => event.span.name === "root-span" && event.span.ended, 
+ ); + const childSpanPromise = waitForRunEvent( + testRunId, + (event) => event.span.name === "child-span" && event.span.ended, + ); + + await runScenarioOrThrow( + "scenarios/logger-basic.ts", + getTestServerEnv(testRunId), + ); + + const [rootSpanEvent, childSpanEvent] = await Promise.all([ + rootSpanPromise, + childSpanPromise, + ]); + + expect(normalizeForSnapshot(rootSpanEvent.row as Json)).toMatchSnapshot( + "root-span", + ); + expect(normalizeForSnapshot(childSpanEvent.row as Json)).toMatchSnapshot( + "child-span", + ); + + const logs3Payloads = await getPayloadsForRun(testRunId); + expect(normalizeForSnapshot(logs3Payloads as Json)).toMatchSnapshot( + "logs3-payloads", + ); +}); diff --git a/e2e/tsconfig.json b/e2e/tsconfig.json new file mode 100644 index 000000000..e7a09c397 --- /dev/null +++ b/e2e/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "lib": ["es2022"], + "module": "nodenext", + "target": "es2022", + "moduleResolution": "nodenext", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "types": ["node", "vitest/globals"] + }, + "include": ["."], + "exclude": ["node_modules/**", "**/dist/**"] +} diff --git a/e2e/turbo.json b/e2e/turbo.json new file mode 100644 index 000000000..3adcb89ef --- /dev/null +++ b/e2e/turbo.json @@ -0,0 +1,3 @@ +{ + "extends": ["//"] +} diff --git a/e2e/vitest.config.mts b/e2e/vitest.config.mts new file mode 100644 index 000000000..3b0933b87 --- /dev/null +++ b/e2e/vitest.config.mts @@ -0,0 +1,10 @@ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + globalSetup: ["./tests/global-setup.ts"], + hookTimeout: 20_000, + include: ["tests/**/*.test.ts"], + testTimeout: 20_000, + }, +}); diff --git a/package.json b/package.json index 03fb3d1b2..bde676962 100644 --- a/package.json +++ b/package.json @@ -15,6 +15,7 @@ "clean": "turbo run clean", "knip": "knip --config knip.jsonc --no-config-hints", "test": "dotenv -e .env -- turbo run test 
--filter=\"!@braintrust/otel\"", + "test:e2e": "dotenv -e .env -- turbo run test:e2e", "playground": "dotenv -e .env -- turbo run playground --filter=\"braintrust\"", "prepare": "husky || true", "lint:prettier": "prettier --check .", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 11c5043ff..4873f2a9e 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -30,6 +30,24 @@ importers: specifier: ^2.5.6 version: 2.5.6 + e2e: + devDependencies: + '@types/node': + specifier: ^20.10.5 + version: 20.19.16 + braintrust: + specifier: workspace:^ + version: link:../js + tsx: + specifier: ^3.14.0 + version: 3.14.0 + typescript: + specifier: 5.4.4 + version: 5.4.4 + vitest: + specifier: ^2.1.9 + version: 2.1.9(@types/node@20.19.16)(msw@2.6.6(@types/node@20.19.16)(typescript@5.4.4))(terser@5.44.1) + integrations/browser-js: dependencies: als-browser: diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index aa39e5413..fc34687e1 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -1,4 +1,5 @@ packages: + - e2e - js - integrations/* - internal/golden diff --git a/turbo.json b/turbo.json index 8e7b38b02..ebc767257 100644 --- a/turbo.json +++ b/turbo.json @@ -12,6 +12,11 @@ "dependsOn": ["^build"], "outputs": [] }, + "test:e2e": { + "env": ["ANTHROPIC_API_KEY", "BRAINTRUST_API_KEY", "OPENAI_API_KEY"], + "dependsOn": ["^build"], + "outputs": [] + }, "lint": { "outputs": [] }, From 724977e8f5d927e37894b7d54064947ca04587e8 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Thu, 5 Mar 2026 15:54:57 -0800 Subject: [PATCH 02/11] Add readme --- e2e/README.md | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 e2e/README.md diff --git a/e2e/README.md b/e2e/README.md new file mode 100644 index 000000000..08ffea44d --- /dev/null +++ b/e2e/README.md @@ -0,0 +1,54 @@ +# E2E Tests + +End-to-end tests that validate the Braintrust SDK by running real SDK usage scenarios against a mock server. + +## How It Works + +1. 
A **mock Braintrust server** starts before all tests (via Vitest global setup) +2. Each test spawns a **scenario script** as a subprocess using `tsx`, with env vars pointing at the mock server +3. The scenario uses the SDK normally (init, create spans, log data, flush) +4. The test waits for expected events to arrive at the mock server, then **normalizes** and **snapshots** them + +Subprocess isolation ensures the SDK operates exactly as it would in production. + +## Structure + +``` +e2e/ +├── scenarios/ # Standalone scripts that use the SDK (run as subprocesses) +├── tests/ +│ ├── helpers/ # Test utilities (see below) +│ ├── global-setup.ts # Starts mock server, injects URL + API key into test context +│ ├── *.test.ts # Test files +│ └── __snapshots__/ # Vitest snapshot files +└── vitest.config.mts +``` + +## Helpers (`tests/helpers/`) + +- `mock-braintrust-server.ts` — Mock Braintrust API server (started automatically via global setup). +- `run-scenario.ts` — Spawns scenario scripts as subprocesses. +- `ingestion.ts` — Utilities for retrieving and waiting on data captured by the mock server. +- `normalize.ts` — Makes captured data deterministic for snapshot testing. + +### Writing a new test + +Use `runScenarioOrThrow(scenarioFile, env)` to execute a scenario. It runs the file with `tsx`, passes your env vars, and throws on non-zero exit. Default timeout is 15s. + +The main utilities you'll use in test files: + +- `createTestRunId()` — Returns a unique `e2e-{uuid}` string. Pass it to your scenario via env vars so you can filter events for your test. +- `getTestServerEnv(testRunId)` — Returns the env vars a scenario needs to talk to the mock server (`BRAINTRUST_API_URL`, `BRAINTRUST_API_KEY`, `BRAINTRUST_E2E_RUN_ID`). +- `waitForRunEvent(testRunId, predicate)` — Polls the mock server until an event matching the test run ID and predicate arrives (5s timeout, 50ms interval). Returns the matched `CapturedLogEvent`.
+- `waitForEvent(predicate)` — Same as above but without filtering by test run ID. +- `getPayloadsForRun(testRunId)` — Returns all raw `logs3` payloads for a given test run. +- `getEvents()` / `getPayloads()` — Low-level access to all captured events/payloads, with optional predicate filtering. + +Use `normalizeForSnapshot(value)` before snapshotting. It replaces timestamps with `<timestamp>`, UUIDs with indexed tokens (`<span:1>`, `<xact:1>`, `<run:1>`, `<uuid:1>`), and absolute file paths with relative ones. + +## Running + +```bash +pnpm run test:e2e # Run tests +pnpm run test:e2e:update # Run tests and update snapshots +``` From 1fca60220e1af7237dff8c494ddffdb7f8229657 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Fri, 6 Mar 2026 15:58:05 -0800 Subject: [PATCH 03/11] test(e2e): Add some initial test scenarios --- .github/workflows/e2e.yaml | 1 + e2e/package.json | 7 + e2e/scenarios/experiment-basic.ts | 79 --- e2e/scenarios/helpers.ts | 44 ++ e2e/scenarios/logger-basic.ts | 72 --- ...ai-auto-instrumentation-node-hook.impl.mjs | 66 +++ ...to-instrumentation-node-hook.openai-v4.mjs | 4 + ...to-instrumentation-node-hook.openai-v5.mjs | 4 + ...to-instrumentation-node-hook.openai-v6.mjs | 4 + e2e/scenarios/otel-compat-mixed-tracing.ts | 75 +++ e2e/scenarios/otel-span-processor-export.ts | 33 ++ .../trace-context-and-continuation.ts | 99 ++++ e2e/scenarios/trace-primitives-basic.ts | 68 +++ .../wrap-openai-conversation-traces.impl.ts | 107 ++++ ...ap-openai-conversation-traces.openai-v4.ts | 5 + ...ap-openai-conversation-traces.openai-v5.ts | 5 + ...ap-openai-conversation-traces.openai-v6.ts | 5 + .../experiment-basic.test.ts.snap | 183 ------- .../__snapshots__/logger-basic.test.ts.snap | 182 ------- ...uto-instrumentation-node-hook.test.ts.snap | 157 ++++++ .../otel-compat-mixed-tracing.test.ts.snap | 46 ++ ...race-context-and-continuation.test.ts.snap | 134 +++++ .../trace-primitives-basic.test.ts.snap | 379 +++++++++++++ ...ap-openai-conversation-traces.test.ts.snap | 496
++++++++++++++++++ e2e/tests/experiment-basic.test.ts | 43 -- e2e/tests/global-setup.ts | 14 - e2e/tests/helpers/ingestion.ts | 171 ------ e2e/tests/helpers/mock-braintrust-server.ts | 365 +++++-------- e2e/tests/helpers/normalize.ts | 73 ++- e2e/tests/helpers/openai.ts | 70 +++ e2e/tests/helpers/run-scenario.ts | 79 --- e2e/tests/helpers/scenario-harness.ts | 200 +++++++ e2e/tests/helpers/trace-selectors.ts | 40 ++ e2e/tests/helpers/trace-summary.ts | 148 ++++++ e2e/tests/logger-basic.test.ts | 43 -- ...nai-auto-instrumentation-node-hook.test.ts | 49 ++ e2e/tests/otel-compat-mixed-tracing.test.ts | 41 ++ e2e/tests/otel-span-processor-export.test.ts | 34 ++ .../trace-context-and-continuation.test.ts | 70 +++ e2e/tests/trace-primitives-basic.test.ts | 55 ++ .../wrap-openai-conversation-traces.test.ts | 128 +++++ e2e/vitest.config.mts | 1 - .../configs/openai.test.ts | 90 +++- .../auto-instrumentations/configs/openai.ts | 34 +- package.json | 1 + pnpm-lock.yaml | 50 +- turbo.json | 30 +- 47 files changed, 2923 insertions(+), 1161 deletions(-) delete mode 100644 e2e/scenarios/experiment-basic.ts create mode 100644 e2e/scenarios/helpers.ts delete mode 100644 e2e/scenarios/logger-basic.ts create mode 100644 e2e/scenarios/openai-auto-instrumentation-node-hook.impl.mjs create mode 100644 e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v4.mjs create mode 100644 e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v5.mjs create mode 100644 e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v6.mjs create mode 100644 e2e/scenarios/otel-compat-mixed-tracing.ts create mode 100644 e2e/scenarios/otel-span-processor-export.ts create mode 100644 e2e/scenarios/trace-context-and-continuation.ts create mode 100644 e2e/scenarios/trace-primitives-basic.ts create mode 100644 e2e/scenarios/wrap-openai-conversation-traces.impl.ts create mode 100644 e2e/scenarios/wrap-openai-conversation-traces.openai-v4.ts create mode 100644 
e2e/scenarios/wrap-openai-conversation-traces.openai-v5.ts create mode 100644 e2e/scenarios/wrap-openai-conversation-traces.openai-v6.ts delete mode 100644 e2e/tests/__snapshots__/experiment-basic.test.ts.snap delete mode 100644 e2e/tests/__snapshots__/logger-basic.test.ts.snap create mode 100644 e2e/tests/__snapshots__/openai-auto-instrumentation-node-hook.test.ts.snap create mode 100644 e2e/tests/__snapshots__/otel-compat-mixed-tracing.test.ts.snap create mode 100644 e2e/tests/__snapshots__/trace-context-and-continuation.test.ts.snap create mode 100644 e2e/tests/__snapshots__/trace-primitives-basic.test.ts.snap create mode 100644 e2e/tests/__snapshots__/wrap-openai-conversation-traces.test.ts.snap delete mode 100644 e2e/tests/experiment-basic.test.ts delete mode 100644 e2e/tests/global-setup.ts delete mode 100644 e2e/tests/helpers/ingestion.ts create mode 100644 e2e/tests/helpers/openai.ts delete mode 100644 e2e/tests/helpers/run-scenario.ts create mode 100644 e2e/tests/helpers/scenario-harness.ts create mode 100644 e2e/tests/helpers/trace-selectors.ts create mode 100644 e2e/tests/helpers/trace-summary.ts delete mode 100644 e2e/tests/logger-basic.test.ts create mode 100644 e2e/tests/openai-auto-instrumentation-node-hook.test.ts create mode 100644 e2e/tests/otel-compat-mixed-tracing.test.ts create mode 100644 e2e/tests/otel-span-processor-export.test.ts create mode 100644 e2e/tests/trace-context-and-continuation.test.ts create mode 100644 e2e/tests/trace-primitives-basic.test.ts create mode 100644 e2e/tests/wrap-openai-conversation-traces.test.ts diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 1d9183a0d..764a76855 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -22,6 +22,7 @@ jobs: timeout-minutes: 20 env: + BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} diff 
--git a/e2e/package.json b/e2e/package.json index b7266cc01..4cec858eb 100644 --- a/e2e/package.json +++ b/e2e/package.json @@ -7,8 +7,15 @@ "test:e2e:update": "vitest run --update" }, "devDependencies": { + "@braintrust/otel": "workspace:^", + "@opentelemetry/api": ">=1.9.0", + "@opentelemetry/context-async-hooks": ">=1.9.0", + "@opentelemetry/sdk-trace-base": ">=1.9.0", "@types/node": "^20.10.5", "braintrust": "workspace:^", + "openai": "6.25.0", + "openai-v4": "npm:openai@4.104.0", + "openai-v5": "npm:openai@5.11.0", "tsx": "^3.14.0", "typescript": "5.4.4", "vitest": "^2.1.9" diff --git a/e2e/scenarios/experiment-basic.ts b/e2e/scenarios/experiment-basic.ts deleted file mode 100644 index b4d74a70d..000000000 --- a/e2e/scenarios/experiment-basic.ts +++ /dev/null @@ -1,79 +0,0 @@ -import { initExperiment } from "braintrust"; - -async function main() { - const testRunId = process.env.BRAINTRUST_E2E_RUN_ID ?? "missing-test-run-id"; - const experiment = initExperiment("e2e-evals", { - experiment: "logger-e2e", - metadata: { - suite: "e2e", - }, - tags: ["e2e"], - }); - - await experiment.traced( - async (rootSpan) => { - rootSpan.log({ - output: { - completion: "done", - }, - expected: { - completion: "done", - }, - scores: { - pass: 1, - }, - metadata: { - scenario: "experiment-basic", - record: "sample-1", - testRunId, - }, - }); - - rootSpan.traced( - (childSpan) => { - childSpan.log({ - output: { - tool: "lookup", - status: "success", - }, - metadata: { - stage: "child", - testRunId, - }, - }); - }, - { - name: "tool-span", - event: { - input: { - tool: "lookup", - testRunId, - }, - metadata: { - testRunId, - }, - }, - }, - ); - }, - { - name: "experiment-root", - event: { - input: { - prompt: "Run an evaluation", - testRunId, - }, - metadata: { - testRunId, - }, - }, - }, - ); - - await experiment.flush(); -} - -void main().catch((error) => { - console.error(error); - process.exitCode = 1; -}); diff --git a/e2e/scenarios/helpers.ts b/e2e/scenarios/helpers.ts new 
file mode 100644 index 000000000..c3cf246a6 --- /dev/null +++ b/e2e/scenarios/helpers.ts @@ -0,0 +1,44 @@ +import { BasicTracerProvider } from "@opentelemetry/sdk-trace-base"; + +export async function collectAsync(records: AsyncIterable): Promise { + const items: T[] = []; + for await (const record of records) { + items.push(record); + } + return items; +} + +export function getTestRunId(): string { + return process.env.BRAINTRUST_E2E_RUN_ID!; +} + +export function scopedName(base: string, testRunId = getTestRunId()): string { + const suffix = testRunId.toLowerCase().replace(/[^a-z0-9-]/g, "-"); + return `${base}-${suffix}`; +} + +export function createTracerProvider(processors: unknown[]) { + const testProvider = new BasicTracerProvider(); + + if ( + typeof (testProvider as { addSpanProcessor?: unknown }).addSpanProcessor === + "function" + ) { + const provider = new BasicTracerProvider() as BasicTracerProvider & { + addSpanProcessor: (processor: unknown) => void; + }; + processors.forEach((processor) => provider.addSpanProcessor(processor)); + return provider; + } + + return new BasicTracerProvider({ + spanProcessors: processors as never, + }); +} + +export function runMain(main: () => Promise): void { + void main().catch((error) => { + console.error(error); + process.exitCode = 1; + }); +} diff --git a/e2e/scenarios/logger-basic.ts b/e2e/scenarios/logger-basic.ts deleted file mode 100644 index c38acd36a..000000000 --- a/e2e/scenarios/logger-basic.ts +++ /dev/null @@ -1,72 +0,0 @@ -import { initLogger } from "braintrust"; - -async function main() { - const testRunId = process.env.BRAINTRUST_E2E_RUN_ID ?? 
"missing-test-run-id"; - const logger = initLogger({ - projectName: "e2e-project-logs", - }); - - await logger.traced( - async (rootSpan) => { - rootSpan.log({ - output: { - answer: "4", - explanation: "basic arithmetic", - }, - metadata: { - scenario: "logger-basic", - stage: "root", - testRunId, - }, - scores: { - correct: 1, - }, - }); - - rootSpan.traced( - (childSpan) => { - childSpan.log({ - output: { - detail: "child completed", - }, - metadata: { - stage: "child", - testRunId, - }, - }); - }, - { - name: "child-span", - event: { - input: { - step: "child-work", - testRunId, - }, - metadata: { - testRunId, - }, - }, - }, - ); - }, - { - name: "root-span", - event: { - input: { - question: "What is 2 + 2?", - testRunId, - }, - metadata: { - testRunId, - }, - }, - }, - ); - - await logger.flush(); -} - -void main().catch((error) => { - console.error(error); - process.exitCode = 1; -}); diff --git a/e2e/scenarios/openai-auto-instrumentation-node-hook.impl.mjs b/e2e/scenarios/openai-auto-instrumentation-node-hook.impl.mjs new file mode 100644 index 000000000..8fba754eb --- /dev/null +++ b/e2e/scenarios/openai-auto-instrumentation-node-hook.impl.mjs @@ -0,0 +1,66 @@ +import { initLogger } from "braintrust"; + +const OPENAI_MODEL = "gpt-4o-mini"; + +function getTestRunId() { + return process.env.BRAINTRUST_E2E_RUN_ID; +} + +function scopedName(base, testRunId = getTestRunId()) { + const suffix = testRunId.toLowerCase().replace(/[^a-z0-9-]/g, "-"); + return `${base}-${suffix}`; +} + +export async function runOpenAIAutoInstrumentationNodeHook( + OpenAI, + openaiSdkVersion, +) { + const testRunId = getTestRunId(); + const logger = initLogger({ + projectName: scopedName("e2e-openai-auto-instrumentation-hook", testRunId), + }); + const client = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, + baseURL: process.env.OPENAI_BASE_URL, + }); + + await logger.traced( + async () => { + await client.chat.completions.create({ + model: OPENAI_MODEL, + messages: [ + { + role: 
"user", + content: "Auto-instrument this request.", + }, + ], + max_tokens: 8, + temperature: 0, + }); + }, + { + name: "openai-auto-hook-root", + event: { + metadata: { + scenario: "openai-auto-instrumentation-node-hook", + openaiSdkVersion, + testRunId, + }, + }, + }, + ); + + await logger.flush(); +} + +export function runOpenAIAutoInstrumentationNodeHookOrExit( + OpenAI, + openaiSdkVersion, +) { + void runOpenAIAutoInstrumentationNodeHook(OpenAI, openaiSdkVersion).catch( + (error) => { + console.error(error); + process.exitCode = 1; + }, + ); +} diff --git a/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v4.mjs b/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v4.mjs new file mode 100644 index 000000000..fbfb90974 --- /dev/null +++ b/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v4.mjs @@ -0,0 +1,4 @@ +import OpenAI from "openai-v4"; +import { runOpenAIAutoInstrumentationNodeHookOrExit } from "./openai-auto-instrumentation-node-hook.impl.mjs"; + +runOpenAIAutoInstrumentationNodeHookOrExit(OpenAI, "4.104.0"); diff --git a/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v5.mjs b/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v5.mjs new file mode 100644 index 000000000..4611d8c5f --- /dev/null +++ b/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v5.mjs @@ -0,0 +1,4 @@ +import OpenAI from "openai-v5"; +import { runOpenAIAutoInstrumentationNodeHookOrExit } from "./openai-auto-instrumentation-node-hook.impl.mjs"; + +runOpenAIAutoInstrumentationNodeHookOrExit(OpenAI, "5.11.0"); diff --git a/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v6.mjs b/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v6.mjs new file mode 100644 index 000000000..818be68e3 --- /dev/null +++ b/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v6.mjs @@ -0,0 +1,4 @@ +import OpenAI from "openai"; +import { runOpenAIAutoInstrumentationNodeHookOrExit } from 
"./openai-auto-instrumentation-node-hook.impl.mjs"; + +runOpenAIAutoInstrumentationNodeHookOrExit(OpenAI, "6.25.0"); diff --git a/e2e/scenarios/otel-compat-mixed-tracing.ts b/e2e/scenarios/otel-compat-mixed-tracing.ts new file mode 100644 index 000000000..0601265a9 --- /dev/null +++ b/e2e/scenarios/otel-compat-mixed-tracing.ts @@ -0,0 +1,75 @@ +import { context as otelContext, trace } from "@opentelemetry/api"; +import { AsyncHooksContextManager } from "@opentelemetry/context-async-hooks"; +import { BraintrustSpanProcessor, setupOtelCompat } from "@braintrust/otel"; +import { getContextManager, initLogger } from "braintrust"; +import { + createTracerProvider, + getTestRunId, + runMain, + scopedName, +} from "./helpers"; + +async function main() { + const testRunId = getTestRunId(); + setupOtelCompat(); + + const contextManager = new AsyncHooksContextManager(); + contextManager.enable(); + otelContext.setGlobalContextManager(contextManager); + + try { + const processor = new BraintrustSpanProcessor({ + apiKey: process.env.BRAINTRUST_API_KEY!, + apiUrl: process.env.BRAINTRUST_API_URL!, + parent: `project_name:${scopedName("e2e-otel-compat-mixed-tracing", testRunId)}`, + }); + const provider = createTracerProvider([processor]); + trace.setGlobalTracerProvider(provider); + + const tracer = trace.getTracer("e2e-otel-compat"); + const logger = initLogger({ + projectName: scopedName("e2e-otel-compat-mixed-tracing", testRunId), + }); + const btRoot = logger.startSpan({ + name: "bt-root", + event: { + metadata: { + scenario: "otel-compat-mixed-tracing", + testRunId, + }, + }, + }); + const contextManagerFacade = getContextManager(); + + await contextManagerFacade.runInContext(btRoot, async () => { + await tracer.startActiveSpan("otel-middle", async (otelSpan) => { + const btChild = logger.startSpan({ + name: "bt-child-under-otel", + event: { + metadata: { + kind: "bt-child-under-otel", + testRunId, + }, + }, + }); + btChild.log({ + output: { + source: "otel-child-context", 
+ }, + }); + btChild.end(); + otelSpan.end(); + }); + }); + btRoot.end(); + + await logger.flush(); + await processor.forceFlush(); + await (provider as { shutdown?: () => Promise }).shutdown?.(); + } finally { + otelContext.disable(); + contextManager.disable(); + } +} + +runMain(main); diff --git a/e2e/scenarios/otel-span-processor-export.ts b/e2e/scenarios/otel-span-processor-export.ts new file mode 100644 index 000000000..af487fb56 --- /dev/null +++ b/e2e/scenarios/otel-span-processor-export.ts @@ -0,0 +1,33 @@ +import { context, trace } from "@opentelemetry/api"; +import { BraintrustSpanProcessor } from "@braintrust/otel"; +import { + createTracerProvider, + getTestRunId, + runMain, + scopedName, +} from "./helpers"; + +async function main() { + const testRunId = getTestRunId(); + const processor = new BraintrustSpanProcessor({ + apiKey: process.env.BRAINTRUST_API_KEY!, + apiUrl: process.env.BRAINTRUST_API_URL!, + filterAISpans: true, + parent: `project_name:${scopedName("e2e-otel-span-processor-export", testRunId)}`, + }); + const provider = createTracerProvider([processor]); + trace.setGlobalTracerProvider(provider); + + const tracer = trace.getTracer("e2e-otel-export"); + const rootSpan = tracer.startSpan("root-operation"); + const rootContext = trace.setSpan(context.active(), rootSpan); + const aiSpan = tracer.startSpan("gen_ai.completion", undefined, rootContext); + aiSpan.setAttribute("gen_ai.system", "openai"); + aiSpan.end(); + rootSpan.end(); + + await processor.forceFlush(); + await (provider as { shutdown?: () => Promise }).shutdown?.(); +} + +runMain(main); diff --git a/e2e/scenarios/trace-context-and-continuation.ts b/e2e/scenarios/trace-context-and-continuation.ts new file mode 100644 index 000000000..156787469 --- /dev/null +++ b/e2e/scenarios/trace-context-and-continuation.ts @@ -0,0 +1,99 @@ +import { + flush, + initLogger, + startSpan, + traced, + updateSpan, + withCurrent, + withParent, +} from "braintrust"; +import { getTestRunId, runMain, 
scopedName } from "./helpers"; + +async function main() { + const testRunId = getTestRunId(); + const logger = initLogger({ + projectName: scopedName("e2e-trace-context-and-continuation", testRunId), + }); + + const rootSpan = logger.startSpan({ + name: "context-root", + event: { + metadata: { + scenario: "trace-context-and-continuation", + testRunId, + }, + }, + }); + const exportedRoot = await rootSpan.export(); + + await withCurrent(rootSpan, async () => { + const currentChild = startSpan({ + name: "current-child", + event: { + metadata: { + kind: "current-child", + testRunId, + }, + }, + }); + currentChild.log({ + output: { + source: "withCurrent", + }, + }); + currentChild.end(); + }); + + rootSpan.end(); + + await withParent(exportedRoot, async () => { + await traced( + (span) => { + span.log({ + output: { + resumed: true, + }, + }); + }, + { + name: "reattached-child", + event: { + metadata: { + kind: "reattached-child", + testRunId, + }, + }, + }, + ); + }); + + const updatableSpan = logger.startSpan({ + name: "late-update", + event: { + metadata: { + kind: "late-update", + testRunId, + }, + }, + }); + const exportedUpdatableSpan = await updatableSpan.export(); + updatableSpan.end(); + + await logger.flush(); + + updateSpan({ + exported: exportedUpdatableSpan, + metadata: { + kind: "late-update", + patched: true, + testRunId, + }, + output: { + state: "updated", + }, + }); + + await flush(); +} + +runMain(main); diff --git a/e2e/scenarios/trace-primitives-basic.ts b/e2e/scenarios/trace-primitives-basic.ts new file mode 100644 index 000000000..e3850d11b --- /dev/null +++ b/e2e/scenarios/trace-primitives-basic.ts @@ -0,0 +1,68 @@ +import { initLogger, logError, startSpan } from "braintrust"; +import { getTestRunId, runMain, scopedName } from "./helpers"; + +async function main() { + const testRunId = getTestRunId(); + const logger = initLogger({ + projectName: scopedName("e2e-trace-primitives-basic", testRunId), + }); + + await logger.traced( + async 
(rootSpan) => { + const childSpan = startSpan({ + name: "basic-child", + event: { + input: { + step: "child", + testRunId, + }, + metadata: { + kind: "basic-child", + testRunId, + }, + }, + }); + childSpan.log({ + output: { + ok: true, + }, + }); + childSpan.end(); + + const errorSpan = startSpan({ + name: "basic-error", + event: { + metadata: { + kind: "basic-error", + testRunId, + }, + }, + }); + logError(errorSpan, new Error("basic boom")); + errorSpan.end(); + + rootSpan.log({ + output: { + status: "ok", + }, + }); + }, + { + name: "trace-primitives-root", + event: { + input: { + scenario: "trace-primitives-basic", + testRunId, + }, + metadata: { + scenario: "trace-primitives-basic", + testRunId, + }, + }, + }, + ); + + await logger.flush(); +} + +runMain(main); diff --git a/e2e/scenarios/wrap-openai-conversation-traces.impl.ts b/e2e/scenarios/wrap-openai-conversation-traces.impl.ts new file mode 100644 index 000000000..9ad5fd501 --- /dev/null +++ b/e2e/scenarios/wrap-openai-conversation-traces.impl.ts @@ -0,0 +1,107 @@ +import { initLogger, startSpan, withCurrent, wrapOpenAI } from "braintrust"; +import { collectAsync, getTestRunId, scopedName } from "./helpers"; + +const OPENAI_MODEL = "gpt-4o-mini"; + +export async function runWrapOpenAIConversationTraces( + OpenAI: any, + openaiSdkVersion: string, +) { + const testRunId = getTestRunId(); + const logger = initLogger({ + projectName: scopedName("e2e-wrap-openai-conversation", testRunId), + }); + const client = wrapOpenAI( + new OpenAI({ + apiKey: process.env.OPENAI_API_KEY!, + baseURL: process.env.OPENAI_BASE_URL, + }), + ); + + await logger.traced( + async () => { + const chatSpan = startSpan({ + name: "openai-chat-operation", + event: { + metadata: { + operation: "chat", + testRunId, + }, + }, + }); + await withCurrent(chatSpan, async () => { + await client.chat.completions.create({ + model: OPENAI_MODEL, + messages: [ + { + role: "user", + content: "Reply with exactly OK.", + }, + ], + max_tokens: 8, + 
temperature: 0, + }); + }); + chatSpan.end(); + + const streamSpan = startSpan({ + name: "openai-stream-operation", + event: { + metadata: { + operation: "stream", + testRunId, + }, + }, + }); + await withCurrent(streamSpan, async () => { + const chatStream = await client.chat.completions.create({ + model: OPENAI_MODEL, + messages: [ + { + role: "user", + content: "Reply with exactly STREAM.", + }, + ], + stream: true, + max_tokens: 8, + temperature: 0, + stream_options: { + include_usage: true, + }, + }); + await collectAsync(chatStream); + }); + streamSpan.end(); + + const responsesSpan = startSpan({ + name: "openai-responses-operation", + event: { + metadata: { + operation: "responses", + testRunId, + }, + }, + }); + await withCurrent(responsesSpan, async () => { + await client.responses.create({ + model: OPENAI_MODEL, + input: "Reply with exactly PARIS.", + max_output_tokens: 16, + }); + }); + responsesSpan.end(); + }, + { + name: "openai-wrapper-root", + event: { + metadata: { + scenario: "wrap-openai-conversation-traces", + openaiSdkVersion, + testRunId, + }, + }, + }, + ); + + await logger.flush(); +} diff --git a/e2e/scenarios/wrap-openai-conversation-traces.openai-v4.ts b/e2e/scenarios/wrap-openai-conversation-traces.openai-v4.ts new file mode 100644 index 000000000..7ab124a56 --- /dev/null +++ b/e2e/scenarios/wrap-openai-conversation-traces.openai-v4.ts @@ -0,0 +1,5 @@ +import OpenAI from "openai-v4"; +import { runMain } from "./helpers"; +import { runWrapOpenAIConversationTraces } from "./wrap-openai-conversation-traces.impl"; + +runMain(() => runWrapOpenAIConversationTraces(OpenAI, "4.104.0")); diff --git a/e2e/scenarios/wrap-openai-conversation-traces.openai-v5.ts b/e2e/scenarios/wrap-openai-conversation-traces.openai-v5.ts new file mode 100644 index 000000000..31b35745e --- /dev/null +++ b/e2e/scenarios/wrap-openai-conversation-traces.openai-v5.ts @@ -0,0 +1,5 @@ +import OpenAI from "openai-v5"; +import { runMain } from "./helpers"; +import { 
runWrapOpenAIConversationTraces } from "./wrap-openai-conversation-traces.impl"; + +runMain(() => runWrapOpenAIConversationTraces(OpenAI, "5.11.0")); diff --git a/e2e/scenarios/wrap-openai-conversation-traces.openai-v6.ts b/e2e/scenarios/wrap-openai-conversation-traces.openai-v6.ts new file mode 100644 index 000000000..10f70bac2 --- /dev/null +++ b/e2e/scenarios/wrap-openai-conversation-traces.openai-v6.ts @@ -0,0 +1,5 @@ +import OpenAI from "openai"; +import { runMain } from "./helpers"; +import { runWrapOpenAIConversationTraces } from "./wrap-openai-conversation-traces.impl"; + +runMain(() => runWrapOpenAIConversationTraces(OpenAI, "6.25.0")); diff --git a/e2e/tests/__snapshots__/experiment-basic.test.ts.snap b/e2e/tests/__snapshots__/experiment-basic.test.ts.snap deleted file mode 100644 index 8309a2954..000000000 --- a/e2e/tests/__snapshots__/experiment-basic.test.ts.snap +++ /dev/null @@ -1,183 +0,0 @@ -// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html - -exports[`experiment-basic registers an experiment and sends normalized experiment logs > logs3-payloads 1`] = ` -[ - { - "api_version": 2, - "rows": [ - { - "_is_merge": false, - "context": { - "caller_filename": "/e2e/scenarios/experiment-basic.ts", - "caller_functionname": "main", - "caller_lineno": 13, - }, - "created": "", - "experiment_id": "experiment:logger-e2e", - "id": "", - "input": { - "prompt": "Run an evaluation", - "testRunId": "", - }, - "metadata": { - "testRunId": "", - }, - "metrics": { - "start": 0, - }, - "root_span_id": "", - "span_attributes": { - "exec_counter": 0, - "name": "experiment-root", - "type": "eval", - }, - "span_id": "", - }, - ], - }, - { - "api_version": 2, - "rows": [ - { - "_is_merge": true, - "expected": { - "completion": "done", - }, - "experiment_id": "experiment:logger-e2e", - "id": "", - "metadata": { - "record": "sample-1", - "scenario": "experiment-basic", - "testRunId": "", - }, - "metrics": { - "end": 0, - }, - "output": { - "completion": "done", - 
}, - "root_span_id": "", - "scores": { - "pass": 1, - }, - "span_id": "", - }, - { - "context": { - "caller_filename": "/e2e/scenarios/experiment-basic.ts", - "caller_functionname": "experiment.traced.name", - "caller_lineno": 32, - }, - "created": "", - "experiment_id": "experiment:logger-e2e", - "id": "", - "input": { - "testRunId": "", - "tool": "lookup", - }, - "metadata": { - "stage": "child", - "testRunId": "", - }, - "metrics": { - "end": 0, - "start": 0, - }, - "output": { - "status": "success", - "tool": "lookup", - }, - "root_span_id": "", - "span_attributes": { - "exec_counter": 1, - "name": "tool-span", - }, - "span_id": "", - "span_parents": [ - "", - ], - }, - ], - }, -] -`; - -exports[`experiment-basic registers an experiment and sends normalized experiment logs > root-span 1`] = ` -{ - "context": { - "caller_filename": "/e2e/scenarios/experiment-basic.ts", - "caller_functionname": "main", - "caller_lineno": 13, - }, - "created": "", - "expected": { - "completion": "done", - }, - "experiment_id": "experiment:logger-e2e", - "id": "", - "input": { - "prompt": "Run an evaluation", - "testRunId": "", - }, - "metadata": { - "record": "sample-1", - "scenario": "experiment-basic", - "testRunId": "", - }, - "metrics": { - "end": 0, - "start": 0, - }, - "output": { - "completion": "done", - }, - "root_span_id": "", - "scores": { - "pass": 1, - }, - "span_attributes": { - "exec_counter": 0, - "name": "experiment-root", - "type": "eval", - }, - "span_id": "", -} -`; - -exports[`experiment-basic registers an experiment and sends normalized experiment logs > tool-span 1`] = ` -{ - "context": { - "caller_filename": "/e2e/scenarios/experiment-basic.ts", - "caller_functionname": "experiment.traced.name", - "caller_lineno": 32, - }, - "created": "", - "experiment_id": "experiment:logger-e2e", - "id": "", - "input": { - "testRunId": "", - "tool": "lookup", - }, - "metadata": { - "stage": "child", - "testRunId": "", - }, - "metrics": { - "end": 0, - "start": 0, - }, - 
"output": { - "status": "success", - "tool": "lookup", - }, - "root_span_id": "", - "span_attributes": { - "exec_counter": 1, - "name": "tool-span", - }, - "span_id": "", - "span_parents": [ - "", - ], -} -`; diff --git a/e2e/tests/__snapshots__/logger-basic.test.ts.snap b/e2e/tests/__snapshots__/logger-basic.test.ts.snap deleted file mode 100644 index bb41ce2ad..000000000 --- a/e2e/tests/__snapshots__/logger-basic.test.ts.snap +++ /dev/null @@ -1,182 +0,0 @@ -// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html - -exports[`logger-basic registers a project and sends normalized project logs > child-span 1`] = ` -{ - "context": { - "caller_filename": "/e2e/scenarios/logger-basic.ts", - "caller_functionname": "logger.traced.name", - "caller_lineno": 26, - }, - "created": "", - "id": "", - "input": { - "step": "child-work", - "testRunId": "", - }, - "log_id": "g", - "metadata": { - "stage": "child", - "testRunId": "", - }, - "metrics": { - "end": 0, - "start": 0, - }, - "output": { - "detail": "child completed", - }, - "project_id": "project:e2e-project-logs", - "root_span_id": "", - "span_attributes": { - "exec_counter": 1, - "name": "child-span", - }, - "span_id": "", - "span_parents": [ - "", - ], -} -`; - -exports[`logger-basic registers a project and sends normalized project logs > logs3-payloads 1`] = ` -[ - { - "api_version": 2, - "rows": [ - { - "_is_merge": false, - "context": { - "caller_filename": "/e2e/scenarios/logger-basic.ts", - "caller_functionname": "main", - "caller_lineno": 9, - }, - "created": "", - "id": "", - "input": { - "question": "What is 2 + 2?", - "testRunId": "", - }, - "log_id": "g", - "metadata": { - "testRunId": "", - }, - "metrics": { - "start": 0, - }, - "project_id": "project:e2e-project-logs", - "root_span_id": "", - "span_attributes": { - "exec_counter": 0, - "name": "root-span", - "type": "task", - }, - "span_id": "", - }, - ], - }, - { - "api_version": 2, - "rows": [ - { - "_is_merge": true, - "id": "", - "log_id": "g", - 
"metadata": { - "scenario": "logger-basic", - "stage": "root", - "testRunId": "", - }, - "metrics": { - "end": 0, - }, - "output": { - "answer": "4", - "explanation": "basic arithmetic", - }, - "project_id": "project:e2e-project-logs", - "root_span_id": "", - "scores": { - "correct": 1, - }, - "span_id": "", - }, - { - "context": { - "caller_filename": "/e2e/scenarios/logger-basic.ts", - "caller_functionname": "logger.traced.name", - "caller_lineno": 26, - }, - "created": "", - "id": "", - "input": { - "step": "child-work", - "testRunId": "", - }, - "log_id": "g", - "metadata": { - "stage": "child", - "testRunId": "", - }, - "metrics": { - "end": 0, - "start": 0, - }, - "output": { - "detail": "child completed", - }, - "project_id": "project:e2e-project-logs", - "root_span_id": "", - "span_attributes": { - "exec_counter": 1, - "name": "child-span", - }, - "span_id": "", - "span_parents": [ - "", - ], - }, - ], - }, -] -`; - -exports[`logger-basic registers a project and sends normalized project logs > root-span 1`] = ` -{ - "context": { - "caller_filename": "/e2e/scenarios/logger-basic.ts", - "caller_functionname": "main", - "caller_lineno": 9, - }, - "created": "", - "id": "", - "input": { - "question": "What is 2 + 2?", - "testRunId": "", - }, - "log_id": "g", - "metadata": { - "scenario": "logger-basic", - "stage": "root", - "testRunId": "", - }, - "metrics": { - "end": 0, - "start": 0, - }, - "output": { - "answer": "4", - "explanation": "basic arithmetic", - }, - "project_id": "project:e2e-project-logs", - "root_span_id": "", - "scores": { - "correct": 1, - }, - "span_attributes": { - "exec_counter": 0, - "name": "root-span", - "type": "task", - }, - "span_id": "", -} -`; diff --git a/e2e/tests/__snapshots__/openai-auto-instrumentation-node-hook.test.ts.snap b/e2e/tests/__snapshots__/openai-auto-instrumentation-node-hook.test.ts.snap new file mode 100644 index 000000000..d7011e783 --- /dev/null +++ 
b/e2e/tests/__snapshots__/openai-auto-instrumentation-node-hook.test.ts.snap @@ -0,0 +1,157 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`openai auto-instrumentation via node hook collects traces without manual wrapping (openai 4.104.0) > span-events 1`] = ` +[ + { + "has_input": false, + "has_output": false, + "metadata": { + "has_model": false, + "openaiSdkVersion": "4.104.0", + "operation": null, + "provider": null, + "scenario": "openai-auto-instrumentation-node-hook", + }, + "metric_keys": [], + "name": "openai-auto-hook-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task", + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "has_model": true, + "openaiSdkVersion": null, + "operation": null, + "provider": "openai", + "scenario": null, + }, + "metric_keys": [ + "completion_accepted_prediction_tokens", + "completion_audio_tokens", + "completion_reasoning_tokens", + "completion_rejected_prediction_tokens", + "completion_tokens", + "prompt_audio_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens", + ], + "name": "Chat Completion", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": "llm", + }, +] +`; + +exports[`openai auto-instrumentation via node hook collects traces without manual wrapping (openai 5.11.0) > span-events 1`] = ` +[ + { + "has_input": false, + "has_output": false, + "metadata": { + "has_model": false, + "openaiSdkVersion": "5.11.0", + "operation": null, + "provider": null, + "scenario": "openai-auto-instrumentation-node-hook", + }, + "metric_keys": [], + "name": "openai-auto-hook-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task", + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "has_model": true, + "openaiSdkVersion": null, + "operation": null, + "provider": "openai", + "scenario": null, + }, + "metric_keys": [ + 
"completion_accepted_prediction_tokens", + "completion_audio_tokens", + "completion_reasoning_tokens", + "completion_rejected_prediction_tokens", + "completion_tokens", + "prompt_audio_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens", + ], + "name": "Chat Completion", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": "llm", + }, +] +`; + +exports[`openai auto-instrumentation via node hook collects traces without manual wrapping (openai 6.25.0) > span-events 1`] = ` +[ + { + "has_input": false, + "has_output": false, + "metadata": { + "has_model": false, + "openaiSdkVersion": "6.25.0", + "operation": null, + "provider": null, + "scenario": "openai-auto-instrumentation-node-hook", + }, + "metric_keys": [], + "name": "openai-auto-hook-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task", + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "has_model": true, + "openaiSdkVersion": null, + "operation": null, + "provider": "openai", + "scenario": null, + }, + "metric_keys": [ + "completion_accepted_prediction_tokens", + "completion_audio_tokens", + "completion_reasoning_tokens", + "completion_rejected_prediction_tokens", + "completion_tokens", + "prompt_audio_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens", + ], + "name": "Chat Completion", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": "llm", + }, +] +`; diff --git a/e2e/tests/__snapshots__/otel-compat-mixed-tracing.test.ts.snap b/e2e/tests/__snapshots__/otel-compat-mixed-tracing.test.ts.snap new file mode 100644 index 000000000..8ee531240 --- /dev/null +++ b/e2e/tests/__snapshots__/otel-compat-mixed-tracing.test.ts.snap @@ -0,0 +1,46 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`otel-compat-mixed-tracing unifies Braintrust and OTEL spans into one trace > braintrust-span-events 1`] = ` +[ + { + 
"error": null, + "input": null, + "metadata": { + "scenario": "otel-compat-mixed-tracing", + "testRunId": "", + }, + "name": "bt-root", + "output": null, + "root_span_id": "", + "span_attributes": { + "exec_counter": 0, + "name": "bt-root", + "type": "task", + }, + "span_id": "", + "span_parents": null, + }, + { + "error": null, + "input": null, + "metadata": { + "kind": "bt-child-under-otel", + "testRunId": "", + }, + "name": "bt-child-under-otel", + "output": { + "source": "otel-child-context", + }, + "root_span_id": "", + "span_attributes": { + "exec_counter": 1, + "name": "bt-child-under-otel", + "type": "task", + }, + "span_id": "", + "span_parents": [ + "", + ], + }, +] +`; diff --git a/e2e/tests/__snapshots__/trace-context-and-continuation.test.ts.snap b/e2e/tests/__snapshots__/trace-context-and-continuation.test.ts.snap new file mode 100644 index 000000000..2c327a559 --- /dev/null +++ b/e2e/tests/__snapshots__/trace-context-and-continuation.test.ts.snap @@ -0,0 +1,134 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`trace-context-and-continuation supports reattachment and late span updates > late-update-payloads 1`] = ` +[ + { + "context": { + "caller_filename": "/e2e/scenarios/trace-context-and-continuation.ts", + "caller_functionname": "main", + "caller_lineno": 70, + }, + "created": "", + "id": "", + "log_id": "g", + "metadata": { + "kind": "late-update", + "testRunId": "", + }, + "metrics": { + "end": 0, + "start": 0, + }, + "project_id": "", + "root_span_id": "", + "span_attributes": { + "exec_counter": 3, + "name": "late-update", + "type": "task", + }, + "span_id": "", + }, + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metadata": { + "kind": "late-update", + "patched": true, + "testRunId": "", + }, + "output": { + "state": "updated", + }, + "project_id": "", + "root_span_id": "", + "span_id": "", + }, +] +`; + +exports[`trace-context-and-continuation supports reattachment and late span updates > span-events 1`] 
= ` +[ + { + "error": null, + "input": null, + "metadata": { + "scenario": "trace-context-and-continuation", + "testRunId": "", + }, + "name": "context-root", + "output": null, + "root_span_id": "", + "span_attributes": { + "exec_counter": 0, + "name": "context-root", + "type": "task", + }, + "span_id": "", + "span_parents": null, + }, + { + "error": null, + "input": null, + "metadata": { + "kind": "current-child", + "testRunId": "", + }, + "name": "current-child", + "output": { + "source": "withCurrent", + }, + "root_span_id": "", + "span_attributes": { + "exec_counter": 1, + "name": "current-child", + }, + "span_id": "", + "span_parents": [ + "", + ], + }, + { + "error": null, + "input": null, + "metadata": { + "kind": "reattached-child", + "testRunId": "", + }, + "name": "reattached-child", + "output": { + "resumed": true, + }, + "root_span_id": "", + "span_attributes": { + "exec_counter": 2, + "name": "reattached-child", + }, + "span_id": "", + "span_parents": [ + "", + ], + }, + { + "error": null, + "input": null, + "metadata": { + "kind": "late-update", + "patched": true, + "testRunId": "", + }, + "name": "late-update", + "output": { + "state": "updated", + }, + "root_span_id": "", + "span_attributes": { + "exec_counter": 3, + "name": "late-update", + "type": "task", + }, + "span_id": "", + "span_parents": null, + }, +] +`; diff --git a/e2e/tests/__snapshots__/trace-primitives-basic.test.ts.snap b/e2e/tests/__snapshots__/trace-primitives-basic.test.ts.snap new file mode 100644 index 000000000..0f736c84f --- /dev/null +++ b/e2e/tests/__snapshots__/trace-primitives-basic.test.ts.snap @@ -0,0 +1,379 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`trace-primitives-basic collects a minimal manual trace tree > request-flow 1`] = ` +[ + { + "headers": null, + "jsonBody": null, + "method": "POST", + "path": "/api/apikey/login", + "query": null, + "rawBody": null, + }, + { + "headers": null, + "jsonBody": { + "org_id": "mock-org-id", + 
"project_name": "e2e-trace-primitives-basic-e2e-", + }, + "method": "POST", + "path": "/api/project/register", + "query": null, + "rawBody": { + "org_id": "mock-org-id", + "project_name": "e2e-trace-primitives-basic-e2e-", + }, + }, + { + "headers": null, + "jsonBody": null, + "method": "GET", + "path": "/version", + "query": null, + "rawBody": null, + }, + { + "headers": null, + "jsonBody": { + "api_version": 2, + "rows": [ + { + "_is_merge": false, + "context": { + "caller_filename": "/e2e/scenarios/trace-primitives-basic.ts", + "caller_functionname": "main", + "caller_lineno": 10, + }, + "created": "", + "id": "", + "input": { + "scenario": "trace-primitives-basic", + "testRunId": "", + }, + "log_id": "g", + "metadata": { + "scenario": "trace-primitives-basic", + "testRunId": "", + }, + "metrics": { + "start": 0, + }, + "project_id": "", + "root_span_id": "", + "span_attributes": { + "exec_counter": 0, + "name": "trace-primitives-root", + "type": "task", + }, + "span_id": "", + }, + ], + }, + "method": "POST", + "path": "/logs3", + "query": null, + "rawBody": { + "api_version": 2, + "rows": [ + { + "_is_merge": false, + "context": { + "caller_filename": "/e2e/scenarios/trace-primitives-basic.ts", + "caller_functionname": "main", + "caller_lineno": 10, + }, + "created": "", + "id": "", + "input": { + "scenario": "trace-primitives-basic", + "testRunId": "", + }, + "log_id": "g", + "metadata": { + "scenario": "trace-primitives-basic", + "testRunId": "", + }, + "metrics": { + "start": 0, + }, + "project_id": "", + "root_span_id": "", + "span_attributes": { + "exec_counter": 0, + "name": "trace-primitives-root", + "type": "task", + }, + "span_id": "", + }, + ], + }, + }, + { + "headers": null, + "jsonBody": { + "api_version": 2, + "rows": [ + { + "context": { + "caller_filename": "/e2e/scenarios/trace-primitives-basic.ts", + "caller_functionname": "logger.traced.name", + "caller_lineno": 12, + }, + "created": "", + "id": "", + "input": { + "step": "child", + 
"testRunId": "", + }, + "log_id": "g", + "metadata": { + "kind": "basic-child", + "testRunId": "", + }, + "metrics": { + "end": 0, + "start": 0, + }, + "output": { + "ok": true, + }, + "project_id": "", + "root_span_id": "", + "span_attributes": { + "exec_counter": 1, + "name": "basic-child", + }, + "span_id": "", + "span_parents": [ + "", + ], + }, + { + "context": { + "caller_filename": "/e2e/scenarios/trace-primitives-basic.ts", + "caller_functionname": "logger.traced.name", + "caller_lineno": 32, + }, + "created": "", + "error": "basic boom + +Error: basic boom + at logger.traced.name (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/e2e/scenarios/trace-primitives-basic.ts:41:27) + at /Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:7507:63 + at AsyncLocalStorage.run (node:internal/async_local_storage/async_context_frame:63:14) + at BraintrustContextManager.runInContext (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:4421:30) + at withCurrent (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:7507:38) + at /Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:5723:18 + at runCatchFinally (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:1551:17) + at _class9.traced (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:5720:17) + at main (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/e2e/scenarios/trace-primitives-basic.ts:10:16) + at runMain (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/e2e/scenarios/helpers.ts:40:8)", + "id": "", + "log_id": "g", + "metadata": { + "kind": "basic-error", + "testRunId": "", + }, + "metrics": { + "end": 0, + "start": 0, + }, + "project_id": "", + "root_span_id": "", + "span_attributes": { + "exec_counter": 2, 
+ "name": "basic-error", + }, + "span_id": "", + "span_parents": [ + "", + ], + }, + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metrics": { + "end": 0, + }, + "output": { + "status": "ok", + }, + "project_id": "", + "root_span_id": "", + "span_id": "", + }, + ], + }, + "method": "POST", + "path": "/logs3", + "query": null, + "rawBody": { + "api_version": 2, + "rows": [ + { + "context": { + "caller_filename": "/e2e/scenarios/trace-primitives-basic.ts", + "caller_functionname": "logger.traced.name", + "caller_lineno": 12, + }, + "created": "", + "id": "", + "input": { + "step": "child", + "testRunId": "", + }, + "log_id": "g", + "metadata": { + "kind": "basic-child", + "testRunId": "", + }, + "metrics": { + "end": 0, + "start": 0, + }, + "output": { + "ok": true, + }, + "project_id": "", + "root_span_id": "", + "span_attributes": { + "exec_counter": 1, + "name": "basic-child", + }, + "span_id": "", + "span_parents": [ + "", + ], + }, + { + "context": { + "caller_filename": "/e2e/scenarios/trace-primitives-basic.ts", + "caller_functionname": "logger.traced.name", + "caller_lineno": 32, + }, + "created": "", + "error": "basic boom + +Error: basic boom + at logger.traced.name (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/e2e/scenarios/trace-primitives-basic.ts:41:27) + at /Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:7507:63 + at AsyncLocalStorage.run (node:internal/async_local_storage/async_context_frame:63:14) + at BraintrustContextManager.runInContext (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:4421:30) + at withCurrent (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:7507:38) + at /Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:5723:18 + at runCatchFinally 
(/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:1551:17) + at _class9.traced (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:5720:17) + at main (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/e2e/scenarios/trace-primitives-basic.ts:10:16) + at runMain (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/e2e/scenarios/helpers.ts:40:8)", + "id": "", + "log_id": "g", + "metadata": { + "kind": "basic-error", + "testRunId": "", + }, + "metrics": { + "end": 0, + "start": 0, + }, + "project_id": "", + "root_span_id": "", + "span_attributes": { + "exec_counter": 2, + "name": "basic-error", + }, + "span_id": "", + "span_parents": [ + "", + ], + }, + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metrics": { + "end": 0, + }, + "output": { + "status": "ok", + }, + "project_id": "", + "root_span_id": "", + "span_id": "", + }, + ], + }, + }, +] +`; + +exports[`trace-primitives-basic collects a minimal manual trace tree > span-events 1`] = ` +[ + { + "error": null, + "input": { + "scenario": "trace-primitives-basic", + "testRunId": "", + }, + "metadata": { + "scenario": "trace-primitives-basic", + "testRunId": "", + }, + "name": "trace-primitives-root", + "output": { + "status": "ok", + }, + "root_span_id": "", + "span_attributes": { + "exec_counter": 0, + "name": "trace-primitives-root", + "type": "task", + }, + "span_id": "", + "span_parents": null, + }, + { + "error": null, + "input": { + "step": "child", + "testRunId": "", + }, + "metadata": { + "kind": "basic-child", + "testRunId": "", + }, + "name": "basic-child", + "output": { + "ok": true, + }, + "root_span_id": "", + "span_attributes": { + "exec_counter": 1, + "name": "basic-child", + }, + "span_id": "", + "span_parents": [ + "", + ], + }, + { + "error": "basic boom", + "input": null, + "metadata": { + "kind": "basic-error", + "testRunId": "", + }, + "name": "basic-error", + 
"output": null, + "root_span_id": "", + "span_attributes": { + "exec_counter": 2, + "name": "basic-error", + }, + "span_id": "", + "span_parents": [ + "", + ], + }, +] +`; diff --git a/e2e/tests/__snapshots__/wrap-openai-conversation-traces.test.ts.snap b/e2e/tests/__snapshots__/wrap-openai-conversation-traces.test.ts.snap new file mode 100644 index 000000000..77dec2548 --- /dev/null +++ b/e2e/tests/__snapshots__/wrap-openai-conversation-traces.test.ts.snap @@ -0,0 +1,496 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`wrap-openai-conversation-traces logs wrapped chat and responses traces (openai 4.104.0) > span-events 1`] = ` +[ + { + "has_input": false, + "has_output": false, + "metadata": { + "has_model": false, + "openaiSdkVersion": "4.104.0", + "operation": null, + "provider": null, + "scenario": "wrap-openai-conversation-traces", + }, + "metric_keys": [], + "name": "openai-wrapper-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task", + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "has_model": false, + "openaiSdkVersion": null, + "operation": null, + "provider": null, + "scenario": null, + }, + "metric_keys": [], + "name": "openai-chat-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": null, + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "has_model": true, + "openaiSdkVersion": null, + "operation": null, + "provider": "openai", + "scenario": null, + }, + "metric_keys": [ + "completion_accepted_prediction_tokens", + "completion_audio_tokens", + "completion_reasoning_tokens", + "completion_rejected_prediction_tokens", + "completion_tokens", + "prompt_audio_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens", + ], + "name": "Chat Completion", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": "llm", + }, + { + "has_input": false, + "has_output": 
false, + "metadata": { + "has_model": false, + "openaiSdkVersion": null, + "operation": null, + "provider": null, + "scenario": null, + }, + "metric_keys": [], + "name": "openai-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": null, + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "has_model": true, + "openaiSdkVersion": null, + "operation": null, + "provider": "openai", + "scenario": null, + }, + "metric_keys": [ + "completion_accepted_prediction_tokens", + "completion_audio_tokens", + "completion_reasoning_tokens", + "completion_rejected_prediction_tokens", + "completion_tokens", + "prompt_audio_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens", + ], + "name": "Chat Completion", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": "llm", + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "has_model": false, + "openaiSdkVersion": null, + "operation": null, + "provider": null, + "scenario": null, + }, + "metric_keys": [], + "name": "openai-responses-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": null, + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "has_model": true, + "openaiSdkVersion": null, + "operation": null, + "provider": "openai", + "scenario": null, + }, + "metric_keys": [ + "completion_reasoning_tokens", + "completion_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens", + ], + "name": "openai.responses.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": "llm", + }, +] +`; + +exports[`wrap-openai-conversation-traces logs wrapped chat and responses traces (openai 5.11.0) > span-events 1`] = ` +[ + { + "has_input": false, + "has_output": false, + "metadata": { + "has_model": false, + "openaiSdkVersion": "5.11.0", + "operation": null, + "provider": null, + 
"scenario": "wrap-openai-conversation-traces", + }, + "metric_keys": [], + "name": "openai-wrapper-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task", + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "has_model": false, + "openaiSdkVersion": null, + "operation": null, + "provider": null, + "scenario": null, + }, + "metric_keys": [], + "name": "openai-chat-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": null, + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "has_model": true, + "openaiSdkVersion": null, + "operation": null, + "provider": "openai", + "scenario": null, + }, + "metric_keys": [ + "completion_accepted_prediction_tokens", + "completion_audio_tokens", + "completion_reasoning_tokens", + "completion_rejected_prediction_tokens", + "completion_tokens", + "prompt_audio_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens", + ], + "name": "Chat Completion", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": "llm", + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "has_model": false, + "openaiSdkVersion": null, + "operation": null, + "provider": null, + "scenario": null, + }, + "metric_keys": [], + "name": "openai-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": null, + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "has_model": true, + "openaiSdkVersion": null, + "operation": null, + "provider": "openai", + "scenario": null, + }, + "metric_keys": [ + "completion_accepted_prediction_tokens", + "completion_audio_tokens", + "completion_reasoning_tokens", + "completion_rejected_prediction_tokens", + "completion_tokens", + "prompt_audio_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens", + ], + "name": "Chat Completion", + "root_span_id": "", + 
"span_id": "", + "span_parents": [ + "", + ], + "type": "llm", + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "has_model": false, + "openaiSdkVersion": null, + "operation": null, + "provider": null, + "scenario": null, + }, + "metric_keys": [], + "name": "openai-responses-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": null, + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "has_model": true, + "openaiSdkVersion": null, + "operation": null, + "provider": "openai", + "scenario": null, + }, + "metric_keys": [ + "completion_reasoning_tokens", + "completion_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens", + ], + "name": "openai.responses.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": "llm", + }, +] +`; + +exports[`wrap-openai-conversation-traces logs wrapped chat and responses traces (openai 6.25.0) > span-events 1`] = ` +[ + { + "has_input": false, + "has_output": false, + "metadata": { + "has_model": false, + "openaiSdkVersion": "6.25.0", + "operation": null, + "provider": null, + "scenario": "wrap-openai-conversation-traces", + }, + "metric_keys": [], + "name": "openai-wrapper-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task", + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "has_model": false, + "openaiSdkVersion": null, + "operation": null, + "provider": null, + "scenario": null, + }, + "metric_keys": [], + "name": "openai-chat-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": null, + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "has_model": true, + "openaiSdkVersion": null, + "operation": null, + "provider": "openai", + "scenario": null, + }, + "metric_keys": [ + "completion_accepted_prediction_tokens", + "completion_audio_tokens", + "completion_reasoning_tokens", + 
"completion_rejected_prediction_tokens", + "completion_tokens", + "prompt_audio_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens", + ], + "name": "Chat Completion", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": "llm", + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "has_model": false, + "openaiSdkVersion": null, + "operation": null, + "provider": null, + "scenario": null, + }, + "metric_keys": [], + "name": "openai-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": null, + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "has_model": true, + "openaiSdkVersion": null, + "operation": null, + "provider": "openai", + "scenario": null, + }, + "metric_keys": [ + "completion_accepted_prediction_tokens", + "completion_audio_tokens", + "completion_reasoning_tokens", + "completion_rejected_prediction_tokens", + "completion_tokens", + "prompt_audio_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens", + ], + "name": "Chat Completion", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": "llm", + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "has_model": false, + "openaiSdkVersion": null, + "operation": null, + "provider": null, + "scenario": null, + }, + "metric_keys": [], + "name": "openai-responses-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "", + ], + "type": null, + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "has_model": true, + "openaiSdkVersion": null, + "operation": null, + "provider": "openai", + "scenario": null, + }, + "metric_keys": [ + "completion_reasoning_tokens", + "completion_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens", + ], + "name": "openai.responses.create", + "root_span_id": "", + "span_id": "", + 
"span_parents": [ + "", + ], + "type": "llm", + }, +] +`; diff --git a/e2e/tests/experiment-basic.test.ts b/e2e/tests/experiment-basic.test.ts deleted file mode 100644 index cc55460a8..000000000 --- a/e2e/tests/experiment-basic.test.ts +++ /dev/null @@ -1,43 +0,0 @@ -import { expect, test } from "vitest"; -import { - createTestRunId, - getPayloadsForRun, - getTestServerEnv, - waitForRunEvent, -} from "./helpers/ingestion"; -import { normalizeForSnapshot, type Json } from "./helpers/normalize"; -import { runScenarioOrThrow } from "./helpers/run-scenario"; - -test("experiment-basic registers an experiment and sends normalized experiment logs", async () => { - const testRunId = createTestRunId(); - const rootSpanPromise = waitForRunEvent( - testRunId, - (event) => event.span.name === "experiment-root" && event.span.ended, - ); - const toolSpanPromise = waitForRunEvent( - testRunId, - (event) => event.span.name === "tool-span" && event.span.ended, - ); - - await runScenarioOrThrow( - "scenarios/experiment-basic.ts", - getTestServerEnv(testRunId), - ); - - const [rootSpanEvent, toolSpanEvent] = await Promise.all([ - rootSpanPromise, - toolSpanPromise, - ]); - - expect(normalizeForSnapshot(rootSpanEvent.row as Json)).toMatchSnapshot( - "root-span", - ); - expect(normalizeForSnapshot(toolSpanEvent.row as Json)).toMatchSnapshot( - "tool-span", - ); - - const logs3Payloads = await getPayloadsForRun(testRunId); - expect(normalizeForSnapshot(logs3Payloads as Json)).toMatchSnapshot( - "logs3-payloads", - ); -}); diff --git a/e2e/tests/global-setup.ts b/e2e/tests/global-setup.ts deleted file mode 100644 index 4aaa83e33..000000000 --- a/e2e/tests/global-setup.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { startMockBraintrustServer } from "./helpers/mock-braintrust-server"; - -export default async function globalSetup(context: { - provide: (key: string, value: string) => void; -}) { - const server = await startMockBraintrustServer(); - - context.provide("mockBraintrustApiKey", 
server.apiKey); - context.provide("mockBraintrustUrl", server.url); - - return async () => { - await server.close(); - }; -} diff --git a/e2e/tests/helpers/ingestion.ts b/e2e/tests/helpers/ingestion.ts deleted file mode 100644 index a4de2dde7..000000000 --- a/e2e/tests/helpers/ingestion.ts +++ /dev/null @@ -1,171 +0,0 @@ -import { randomUUID } from "node:crypto"; -import { inject } from "vitest"; -import type { - CapturedLogEvent, - CapturedLogEventBatch, - CapturedLogPayload, - CapturedLogPayloadBatch, -} from "./mock-braintrust-server"; - -const MOCK_BRAINTRUST_URL_KEY = "mockBraintrustUrl"; -const MOCK_BRAINTRUST_API_KEY_KEY = "mockBraintrustApiKey"; -const DEFAULT_EVENT_TIMEOUT_MS = 5_000; -const DEFAULT_POLL_INTERVAL_MS = 50; - -export type EventPredicate = (event: CapturedLogEvent) => boolean; -export type PayloadPredicate = (payload: CapturedLogPayload) => boolean; - -export type WaitForEventOptions = { - pollIntervalMs?: number; - timeoutMs?: number; -}; - -function isRecord(value: unknown): value is Record { - return typeof value === "object" && value !== null && !Array.isArray(value); -} - -function injectedString(key: string): string { - return inject(key as never) as string; -} - -function controlUrl(path: string): URL { - return new URL(path, injectedString(MOCK_BRAINTRUST_URL_KEY)); -} - -async function fetchControl( - path: string, - body: Record, -): Promise { - const response = await fetch(controlUrl(path), { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify(body), - }); - - if (!response.ok) { - throw new Error( - `Test server request failed: ${response.status} ${response.statusText}`, - ); - } - - return (await response.json()) as T; -} - -function hasTestRunId(value: unknown, testRunId: string): boolean { - if (Array.isArray(value)) { - return value.some((entry) => hasTestRunId(entry, testRunId)); - } - - if (!isRecord(value)) { - return false; - } - - if (value.testRunId === testRunId) { - return true; 
- } - - return Object.values(value).some((entry) => hasTestRunId(entry, testRunId)); -} - -function eventBatch(after = 0): Promise { - return fetchControl("/_mock/events", { after }); -} - -function payloadBatch(after = 0): Promise { - return fetchControl("/_mock/payloads", { after }); -} - -function delay(ms: number): Promise { - return new Promise((resolve) => { - setTimeout(resolve, ms); - }); -} - -export function createTestRunId(): string { - return `e2e-${randomUUID()}`; -} - -export function getTestServerEnv(testRunId: string): Record { - const url = injectedString(MOCK_BRAINTRUST_URL_KEY); - return { - BRAINTRUST_API_KEY: injectedString(MOCK_BRAINTRUST_API_KEY_KEY), - BRAINTRUST_API_URL: url, - BRAINTRUST_APP_URL: url, - BRAINTRUST_E2E_RUN_ID: testRunId, - }; -} - -export function isTestRunEvent( - event: CapturedLogEvent, - testRunId: string, -): boolean { - return hasTestRunId(event.row, testRunId); -} - -export function isTestRunPayload( - payload: CapturedLogPayload, - testRunId: string, -): boolean { - return payload.rows.some((row) => hasTestRunId(row, testRunId)); -} - -export async function getEvents( - predicate?: EventPredicate, -): Promise { - const { events } = await eventBatch(); - return predicate ? events.filter(predicate) : events; -} - -export async function getPayloads( - predicate?: PayloadPredicate, -): Promise { - const { payloads } = await payloadBatch(); - return predicate ? payloads.filter(predicate) : payloads; -} - -export async function getPayloadsForRun( - testRunId: string, -): Promise { - return await getPayloads((payload) => isTestRunPayload(payload, testRunId)); -} - -export async function waitForEvent( - predicate: EventPredicate, - options: WaitForEventOptions = {}, -): Promise { - const timeoutMs = options.timeoutMs ?? DEFAULT_EVENT_TIMEOUT_MS; - const pollIntervalMs = options.pollIntervalMs ?? 
DEFAULT_POLL_INTERVAL_MS; - const deadline = Date.now() + timeoutMs; - let cursor = 0; - - while (Date.now() <= deadline) { - const batch = await eventBatch(cursor); - cursor = batch.cursor; - - const match = batch.events.find(predicate); - if (match) { - return match; - } - - if (Date.now() >= deadline) { - break; - } - - await delay(Math.min(pollIntervalMs, Math.max(deadline - Date.now(), 0))); - } - - throw new Error( - `Timed out waiting for a matching event after ${timeoutMs}ms`, - ); -} - -export async function waitForRunEvent( - testRunId: string, - predicate: EventPredicate, - options: WaitForEventOptions = {}, -): Promise { - return await waitForEvent( - (event) => isTestRunEvent(event, testRunId) && predicate(event), - options, - ); -} diff --git a/e2e/tests/helpers/mock-braintrust-server.ts b/e2e/tests/helpers/mock-braintrust-server.ts index 937627924..2369c3abd 100644 --- a/e2e/tests/helpers/mock-braintrust-server.ts +++ b/e2e/tests/helpers/mock-braintrust-server.ts @@ -1,5 +1,10 @@ +import { randomUUID } from "node:crypto"; import { createServer } from "node:http"; -import type { IncomingHttpHeaders, ServerResponse } from "node:http"; +import type { + IncomingHttpHeaders, + IncomingMessage, + ServerResponse, +} from "node:http"; import type { AddressInfo } from "node:net"; export type JsonValue = @@ -10,8 +15,6 @@ export type JsonValue = | JsonValue[] | { [key: string]: JsonValue }; -export type JsonObject = { [key: string]: JsonValue }; - export interface CapturedRequest { method: string; path: string; @@ -55,46 +58,22 @@ export type CapturedLogEvent = { export interface MockBraintrustServer { apiKey: string; close: () => Promise; - url: string; -} - -export type CapturedLogEventBatch = { - cursor: number; events: CapturedLogEvent[]; -}; - -export type CapturedLogPayloadBatch = { - cursor: number; payloads: CapturedLogPayload[]; -}; + requests: CapturedRequest[]; + url: string; +} -const CONTROL_ROUTE_PREFIX = "/_mock"; const DEFAULT_API_KEY = 
"mock-braintrust-api-key"; -type ProjectRecord = { - id: string; - name: string; -}; - -type ExperimentRecord = { - created: string; - id: string; - name: string; - projectId: string; - projectName: string; -}; - -function slugify(value: string): string { - return value - .toLowerCase() - .replace(/[^a-z0-9]+/g, "-") - .replace(/^-+|-+$/g, ""); -} - function isRecord(value: unknown): value is Record { return typeof value === "object" && value !== null && !Array.isArray(value); } +function clone(value: T): T { + return structuredClone(value); +} + function normalizeHeaders( headers: IncomingHttpHeaders, ): Record { @@ -116,31 +95,14 @@ function parseJson(rawBody: string): JsonValue | null { return null; } - return JSON.parse(rawBody) as JsonValue; -} - -function respondJson( - response: ServerResponse, - statusCode: number, - body: unknown, -): void { - response.writeHead(statusCode, { "Content-Type": "application/json" }); - response.end(JSON.stringify(body)); -} - -function isAuthorized( - headers: Record, - apiKey: string, -): boolean { - return headers.authorization === `Bearer ${apiKey}`; -} - -function clone(value: T): T { - return structuredClone(value); + try { + return JSON.parse(rawBody) as JsonValue; + } catch { + return null; + } } -function parsePayload(request: CapturedRequest): CapturedLogPayload | null { - const body = request.jsonBody; +function parsePayloadBody(body: JsonValue | null): CapturedLogPayload | null { if (!isRecord(body) || !Array.isArray(body.rows)) { return null; } @@ -156,6 +118,10 @@ function parsePayload(request: CapturedRequest): CapturedLogPayload | null { }; } +function parsePayload(request: CapturedRequest): CapturedLogPayload | null { + return parsePayloadBody(request.jsonBody); +} + function rowKey(row: CapturedLogRow): string { return JSON.stringify( [ @@ -247,6 +213,40 @@ function toCapturedLogEvent( }; } +function respondJson( + response: ServerResponse, + statusCode: number, + body: unknown, +): void { + 
response.writeHead(statusCode, { "Content-Type": "application/json" }); + response.end(JSON.stringify(body)); +} + +async function readRequestBody(req: IncomingMessage): Promise { + return await new Promise((resolve, reject) => { + const chunks: Buffer[] = []; + req.on("data", (chunk) => chunks.push(Buffer.from(chunk))); + req.on("end", () => resolve(Buffer.concat(chunks).toString("utf8"))); + req.on("error", reject); + }); +} + +function capturedRequestFrom( + method: string | undefined, + requestUrl: URL, + headers: IncomingHttpHeaders, + rawBody: string, +): CapturedRequest { + return { + method: method ?? "GET", + path: requestUrl.pathname, + query: Object.fromEntries(requestUrl.searchParams.entries()), + headers: normalizeHeaders(headers), + rawBody, + jsonBody: parseJson(rawBody), + }; +} + export async function startMockBraintrustServer( apiKey = DEFAULT_API_KEY, ): Promise { @@ -254,9 +254,43 @@ export async function startMockBraintrustServer( const payloads: CapturedLogPayload[] = []; const events: CapturedLogEvent[] = []; const mergedRows = new Map(); - const projects = new Map(); - const experiments = new Map(); + const projectsByName = new Map(); let serverUrl = ""; + let xactCursor = 0; + + function nextXactId(): string { + xactCursor += 1; + return String(xactCursor).padStart(12, "0"); + } + + function persistPayload(payload: CapturedLogPayload): void { + payloads.push(payload); + + for (const row of payload.rows) { + const persistedRow = clone(row); + if (typeof persistedRow._xact_id !== "string") { + persistedRow._xact_id = nextXactId(); + } + + const key = rowKey(persistedRow); + const mergedRow = mergeRow(mergedRows.get(key), persistedRow); + mergedRows.set(key, mergedRow); + events.push( + toCapturedLogEvent(payload.api_version, mergedRow, persistedRow), + ); + } + } + + function projectForName(name: string): { id: string; name: string } { + const existing = projectsByName.get(name); + if (existing) { + return existing; + } + + const created = { 
id: randomUUID(), name }; + projectsByName.set(name, created); + return created; + } const server = createServer((req, res) => { void (async () => { @@ -265,64 +299,16 @@ export async function startMockBraintrustServer( req.url ?? "/", serverUrl || "http://127.0.0.1", ); - const rawBody = await new Promise((resolve, reject) => { - const chunks: Buffer[] = []; - req.on("data", (chunk) => chunks.push(Buffer.from(chunk))); - req.on("end", () => resolve(Buffer.concat(chunks).toString("utf8"))); - req.on("error", reject); - }); - - const capturedRequest: CapturedRequest = { - method: req.method ?? "GET", - path: requestUrl.pathname, - query: Object.fromEntries(requestUrl.searchParams.entries()), - headers: normalizeHeaders(req.headers), + const rawBody = await readRequestBody(req); + const capturedRequest = capturedRequestFrom( + req.method, + requestUrl, + req.headers, rawBody, - jsonBody: parseJson(rawBody), - }; - - if (capturedRequest.path.startsWith(CONTROL_ROUTE_PREFIX)) { - const body = isRecord(capturedRequest.jsonBody) - ? capturedRequest.jsonBody - : {}; - const after = - typeof body.after === "number" && body.after >= 0 ? 
body.after : 0; - - if ( - capturedRequest.method === "POST" && - capturedRequest.path === `${CONTROL_ROUTE_PREFIX}/events` - ) { - respondJson(res, 200, { - cursor: events.length, - events: events.slice(after), - } satisfies CapturedLogEventBatch); - return; - } - - if ( - capturedRequest.method === "POST" && - capturedRequest.path === `${CONTROL_ROUTE_PREFIX}/payloads` - ) { - respondJson(res, 200, { - cursor: payloads.length, - payloads: payloads.slice(after), - } satisfies CapturedLogPayloadBatch); - return; - } - - respondJson(res, 404, { - error: `Unhandled mock control route: ${capturedRequest.method} ${capturedRequest.path}`, - }); - return; - } + ); requests.push(capturedRequest); - if (!isAuthorized(capturedRequest.headers, apiKey)) { - respondJson(res, 401, { error: "unauthorized" }); - return; - } - if ( capturedRequest.method === "POST" && capturedRequest.path === "/api/apikey/login" @@ -330,11 +316,10 @@ export async function startMockBraintrustServer( respondJson(res, 200, { org_info: [ { - id: "org:e2e", - name: "e2e-org", + id: "mock-org-id", + name: "mock-org", api_url: serverUrl, - proxy_url: null, - git_metadata: { collect: "none" }, + proxy_url: serverUrl, }, ], }); @@ -345,132 +330,23 @@ export async function startMockBraintrustServer( capturedRequest.method === "POST" && capturedRequest.path === "/api/project/register" ) { - const body = (capturedRequest.jsonBody ?? {}) as { - project_name?: string; - }; - const projectName = body.project_name ?? "global"; - const project = projects.get(projectName) ?? { - id: `project:${slugify(projectName) || "global"}`, - name: projectName, - }; - projects.set(projectName, project); - - respondJson(res, 200, { project }); - return; - } - - if ( - capturedRequest.method === "GET" && - capturedRequest.path === "/api/project" - ) { - const projectId = capturedRequest.query.id ?? "project:unknown"; - const project = [...projects.values()].find( - (candidate) => candidate.id === projectId, - ) ?? 
{ - id: projectId, - name: projectId.replace(/^project:/, ""), - }; - - respondJson(res, 200, { name: project.name, project }); - return; - } - - if ( - capturedRequest.method === "POST" && - capturedRequest.path === "/api/experiment/register" - ) { - const body = (capturedRequest.jsonBody ?? {}) as { - project_name?: string; - project_id?: string; - experiment_name?: string; - }; - const projectName = body.project_name ?? body.project_id ?? "project"; - const project = projects.get(projectName) ?? { - id: - body.project_id ?? `project:${slugify(projectName) || "project"}`, - name: projectName, - }; - projects.set(project.name, project); - - const experimentName = body.experiment_name ?? "experiment"; - const experimentKey = `${project.id}:${experimentName}`; - const experiment = experiments.get(experimentKey) ?? { - id: `experiment:${slugify(experimentName) || "experiment"}`, - name: experimentName, - created: "2026-01-01T00:00:00.000Z", - projectId: project.id, - projectName: project.name, - }; - experiments.set(experimentKey, experiment); + const projectName = + isRecord(capturedRequest.jsonBody) && + typeof capturedRequest.jsonBody.project_name === "string" + ? capturedRequest.jsonBody.project_name + : "project"; respondJson(res, 200, { - project, - experiment: { - id: experiment.id, - name: experiment.name, - created: experiment.created, - }, + project: projectForName(projectName), }); return; } - if ( - capturedRequest.method === "POST" && - capturedRequest.path === "/api/experiment/get" - ) { - const body = (capturedRequest.jsonBody ?? {}) as { - project_name?: string; - project_id?: string; - experiment_name?: string; - }; - const projectKey = body.project_name ?? body.project_id ?? "project"; - const project = projects.get(projectKey) ?? { - id: `project:${slugify(projectKey) || "project"}`, - name: projectKey, - }; - const experimentName = body.experiment_name ?? 
"experiment"; - const experiment = experiments.get( - `${project.id}:${experimentName}`, - ) ?? { - id: `experiment:${slugify(experimentName) || "experiment"}`, - name: experimentName, - created: "2026-01-01T00:00:00.000Z", - projectId: project.id, - projectName: project.name, - }; - - respondJson(res, 200, [ - { - id: experiment.id, - name: experiment.name, - project_id: experiment.projectId, - created: experiment.created, - }, - ]); - return; - } - - if ( - capturedRequest.method === "POST" && - capturedRequest.path === "/api/base_experiment/get_id" - ) { - respondJson(res, 400, { error: "no base experiment" }); - return; - } - - if ( - capturedRequest.method === "GET" && - capturedRequest.path === "/experiment-comparison2" - ) { - respondJson(res, 200, { scores: {}, metrics: {} }); - return; - } - if ( capturedRequest.method === "GET" && capturedRequest.path === "/version" ) { - respondJson(res, 200, { logs3_payload_max_bytes: null }); + respondJson(res, 200, {}); return; } @@ -480,24 +356,22 @@ export async function startMockBraintrustServer( ) { const payload = parsePayload(capturedRequest); if (payload) { - payloads.push(payload); - - for (const row of payload.rows) { - const key = rowKey(row); - const mergedRow = mergeRow(mergedRows.get(key), row); - mergedRows.set(key, mergedRow); - events.push( - toCapturedLogEvent(payload.api_version, mergedRow, row), - ); - } + persistPayload(payload); } + respondJson(res, 200, { ok: true }); + return; + } + if ( + capturedRequest.method === "POST" && + capturedRequest.path === "/otel/v1/traces" + ) { respondJson(res, 200, { ok: true }); return; } respondJson(res, 404, { - error: `Unhandled mock route: ${capturedRequest.method} ${capturedRequest.path}`, + error: `Unhandled mock Braintrust route: ${capturedRequest.method} ${capturedRequest.path}`, }); } catch (error) { respondJson(res, 500, { @@ -520,6 +394,9 @@ export async function startMockBraintrustServer( new Promise((resolve, reject) => { server.close((error) => 
(error ? reject(error) : resolve())); }), + events, + payloads, + requests, url: serverUrl, }; } diff --git a/e2e/tests/helpers/normalize.ts b/e2e/tests/helpers/normalize.ts index 45240fa6b..eb59bbeaa 100644 --- a/e2e/tests/helpers/normalize.ts +++ b/e2e/tests/helpers/normalize.ts @@ -15,8 +15,15 @@ type TokenMaps = { const ISO_DATE_REGEX = /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z$/; const UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i; +const UUID_SUBSTRING_REGEX = + /[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}/gi; const TIME_KEYS = new Set(["created", "start", "end"]); const SPAN_ID_KEYS = new Set(["id", "span_id", "root_span_id"]); +const XACT_VERSION_KEYS = new Set([ + "currentVersion", + "initialVersion", + "version", +]); function normalizeCallerFilename(value: string): string { const e2eIndex = value.lastIndexOf("/e2e/"); @@ -27,6 +34,49 @@ function normalizeCallerFilename(value: string): string { return value; } +function normalizeMockServerUrl(value: string): string | undefined { + try { + const url = new URL(value); + if (url.protocol !== "http:" || url.hostname !== "127.0.0.1") { + return undefined; + } + + const suffix = `${url.pathname}${url.search}${url.hash}`; + return suffix === "/" ? "" : `${suffix}`; + } catch { + return undefined; + } +} + +function normalizeObject( + value: { [key: string]: Json }, + tokenMaps: TokenMaps, +): Json { + const callerFilename = + typeof value.caller_filename === "string" + ? 
value.caller_filename + : undefined; + const isNodeInternalCaller = callerFilename?.startsWith("node:"); + + return Object.fromEntries( + Object.entries(value).map(([key, entry]) => { + if (isNodeInternalCaller) { + if (key === "caller_filename") { + return [key, ""]; + } + if (key === "caller_functionname") { + return [key, ""]; + } + if (key === "caller_lineno") { + return [key, 0]; + } + } + + return [key, normalizeValue(entry as Json, tokenMaps, key)]; + }), + ); +} + function tokenFor( map: Map, rawValue: string, @@ -60,12 +110,7 @@ function normalizeValue( } if (value && typeof value === "object") { - return Object.fromEntries( - Object.entries(value).map(([key, entry]) => [ - key, - normalizeValue(entry as Json, tokenMaps, key), - ]), - ); + return normalizeObject(value, tokenMaps); } if (typeof value === "number") { @@ -76,6 +121,11 @@ function normalizeValue( } if (typeof value === "string") { + const normalizedUrl = normalizeMockServerUrl(value); + if (normalizedUrl) { + return normalizedUrl; + } + if (currentKey === "caller_filename") { return normalizeCallerFilename(value); } @@ -84,6 +134,10 @@ function normalizeValue( return tokenFor(tokenMaps.xacts, value, "xact"); } + if (currentKey && XACT_VERSION_KEYS.has(currentKey)) { + return tokenFor(tokenMaps.xacts, value, "xact"); + } + if (currentKey === "testRunId") { return tokenFor(tokenMaps.runs, value, "run"); } @@ -100,6 +154,13 @@ function normalizeValue( return ""; } + const withNormalizedUuids = value.replace(UUID_SUBSTRING_REGEX, (match) => + tokenFor(tokenMaps.ids, match, "uuid"), + ); + if (withNormalizedUuids !== value) { + return withNormalizedUuids; + } + if (UUID_REGEX.test(value)) { return tokenFor(tokenMaps.ids, value, "uuid"); } diff --git a/e2e/tests/helpers/openai.ts b/e2e/tests/helpers/openai.ts new file mode 100644 index 000000000..b9e7dbbf4 --- /dev/null +++ b/e2e/tests/helpers/openai.ts @@ -0,0 +1,70 @@ +import type { CapturedLogEvent } from "./mock-braintrust-server"; +import type 
{ Json } from "./normalize"; + +export interface OpenAIScenario { + scenarioPath: string; + version: string; +} + +const OPENAI_VERSIONS = [ + { + suffix: "v4", + version: "4.104.0", + }, + { + suffix: "v5", + version: "5.11.0", + }, + { + suffix: "v6", + version: "6.25.0", + }, +] as const; + +export const OPENAI_SCENARIO_TIMEOUT_MS = 60_000; + +export const OPENAI_AUTO_HOOK_SCENARIOS: OpenAIScenario[] = OPENAI_VERSIONS.map( + ({ suffix, version }) => ({ + scenarioPath: `scenarios/openai-auto-instrumentation-node-hook.openai-${suffix}.mjs`, + version, + }), +); + +export const WRAP_OPENAI_SCENARIOS: OpenAIScenario[] = OPENAI_VERSIONS.map( + ({ suffix, version }) => ({ + scenarioPath: `scenarios/wrap-openai-conversation-traces.openai-${suffix}.ts`, + version, + }), +); + +export function summarizeOpenAIContract(event: CapturedLogEvent): Json { + const metadata = event.row.metadata as + | { + metadata?: { operation?: string }; + model?: string; + openaiSdkVersion?: string; + provider?: string; + scenario?: string; + } + | undefined; + + return { + has_input: event.input !== undefined && event.input !== null, + has_output: event.output !== undefined && event.output !== null, + metadata: { + has_model: typeof metadata?.model === "string", + openaiSdkVersion: metadata?.openaiSdkVersion ?? null, + operation: metadata?.metadata?.operation ?? null, + provider: metadata?.provider ?? null, + scenario: metadata?.scenario ?? null, + }, + metric_keys: Object.keys(event.metrics ?? {}) + .filter((key) => key !== "start" && key !== "end") + .sort(), + name: event.span.name ?? null, + root_span_id: event.span.rootId ?? null, + span_id: event.span.id ?? null, + span_parents: event.span.parentIds, + type: event.span.type ?? 
null, + } satisfies Json; +} diff --git a/e2e/tests/helpers/run-scenario.ts b/e2e/tests/helpers/run-scenario.ts deleted file mode 100644 index 1b4da0299..000000000 --- a/e2e/tests/helpers/run-scenario.ts +++ /dev/null @@ -1,79 +0,0 @@ -import { spawn } from "node:child_process"; -import * as path from "node:path"; - -export interface ScenarioResult { - exitCode: number; - stdout: string; - stderr: string; -} - -const tsxCliPath = require.resolve("tsx/cli"); -const packageRoot = process.cwd(); -const DEFAULT_SCENARIO_TIMEOUT_MS = 15_000; - -export async function runScenario( - relativeScenarioPath: string, - env: Record, - timeoutMs = DEFAULT_SCENARIO_TIMEOUT_MS, -): Promise { - const scenarioPath = path.join(packageRoot, relativeScenarioPath); - - return await new Promise((resolve, reject) => { - const child = spawn(process.execPath, [tsxCliPath, scenarioPath], { - cwd: packageRoot, - env: { - ...process.env, - ...env, - }, - stdio: ["ignore", "pipe", "pipe"], - }); - const timeout = setTimeout(() => { - child.kill("SIGTERM"); - reject( - new Error( - `Scenario ${relativeScenarioPath} timed out after ${timeoutMs}ms`, - ), - ); - }, timeoutMs); - - let stdout = ""; - let stderr = ""; - - child.stdout.on("data", (chunk) => { - stdout += chunk.toString(); - }); - - child.stderr.on("data", (chunk) => { - stderr += chunk.toString(); - }); - - child.on("error", (error) => { - clearTimeout(timeout); - reject(error); - }); - child.on("close", (code) => { - clearTimeout(timeout); - resolve({ - exitCode: code ?? 
0, - stdout, - stderr, - }); - }); - }); -} - -export async function runScenarioOrThrow( - relativeScenarioPath: string, - env: Record, - timeoutMs?: number, -): Promise { - const result = await runScenario(relativeScenarioPath, env, timeoutMs); - - if (result.exitCode !== 0) { - throw new Error( - `Scenario ${relativeScenarioPath} failed with exit code ${result.exitCode}\nSTDOUT:\n${result.stdout}\nSTDERR:\n${result.stderr}`, - ); - } - - return result; -} diff --git a/e2e/tests/helpers/scenario-harness.ts b/e2e/tests/helpers/scenario-harness.ts new file mode 100644 index 000000000..452c092ae --- /dev/null +++ b/e2e/tests/helpers/scenario-harness.ts @@ -0,0 +1,200 @@ +import { spawn } from "node:child_process"; +import { randomUUID } from "node:crypto"; +import * as path from "node:path"; +import { + startMockBraintrustServer, + type CapturedLogEvent, + type CapturedLogPayload, + type CapturedRequest, +} from "./mock-braintrust-server"; + +export type EventPredicate = (event: CapturedLogEvent) => boolean; +export type PayloadPredicate = (payload: CapturedLogPayload) => boolean; +export type RequestPredicate = (request: CapturedRequest) => boolean; + +export interface ScenarioResult { + exitCode: number; + stdout: string; + stderr: string; +} + +const tsxCliPath = require.resolve("tsx/cli"); +const packageRoot = process.cwd(); +const DEFAULT_SCENARIO_TIMEOUT_MS = 15_000; + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +function hasTestRunId(value: unknown, testRunId: string): boolean { + if (Array.isArray(value)) { + return value.some((entry) => hasTestRunId(entry, testRunId)); + } + + if (!isRecord(value)) { + return false; + } + + if (value.testRunId === testRunId) { + return true; + } + + return Object.values(value).some((entry) => hasTestRunId(entry, testRunId)); +} + +function filterItems(items: T[], predicate?: (item: T) => boolean): T[] { + return predicate ? 
items.filter(predicate) : [...items]; +} + +function createTestRunId(): string { + return `e2e-${randomUUID()}`; +} + +function getTestServerEnv( + testRunId: string, + server: { apiKey: string; url: string }, +): Record { + return { + BRAINTRUST_API_KEY: server.apiKey, + BRAINTRUST_API_URL: server.url, + BRAINTRUST_APP_URL: server.url, + BRAINTRUST_APP_PUBLIC_URL: server.url, + BRAINTRUST_PROXY_URL: server.url, + BRAINTRUST_E2E_RUN_ID: testRunId, + }; +} + +async function runProcess( + args: string[], + env: Record, + timeoutMs: number, +): Promise { + return await new Promise((resolve, reject) => { + const child = spawn(process.execPath, args, { + cwd: packageRoot, + env: { + ...process.env, + ...env, + }, + stdio: ["ignore", "pipe", "pipe"], + }); + const timeout = setTimeout(() => { + child.kill("SIGTERM"); + reject( + new Error(`Process ${args.join(" ")} timed out after ${timeoutMs}ms`), + ); + }, timeoutMs); + + let stdout = ""; + let stderr = ""; + + child.stdout.on("data", (chunk) => { + stdout += chunk.toString(); + }); + + child.stderr.on("data", (chunk) => { + stderr += chunk.toString(); + }); + + child.on("error", (error) => { + clearTimeout(timeout); + reject(error); + }); + child.on("close", (code) => { + clearTimeout(timeout); + resolve({ + exitCode: code ?? 0, + stdout, + stderr, + }); + }); + }); +} + +async function runScenarioOrThrow( + relativeScenarioPath: string, + env: Record, + options: { + nodeArgs?: string[]; + timeoutMs?: number; + useTsx?: boolean; + } = {}, +): Promise { + const scenarioPath = path.join(packageRoot, relativeScenarioPath); + const args = + options.useTsx === false + ? [...(options.nodeArgs ?? []), scenarioPath] + : [tsxCliPath, scenarioPath]; + const result = await runProcess( + args, + env, + options.timeoutMs ?? 
DEFAULT_SCENARIO_TIMEOUT_MS, + ); + + if (result.exitCode !== 0) { + throw new Error( + `Scenario ${relativeScenarioPath} failed with exit code ${result.exitCode}\nSTDOUT:\n${result.stdout}\nSTDERR:\n${result.stderr}`, + ); + } + + return result; +} + +export interface ScenarioHarness { + events: (predicate?: EventPredicate) => CapturedLogEvent[]; + payloads: (predicate?: PayloadPredicate) => CapturedLogPayload[]; + requestCursor: () => number; + requestsAfter: ( + after: number, + predicate?: RequestPredicate, + ) => CapturedRequest[]; + runNodeScenario: ( + relativeScenarioPath: string, + args?: string[], + timeoutMs?: number, + ) => Promise; + runScenario: ( + relativeScenarioPath: string, + timeoutMs?: number, + ) => Promise; + testRunEvents: (predicate?: EventPredicate) => CapturedLogEvent[]; + testRunId: string; +} + +export async function withScenarioHarness( + body: (harness: ScenarioHarness) => Promise, +): Promise { + const server = await startMockBraintrustServer(); + const testRunId = createTestRunId(); + const testEnv = getTestServerEnv(testRunId, server); + + try { + await body({ + events: (predicate) => filterItems(server.events, predicate), + payloads: (predicate) => filterItems(server.payloads, predicate), + requestCursor: () => server.requests.length, + requestsAfter: (after, predicate) => + filterItems(server.requests.slice(after), predicate), + runNodeScenario: (relativeScenarioPath, args = [], timeoutMs) => + runScenarioOrThrow(relativeScenarioPath, testEnv, { + nodeArgs: args, + timeoutMs, + useTsx: false, + }), + runScenario: (relativeScenarioPath, timeoutMs) => + runScenarioOrThrow(relativeScenarioPath, testEnv, { + timeoutMs, + }), + testRunEvents: (predicate) => + filterItems( + server.events, + (event) => + hasTestRunId(event.row, testRunId) && + (predicate ? 
predicate(event) : true), + ), + testRunId, + }); + } finally { + await server.close(); + } +} diff --git a/e2e/tests/helpers/trace-selectors.ts b/e2e/tests/helpers/trace-selectors.ts new file mode 100644 index 000000000..603108f48 --- /dev/null +++ b/e2e/tests/helpers/trace-selectors.ts @@ -0,0 +1,40 @@ +import type { CapturedLogEvent } from "./mock-braintrust-server"; + +export function findLatestEvent( + events: CapturedLogEvent[], + predicate: (event: CapturedLogEvent) => boolean, +): CapturedLogEvent | undefined { + return [...events].reverse().find(predicate); +} + +export function findLatestSpan( + events: CapturedLogEvent[], + name: string, +): CapturedLogEvent | undefined { + return findLatestEvent(events, (event) => event.span.name === name); +} + +export function findLatestChildSpan( + events: CapturedLogEvent[], + name: string, + parentId: string | undefined, +): CapturedLogEvent | undefined { + if (!parentId) { + return undefined; + } + + return ( + findLatestEvent( + events, + (event) => + event.span.name === name && + event.span.parentIds.includes(parentId) && + event.output !== undefined, + ) ?? + findLatestEvent( + events, + (event) => + event.span.name === name && event.span.parentIds.includes(parentId), + ) + ); +} diff --git a/e2e/tests/helpers/trace-summary.ts b/e2e/tests/helpers/trace-summary.ts new file mode 100644 index 000000000..8dde332dc --- /dev/null +++ b/e2e/tests/helpers/trace-summary.ts @@ -0,0 +1,148 @@ +import type { + CapturedLogEvent, + CapturedRequest, + JsonValue, +} from "./mock-braintrust-server"; +import type { Json } from "./normalize"; + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +export function summarizeEvent(event: CapturedLogEvent): Json { + const row = event.row as Record; + const error = + typeof row.error === "string" + ? row.error.split("\n\n")[0] + : row.error == null + ? 
null + : String(row.error); + + return { + error, + input: (row.input ?? null) as Json, + metadata: (row.metadata ?? null) as Json, + name: event.span.name ?? null, + output: (row.output ?? null) as Json, + span_attributes: (row.span_attributes ?? null) as Json, + span_id: (row.span_id ?? null) as Json, + span_parents: (row.span_parents ?? null) as Json, + root_span_id: (row.root_span_id ?? null) as Json, + }; +} + +export function summarizeRequest( + request: CapturedRequest, + options: { + includeHeaders?: string[]; + normalizeJsonRawBody?: boolean; + } = {}, +): Json { + const headers = + options.includeHeaders && options.includeHeaders.length > 0 + ? Object.fromEntries( + options.includeHeaders.flatMap((key) => { + const value = request.headers[key]; + return value === undefined ? [] : [[key, value]]; + }), + ) + : null; + + return { + headers: + headers && Object.keys(headers).length > 0 ? (headers as Json) : null, + jsonBody: (request.jsonBody ?? null) as Json, + method: request.method, + path: request.path, + query: + Object.keys(request.query).length === 0 ? null : (request.query as Json), + rawBody: + options.normalizeJsonRawBody && request.jsonBody + ? (request.jsonBody as Json) + : request.rawBody || null, + }; +} + +function otlpAttributeValue(value: unknown): Json { + if (!isRecord(value)) { + return null; + } + + if (typeof value.stringValue === "string") { + return value.stringValue; + } + if (typeof value.boolValue === "boolean") { + return value.boolValue; + } + if (typeof value.intValue === "string") { + return value.intValue; + } + if (typeof value.doubleValue === "number") { + return value.doubleValue; + } + const arrayValues = + isRecord(value.arrayValue) && Array.isArray(value.arrayValue.values) + ? 
value.arrayValue.values + : undefined; + if (arrayValues) { + return arrayValues.map((entry: unknown) => otlpAttributeValue(entry)); + } + + return null; +} + +export type OtlpSpanSummary = { + attributes: Record; + name: string; + parentSpanId?: string; + spanId?: string; + traceId?: string; +}; + +export function extractOtelSpans(body: JsonValue | null): OtlpSpanSummary[] { + if (!isRecord(body) || !Array.isArray(body.resourceSpans)) { + return []; + } + + const spans: OtlpSpanSummary[] = []; + for (const resourceSpan of body.resourceSpans) { + if (!isRecord(resourceSpan) || !Array.isArray(resourceSpan.scopeSpans)) { + continue; + } + + for (const scopeSpan of resourceSpan.scopeSpans) { + if (!isRecord(scopeSpan) || !Array.isArray(scopeSpan.spans)) { + continue; + } + + for (const span of scopeSpan.spans) { + if (!isRecord(span) || typeof span.name !== "string") { + continue; + } + + const attributes: Record = {}; + if (Array.isArray(span.attributes)) { + for (const attribute of span.attributes) { + if (!isRecord(attribute) || typeof attribute.key !== "string") { + continue; + } + attributes[attribute.key] = otlpAttributeValue(attribute.value); + } + } + + spans.push({ + attributes, + name: span.name, + parentSpanId: + typeof span.parentSpanId === "string" + ? span.parentSpanId + : undefined, + spanId: typeof span.spanId === "string" ? span.spanId : undefined, + traceId: typeof span.traceId === "string" ? 
span.traceId : undefined, + }); + } + } + } + + return spans; +} diff --git a/e2e/tests/logger-basic.test.ts b/e2e/tests/logger-basic.test.ts deleted file mode 100644 index 0cce4ed5b..000000000 --- a/e2e/tests/logger-basic.test.ts +++ /dev/null @@ -1,43 +0,0 @@ -import { expect, test } from "vitest"; -import { - createTestRunId, - getPayloadsForRun, - getTestServerEnv, - waitForRunEvent, -} from "./helpers/ingestion"; -import { normalizeForSnapshot, type Json } from "./helpers/normalize"; -import { runScenarioOrThrow } from "./helpers/run-scenario"; - -test("logger-basic registers a project and sends normalized project logs", async () => { - const testRunId = createTestRunId(); - const rootSpanPromise = waitForRunEvent( - testRunId, - (event) => event.span.name === "root-span" && event.span.ended, - ); - const childSpanPromise = waitForRunEvent( - testRunId, - (event) => event.span.name === "child-span" && event.span.ended, - ); - - await runScenarioOrThrow( - "scenarios/logger-basic.ts", - getTestServerEnv(testRunId), - ); - - const [rootSpanEvent, childSpanEvent] = await Promise.all([ - rootSpanPromise, - childSpanPromise, - ]); - - expect(normalizeForSnapshot(rootSpanEvent.row as Json)).toMatchSnapshot( - "root-span", - ); - expect(normalizeForSnapshot(childSpanEvent.row as Json)).toMatchSnapshot( - "child-span", - ); - - const logs3Payloads = await getPayloadsForRun(testRunId); - expect(normalizeForSnapshot(logs3Payloads as Json)).toMatchSnapshot( - "logs3-payloads", - ); -}); diff --git a/e2e/tests/openai-auto-instrumentation-node-hook.test.ts b/e2e/tests/openai-auto-instrumentation-node-hook.test.ts new file mode 100644 index 000000000..ea77c504a --- /dev/null +++ b/e2e/tests/openai-auto-instrumentation-node-hook.test.ts @@ -0,0 +1,49 @@ +import { expect, test } from "vitest"; +import { normalizeForSnapshot, type Json } from "./helpers/normalize"; +import { + OPENAI_AUTO_HOOK_SCENARIOS, + OPENAI_SCENARIO_TIMEOUT_MS, + summarizeOpenAIContract, +} from 
"./helpers/openai"; +import { withScenarioHarness } from "./helpers/scenario-harness"; +import { findLatestChildSpan, findLatestSpan } from "./helpers/trace-selectors"; + +for (const scenario of OPENAI_AUTO_HOOK_SCENARIOS) { + test(`openai auto-instrumentation via node hook collects traces without manual wrapping (openai ${scenario.version})`, async () => { + await withScenarioHarness(async ({ events, runNodeScenario }) => { + await runNodeScenario( + scenario.scenarioPath, + ["--import", "braintrust/hook.mjs"], + OPENAI_SCENARIO_TIMEOUT_MS, + ); + + const capturedEvents = events(); + const root = findLatestSpan(capturedEvents, "openai-auto-hook-root"); + const chatCompletion = + findLatestChildSpan(capturedEvents, "Chat Completion", root?.span.id) ?? + findLatestSpan(capturedEvents, "Chat Completion"); + + expect(root).toBeDefined(); + expect(root?.row.metadata).toMatchObject({ + openaiSdkVersion: scenario.version, + }); + expect(chatCompletion).toBeDefined(); + expect(chatCompletion?.span.parentIds).toEqual([root?.span.id ?? 
""]); + expect(chatCompletion?.row.metadata).toMatchObject({ + provider: "openai", + }); + expect( + typeof (chatCompletion?.row.metadata as { model?: unknown } | undefined) + ?.model, + ).toBe("string"); + + expect( + normalizeForSnapshot( + [root, chatCompletion].map((event) => + summarizeOpenAIContract(event!), + ) as Json, + ), + ).toMatchSnapshot("span-events"); + }); + }); +} diff --git a/e2e/tests/otel-compat-mixed-tracing.test.ts b/e2e/tests/otel-compat-mixed-tracing.test.ts new file mode 100644 index 000000000..a280ec488 --- /dev/null +++ b/e2e/tests/otel-compat-mixed-tracing.test.ts @@ -0,0 +1,41 @@ +import { expect, test } from "vitest"; +import { normalizeForSnapshot, type Json } from "./helpers/normalize"; +import { withScenarioHarness } from "./helpers/scenario-harness"; +import { findLatestSpan } from "./helpers/trace-selectors"; +import { extractOtelSpans, summarizeEvent } from "./helpers/trace-summary"; + +test("otel-compat-mixed-tracing unifies Braintrust and OTEL spans into one trace", async () => { + await withScenarioHarness( + async ({ requestsAfter, runScenario, testRunEvents }) => { + await runScenario("scenarios/otel-compat-mixed-tracing.ts"); + + const btEvents = testRunEvents(); + const btRoot = findLatestSpan(btEvents, "bt-root"); + const btChild = findLatestSpan(btEvents, "bt-child-under-otel"); + + expect(btRoot).toBeDefined(); + expect(btChild).toBeDefined(); + + const otelRequests = requestsAfter( + 0, + (request) => request.path === "/otel/v1/traces", + ); + expect(otelRequests.length).toBeGreaterThanOrEqual(1); + + const otelSpans = extractOtelSpans(otelRequests[0].jsonBody); + const otelMiddle = otelSpans.find((span) => span.name === "otel-middle"); + + expect(otelMiddle).toBeDefined(); + expect(otelMiddle?.traceId).toBe(btRoot?.span.rootId); + expect(otelMiddle?.parentSpanId).toBe(btRoot?.span.id); + expect(btChild?.span.rootId).toBe(btRoot?.span.rootId); + expect(btChild?.span.parentIds).toContain(otelMiddle?.spanId ?? 
""); + + expect( + normalizeForSnapshot( + [btRoot, btChild].map((event) => summarizeEvent(event!)) as Json, + ), + ).toMatchSnapshot("braintrust-span-events"); + }, + ); +}); diff --git a/e2e/tests/otel-span-processor-export.test.ts b/e2e/tests/otel-span-processor-export.test.ts new file mode 100644 index 000000000..f344f3097 --- /dev/null +++ b/e2e/tests/otel-span-processor-export.test.ts @@ -0,0 +1,34 @@ +import { expect, test } from "vitest"; +import { withScenarioHarness } from "./helpers/scenario-harness"; +import { extractOtelSpans, summarizeRequest } from "./helpers/trace-summary"; + +test("otel-span-processor-export sends filtered OTLP traces to Braintrust", async () => { + await withScenarioHarness( + async ({ requestsAfter, runScenario, testRunId }) => { + await runScenario("scenarios/otel-span-processor-export.ts"); + + const requests = requestsAfter( + 0, + (request) => request.path === "/otel/v1/traces", + ); + expect(requests).toHaveLength(1); + + const request = requests[0]; + const spans = extractOtelSpans(request.jsonBody); + + expect(request.headers["x-bt-parent"]).toContain(testRunId.toLowerCase()); + expect(spans.map((span) => span.name)).toContain("gen_ai.completion"); + expect(spans.map((span) => span.name)).not.toContain("root-operation"); + expect(spans[0]?.attributes["gen_ai.system"]).toBe("openai"); + + expect( + summarizeRequest(request, { + includeHeaders: ["content-type", "x-bt-parent"], + }), + ).toMatchObject({ + method: "POST", + path: "/otel/v1/traces", + }); + }, + ); +}); diff --git a/e2e/tests/trace-context-and-continuation.test.ts b/e2e/tests/trace-context-and-continuation.test.ts new file mode 100644 index 000000000..638e9b33c --- /dev/null +++ b/e2e/tests/trace-context-and-continuation.test.ts @@ -0,0 +1,70 @@ +import { expect, test } from "vitest"; +import { normalizeForSnapshot, type Json } from "./helpers/normalize"; +import { withScenarioHarness } from "./helpers/scenario-harness"; +import { findLatestSpan } from 
"./helpers/trace-selectors"; +import { summarizeEvent } from "./helpers/trace-summary"; + +test("trace-context-and-continuation supports reattachment and late span updates", async () => { + await withScenarioHarness( + async ({ payloads, runScenario, testRunEvents, testRunId }) => { + await runScenario("scenarios/trace-context-and-continuation.ts"); + + const capturedEvents = testRunEvents(); + const root = findLatestSpan(capturedEvents, "context-root"); + const currentChild = findLatestSpan(capturedEvents, "current-child"); + const reattachedChild = findLatestSpan( + capturedEvents, + "reattached-child", + ); + const lateUpdate = findLatestSpan(capturedEvents, "late-update"); + + expect(root).toBeDefined(); + expect(currentChild).toBeDefined(); + expect(reattachedChild).toBeDefined(); + expect(lateUpdate).toBeDefined(); + + expect(currentChild?.span.parentIds).toEqual([root?.span.id ?? ""]); + expect(reattachedChild?.span.parentIds).toEqual([root?.span.id ?? ""]); + expect(reattachedChild?.span.rootId).toBe(root?.span.rootId); + expect(lateUpdate?.row.metadata).toMatchObject({ + patched: true, + testRunId, + }); + expect(lateUpdate?.row.output).toEqual({ + state: "updated", + }); + + expect( + normalizeForSnapshot( + [ + "context-root", + "current-child", + "reattached-child", + "late-update", + ].map((name) => + summarizeEvent(findLatestSpan(capturedEvents, name)!), + ) as Json, + ), + ).toMatchSnapshot("span-events"); + + const mutationRows = payloads() + .flatMap((payload) => payload.rows) + .filter((row) => { + const metadata = + row.metadata && typeof row.metadata === "object" + ? 
row.metadata + : null; + return ( + metadata !== null && + "testRunId" in metadata && + (metadata as Record).testRunId === testRunId && + row.id === lateUpdate?.row.id + ); + }); + + expect(normalizeForSnapshot(mutationRows as Json)).toMatchSnapshot( + "late-update-payloads", + ); + }, + ); +}); diff --git a/e2e/tests/trace-primitives-basic.test.ts b/e2e/tests/trace-primitives-basic.test.ts new file mode 100644 index 000000000..0c9e7242e --- /dev/null +++ b/e2e/tests/trace-primitives-basic.test.ts @@ -0,0 +1,55 @@ +import { expect, test } from "vitest"; +import { normalizeForSnapshot, type Json } from "./helpers/normalize"; +import { withScenarioHarness } from "./helpers/scenario-harness"; +import { findLatestSpan } from "./helpers/trace-selectors"; +import { summarizeEvent, summarizeRequest } from "./helpers/trace-summary"; + +test("trace-primitives-basic collects a minimal manual trace tree", async () => { + await withScenarioHarness( + async ({ requestCursor, requestsAfter, runScenario, testRunEvents }) => { + const cursor = requestCursor(); + + await runScenario("scenarios/trace-primitives-basic.ts"); + + const capturedEvents = testRunEvents(); + const root = findLatestSpan(capturedEvents, "trace-primitives-root"); + const child = findLatestSpan(capturedEvents, "basic-child"); + const error = findLatestSpan(capturedEvents, "basic-error"); + + expect(root).toBeDefined(); + expect(child).toBeDefined(); + expect(error).toBeDefined(); + + expect(child?.span.parentIds).toEqual([root?.span.id ?? ""]); + expect(error?.span.parentIds).toEqual([root?.span.id ?? 
""]); + expect(root?.span.rootId).toBe(root?.span.id); + + expect( + normalizeForSnapshot( + ["trace-primitives-root", "basic-child", "basic-error"].map((name) => + summarizeEvent(findLatestSpan(capturedEvents, name)!), + ) as Json, + ), + ).toMatchSnapshot("span-events"); + + const requests = requestsAfter( + cursor, + (request) => + request.path === "/api/apikey/login" || + request.path === "/api/project/register" || + request.path === "/version" || + request.path === "/logs3", + ); + + expect( + normalizeForSnapshot( + requests.map((request) => + summarizeRequest(request, { + normalizeJsonRawBody: true, + }), + ) as Json, + ), + ).toMatchSnapshot("request-flow"); + }, + ); +}); diff --git a/e2e/tests/wrap-openai-conversation-traces.test.ts b/e2e/tests/wrap-openai-conversation-traces.test.ts new file mode 100644 index 000000000..c569d9dec --- /dev/null +++ b/e2e/tests/wrap-openai-conversation-traces.test.ts @@ -0,0 +1,128 @@ +import { expect, test } from "vitest"; +import { normalizeForSnapshot, type Json } from "./helpers/normalize"; +import { + OPENAI_SCENARIO_TIMEOUT_MS, + WRAP_OPENAI_SCENARIOS, + summarizeOpenAIContract, +} from "./helpers/openai"; +import { withScenarioHarness } from "./helpers/scenario-harness"; +import { findLatestChildSpan, findLatestSpan } from "./helpers/trace-selectors"; + +test.each( + WRAP_OPENAI_SCENARIOS.map( + ({ scenarioPath, version }) => [version, scenarioPath] as const, + ), +)( + "wrap-openai-conversation-traces logs wrapped chat and responses traces (openai %s)", + async (version, scenarioPath) => { + await withScenarioHarness(async ({ events, runScenario }) => { + await runScenario(scenarioPath, OPENAI_SCENARIO_TIMEOUT_MS); + + const capturedEvents = events(); + + const root = findLatestSpan(capturedEvents, "openai-wrapper-root"); + const chatOperation = findLatestSpan( + capturedEvents, + "openai-chat-operation", + ); + const streamOperation = findLatestSpan( + capturedEvents, + "openai-stream-operation", + ); + const 
responsesOperation = findLatestSpan( + capturedEvents, + "openai-responses-operation", + ); + const chatCompletionSpan = findLatestChildSpan( + capturedEvents, + "Chat Completion", + chatOperation?.span.id, + ); + const streamCompletionSpan = findLatestChildSpan( + capturedEvents, + "Chat Completion", + streamOperation?.span.id, + ); + const responsesSpan = findLatestChildSpan( + capturedEvents, + "openai.responses.create", + responsesOperation?.span.id, + ); + + expect(root).toBeDefined(); + expect(chatOperation).toBeDefined(); + expect(streamOperation).toBeDefined(); + expect(responsesOperation).toBeDefined(); + expect(chatCompletionSpan).toBeDefined(); + expect(streamCompletionSpan).toBeDefined(); + expect(responsesSpan).toBeDefined(); + expect(root?.row.metadata).toMatchObject({ + openaiSdkVersion: version, + }); + expect(chatOperation?.row.metadata).toMatchObject({ + operation: "chat", + }); + expect(streamOperation?.row.metadata).toMatchObject({ + operation: "stream", + }); + expect(responsesOperation?.row.metadata).toMatchObject({ + operation: "responses", + }); + expect(chatCompletionSpan?.row.metadata).toMatchObject({ + provider: "openai", + }); + expect( + typeof ( + chatCompletionSpan?.row.metadata as { model?: unknown } | undefined + )?.model, + ).toBe("string"); + expect(streamCompletionSpan?.row.metadata).toMatchObject({ + provider: "openai", + }); + expect( + typeof ( + streamCompletionSpan?.row.metadata as { model?: unknown } | undefined + )?.model, + ).toBe("string"); + expect(responsesSpan?.row.metadata).toMatchObject({ + provider: "openai", + }); + expect( + typeof (responsesSpan?.row.metadata as { model?: unknown } | undefined) + ?.model, + ).toBe("string"); + + expect(chatOperation?.span.parentIds).toEqual([root?.span.id ?? ""]); + expect(streamOperation?.span.parentIds).toEqual([root?.span.id ?? ""]); + expect(responsesOperation?.span.parentIds).toEqual([root?.span.id ?? 
""]); + expect(chatCompletionSpan?.span.parentIds).toEqual([ + chatOperation?.span.id ?? "", + ]); + expect(streamCompletionSpan?.span.parentIds).toEqual([ + streamOperation?.span.id ?? "", + ]); + expect(responsesSpan?.span.parentIds).toEqual([ + responsesOperation?.span.id ?? "", + ]); + expect(chatCompletionSpan?.input).toBeDefined(); + expect(chatCompletionSpan?.output).toBeDefined(); + expect(streamCompletionSpan?.output).toBeDefined(); + expect(streamCompletionSpan?.metrics).toBeDefined(); + expect(responsesSpan?.output).toBeDefined(); + + expect( + normalizeForSnapshot( + [ + root, + chatOperation, + chatCompletionSpan, + streamOperation, + streamCompletionSpan, + responsesOperation, + responsesSpan, + ].map((event) => summarizeOpenAIContract(event!)) as Json, + ), + ).toMatchSnapshot("span-events"); + }); + }, +); diff --git a/e2e/vitest.config.mts b/e2e/vitest.config.mts index 3b0933b87..391c4255c 100644 --- a/e2e/vitest.config.mts +++ b/e2e/vitest.config.mts @@ -2,7 +2,6 @@ import { defineConfig } from "vitest/config"; export default defineConfig({ test: { - globalSetup: ["./tests/global-setup.ts"], hookTimeout: 20_000, include: ["tests/**/*.test.ts"], testTimeout: 20_000, diff --git a/js/src/auto-instrumentations/configs/openai.test.ts b/js/src/auto-instrumentations/configs/openai.test.ts index f43fdbdf1..130bc7438 100644 --- a/js/src/auto-instrumentations/configs/openai.test.ts +++ b/js/src/auto-instrumentations/configs/openai.test.ts @@ -2,6 +2,10 @@ import { describe, it, expect } from "vitest"; import { openaiConfigs } from "./openai"; describe("OpenAI Instrumentation Configs", () => { + function configsForChannel(channelName: string) { + return openaiConfigs.filter((config) => config.channelName === channelName); + } + it("should have valid configs", () => { expect(openaiConfigs).toBeDefined(); expect(Array.isArray(openaiConfigs)).toBe(true); @@ -16,7 +20,9 @@ describe("OpenAI Instrumentation Configs", () => { expect(config).toBeDefined(); 
expect(config?.module.name).toBe("openai"); expect(config?.module.versionRange).toBe(">=4.0.0"); - expect(config?.module.filePath).toBe("resources/chat/completions.mjs"); + expect(config?.module.filePath).toBe( + "resources/chat/completions/completions.mjs", + ); expect((config?.functionQuery as any).className).toBe("Completions"); expect((config?.functionQuery as any).methodName).toBe("create"); expect((config?.functionQuery as any).kind).toBe("Async"); @@ -47,16 +53,37 @@ describe("OpenAI Instrumentation Configs", () => { }); it("should have beta.chat.completions.parse config", () => { - const config = openaiConfigs.find( - (c) => c.channelName === "beta.chat.completions.parse", + const configs = configsForChannel("beta.chat.completions.parse"); + + expect(configs).toHaveLength(2); + expect(configs).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + module: expect.objectContaining({ + name: "openai", + versionRange: ">=4.0.0 <5.0.0", + filePath: "resources/beta/chat/completions.mjs", + }), + functionQuery: expect.objectContaining({ + className: "Completions", + methodName: "parse", + kind: "Async", + }), + }), + expect.objectContaining({ + module: expect.objectContaining({ + name: "openai", + versionRange: ">=5.0.0", + filePath: "resources/chat/completions/completions.mjs", + }), + functionQuery: expect.objectContaining({ + className: "Completions", + methodName: "parse", + kind: "Async", + }), + }), + ]), ); - - expect(config).toBeDefined(); - expect(config?.module.name).toBe("openai"); - expect(config?.module.filePath).toBe("resources/beta/chat/completions.mjs"); - expect((config?.functionQuery as any).className).toBe("Completions"); - expect((config?.functionQuery as any).methodName).toBe("parse"); - expect((config?.functionQuery as any).kind).toBe("Async"); }); it("should NOT include braintrust: prefix (code-transformer adds orchestrion:openai: prefix)", () => { @@ -74,7 +101,9 @@ describe("OpenAI Instrumentation Configs", () => { it("should 
have valid version ranges", () => { for (const config of openaiConfigs) { - expect(config.module.versionRange).toMatch(/^>=\d+\.\d+\.\d+$/); + expect(config.module.versionRange).toMatch( + /^>=\d+\.\d+\.\d+( <\d+\.\d+\.\d+)?$/, + ); } }); @@ -86,16 +115,37 @@ describe("OpenAI Instrumentation Configs", () => { }); it("should have beta.chat.completions.stream config with Sync kind", () => { - const config = openaiConfigs.find( - (c) => c.channelName === "beta.chat.completions.stream", + const configs = configsForChannel("beta.chat.completions.stream"); + + expect(configs).toHaveLength(2); + expect(configs).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + module: expect.objectContaining({ + name: "openai", + versionRange: ">=4.0.0 <5.0.0", + filePath: "resources/beta/chat/completions.mjs", + }), + functionQuery: expect.objectContaining({ + className: "Completions", + methodName: "stream", + kind: "Sync", + }), + }), + expect.objectContaining({ + module: expect.objectContaining({ + name: "openai", + versionRange: ">=5.0.0", + filePath: "resources/chat/completions/completions.mjs", + }), + functionQuery: expect.objectContaining({ + className: "Completions", + methodName: "stream", + kind: "Sync", + }), + }), + ]), ); - - expect(config).toBeDefined(); - expect(config?.module.name).toBe("openai"); - expect(config?.module.filePath).toBe("resources/beta/chat/completions.mjs"); - expect((config?.functionQuery as any).className).toBe("Completions"); - expect((config?.functionQuery as any).methodName).toBe("stream"); - expect((config?.functionQuery as any).kind).toBe("Sync"); }); it("should have responses.create config with version >=4.87.0", () => { diff --git a/js/src/auto-instrumentations/configs/openai.ts b/js/src/auto-instrumentations/configs/openai.ts index ec293c7d2..277d7aa4b 100644 --- a/js/src/auto-instrumentations/configs/openai.ts +++ b/js/src/auto-instrumentations/configs/openai.ts @@ -19,7 +19,7 @@ export const openaiConfigs: 
InstrumentationConfig[] = [ module: { name: "openai", versionRange: ">=4.0.0", - filePath: "resources/chat/completions.mjs", + filePath: "resources/chat/completions/completions.mjs", }, functionQuery: { className: "Completions", @@ -48,7 +48,7 @@ export const openaiConfigs: InstrumentationConfig[] = [ channelName: OPENAI_CHANNEL_SUFFIX.BETA_CHAT_COMPLETIONS_PARSE, module: { name: "openai", - versionRange: ">=4.0.0", + versionRange: ">=4.0.0 <5.0.0", filePath: "resources/beta/chat/completions.mjs", }, functionQuery: { @@ -58,6 +58,20 @@ export const openaiConfigs: InstrumentationConfig[] = [ }, }, + { + channelName: OPENAI_CHANNEL_SUFFIX.BETA_CHAT_COMPLETIONS_PARSE, + module: { + name: "openai", + versionRange: ">=5.0.0", + filePath: "resources/chat/completions/completions.mjs", + }, + functionQuery: { + className: "Completions", + methodName: "parse", + kind: "Async", + }, + }, + // Moderations { channelName: OPENAI_CHANNEL_SUFFIX.MODERATIONS_CREATE, @@ -78,7 +92,7 @@ export const openaiConfigs: InstrumentationConfig[] = [ channelName: OPENAI_CHANNEL_SUFFIX.BETA_CHAT_COMPLETIONS_STREAM, module: { name: "openai", - versionRange: ">=4.0.0", + versionRange: ">=4.0.0 <5.0.0", filePath: "resources/beta/chat/completions.mjs", }, functionQuery: { @@ -88,6 +102,20 @@ export const openaiConfigs: InstrumentationConfig[] = [ }, }, + { + channelName: OPENAI_CHANNEL_SUFFIX.BETA_CHAT_COMPLETIONS_STREAM, + module: { + name: "openai", + versionRange: ">=5.0.0", + filePath: "resources/chat/completions/completions.mjs", + }, + functionQuery: { + className: "Completions", + methodName: "stream", + kind: "Sync", + }, + }, + // Responses API (v4.87.0+) { channelName: OPENAI_CHANNEL_SUFFIX.RESPONSES_CREATE, diff --git a/package.json b/package.json index bde676962..f42773d7b 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,7 @@ "knip": "knip --config knip.jsonc --no-config-hints", "test": "dotenv -e .env -- turbo run test --filter=\"!@braintrust/otel\"", "test:e2e": "dotenv -e 
.env -- turbo run test:e2e", + "test:e2e:update": "dotenv -e .env -- turbo run test:e2e:update", "playground": "dotenv -e .env -- turbo run playground --filter=\"braintrust\"", "prepare": "husky || true", "lint:prettier": "prettier --check .", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4873f2a9e..ec34c30e5 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -32,12 +32,33 @@ importers: e2e: devDependencies: + '@braintrust/otel': + specifier: workspace:^ + version: link:../integrations/otel-js + '@opentelemetry/api': + specifier: '>=1.9.0' + version: 1.9.0 + '@opentelemetry/context-async-hooks': + specifier: '>=1.9.0' + version: 2.6.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': + specifier: '>=1.9.0' + version: 2.2.0(@opentelemetry/api@1.9.0) '@types/node': specifier: ^20.10.5 version: 20.19.16 braintrust: specifier: workspace:^ version: link:../js + openai: + specifier: 6.25.0 + version: 6.25.0(ws@8.18.3)(zod@3.25.76) + openai-v4: + specifier: npm:openai@4.104.0 + version: openai@4.104.0(ws@8.18.3)(zod@3.25.76) + openai-v5: + specifier: npm:openai@5.11.0 + version: openai@5.11.0(ws@8.18.3)(zod@3.25.76) tsx: specifier: ^3.14.0 version: 3.14.0 @@ -2069,6 +2090,12 @@ packages: resolution: {integrity: sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==} engines: {node: '>=8.0.0'} + '@opentelemetry/context-async-hooks@2.6.0': + resolution: {integrity: sha512-L8UyDwqpTcbkIK5cgwDRDYDoEhQoj8wp8BwsO19w3LB1Z41yEQm2VJyNfAi9DrLP/YTqXqWpKHyZfR9/tFYo1Q==} + engines: {node: ^18.19.0 || >=20.6.0} + peerDependencies: + '@opentelemetry/api': '>=1.0.0 <1.10.0' + '@opentelemetry/core@2.2.0': resolution: {integrity: sha512-FuabnnUm8LflnieVxs6eP7Z383hgQU4W1e3KJS6aOG3RxWxcHyBxH8fDMHNgu/gFx/M2jvTOW/4/PHhLz6bjWw==} engines: {node: ^18.19.0 || >=20.6.0} @@ -4782,18 +4809,6 @@ packages: zod: optional: true - openai@6.15.0: - resolution: {integrity: 
sha512-F1Lvs5BoVvmZtzkUEVyh8mDQPPFolq4F+xdsx/DO8Hee8YF3IGAlZqUIsF+DVGhqf4aU0a3bTghsxB6OIsRy1g==} - hasBin: true - peerDependencies: - ws: ^8.18.0 - zod: ^3.25 || ^4.0 - peerDependenciesMeta: - ws: - optional: true - zod: - optional: true - openai@6.25.0: resolution: {integrity: sha512-mEh6VZ2ds2AGGokWARo18aPISI1OhlgdEIC1ewhkZr8pSIT31dec0ecr9Nhxx0JlybyOgoAT1sWeKtwPZzJyww==} hasBin: true @@ -7756,7 +7771,7 @@ snapshots: dependencies: '@langchain/core': 1.1.10(@opentelemetry/api@1.9.0)(@opentelemetry/sdk-trace-base@2.2.0(@opentelemetry/api@1.9.0))(openai@6.25.0(ws@8.18.3)(zod@3.25.76)) js-tiktoken: 1.0.21 - openai: 6.15.0(ws@8.18.3)(zod@3.25.76) + openai: 6.25.0(ws@8.18.3)(zod@3.25.76) zod: 3.25.76 transitivePeerDependencies: - ws @@ -7929,6 +7944,10 @@ snapshots: '@opentelemetry/api@1.9.0': {} + '@opentelemetry/context-async-hooks@2.6.0(@opentelemetry/api@1.9.0)': + dependencies: + '@opentelemetry/api': 1.9.0 + '@opentelemetry/core@2.2.0(@opentelemetry/api@1.9.0)': dependencies: '@opentelemetry/api': 1.9.0 @@ -11346,11 +11365,6 @@ snapshots: ws: 8.18.3 zod: 3.25.76 - openai@6.15.0(ws@8.18.3)(zod@3.25.76): - optionalDependencies: - ws: 8.18.3 - zod: 3.25.76 - openai@6.25.0(ws@8.18.3)(zod@3.25.76): optionalDependencies: ws: 8.18.3 diff --git a/turbo.json b/turbo.json index ebc767257..2679d1c20 100644 --- a/turbo.json +++ b/turbo.json @@ -1,6 +1,10 @@ { "$schema": "https://turbo.build/schema.json", - "globalPassThroughEnv": ["OPENAI_API_KEY", "ANTHROPIC_API_KEY"], + "globalPassThroughEnv": [ + "OPENAI_API_KEY", + "OPENAI_BASE_URL", + "ANTHROPIC_API_KEY" + ], "tasks": { "build": { "dependsOn": ["^build"], @@ -8,12 +12,32 @@ "env": [] }, "test": { - "env": ["ANTHROPIC_API_KEY", "BRAINTRUST_API_KEY", "OPENAI_API_KEY"], + "env": [ + "ANTHROPIC_API_KEY", + "BRAINTRUST_API_KEY", + "OPENAI_API_KEY", + "OPENAI_BASE_URL" + ], "dependsOn": ["^build"], "outputs": [] }, "test:e2e": { - "env": ["ANTHROPIC_API_KEY", "BRAINTRUST_API_KEY", "OPENAI_API_KEY"], + "env": [ + 
"ANTHROPIC_API_KEY", + "BRAINTRUST_API_KEY", + "OPENAI_API_KEY", + "OPENAI_BASE_URL" + ], + "dependsOn": ["^build"], + "outputs": [] + }, + "test:e2e:update": { + "env": [ + "ANTHROPIC_API_KEY", + "BRAINTRUST_API_KEY", + "OPENAI_API_KEY", + "OPENAI_BASE_URL" + ], "dependsOn": ["^build"], "outputs": [] }, From 16452287c0a778ee131ccead9ae9977ba6c6e004 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Fri, 6 Mar 2026 16:25:38 -0800 Subject: [PATCH 04/11] . --- e2e/README.md | 49 ++++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/e2e/README.md b/e2e/README.md index 08ffea44d..e457f3d0b 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -1,50 +1,49 @@ # E2E Tests -End-to-end tests that validate the Braintrust SDK by running real SDK usage scenarios against a mock server. +End-to-end tests that validate the Braintrust SDK by running real usage scenarios against a mock Braintrust server. ## How It Works -1. A **mock Braintrust server** starts before all tests (via Vitest global setup) -2. Each test spawns a **scenario script** as a subprocess using `tsx`, with env vars pointing at the mock server -3. The scenario uses the SDK normally (init, create spans, log data, flush) -4. The test waits for expected events to arrive at the mock server, then **normalizes** and **snapshots** them +1. Each test uses `withScenarioHarness(...)`, which starts an isolated mock Braintrust server +2. The test spawns a scenario script as a subprocess, usually with `tsx` +3. The scenario uses the SDK normally (init, create spans, log data, flush, or OTEL / OpenAI integrations) +4. The test inspects captured events, payloads, or raw HTTP requests, then normalizes and snapshots them where useful -Subprocess isolation ensures the SDK operates exactly as it would in production. +Subprocess isolation keeps the SDK execution path close to production, including plain Node runs for auto-instrumentation hook coverage. 
## Structure ``` e2e/ -├── scenarios/ # Standalone scripts that use the SDK (run as subprocesses) -├── tests/ -│ ├── helpers/ # Test utilities (see below) -│ ├── global-setup.ts # Starts mock server, injects URL + API key into test context -│ ├── *.test.ts # Test files -│ └── __snapshots__/ # Vitest snapshot files -└── vitest.config.mts +|- scenarios/ # Standalone scripts run as subprocesses +|- tests/ +| |- helpers/ # Harness, mock server, normalization, selectors, summaries +| |- *.test.ts # Trace, OTEL, and OpenAI coverage +| `- __snapshots__/ # Vitest snapshots +`- vitest.config.mts ``` ## Helpers (`tests/helpers/`) -- `mock-braintrust-server.ts` — Mock Braintrust API server (started automatically via global setup). -- `run-scenario.ts` — Spawns scenario scripts as subprocesses. -- `ingestion.ts` — Utilities for retrieving and waiting on data captured by the mock server. -- `normalize.ts` — Makes captured data deterministic for snapshot testing. +- `scenario-harness.ts` - Starts the mock server, creates a unique test run id, and runs scenarios. +- `mock-braintrust-server.ts` - Captures requests, merged log payloads, and parsed span-like events. +- `normalize.ts` - Makes snapshots deterministic by normalizing ids, timestamps, paths, and mock-server URLs. +- `trace-selectors.ts` / `trace-summary.ts` - Helpers for finding spans and snapshotting only the relevant shape. +- `openai.ts` - Shared scenario lists and assertions for OpenAI wrapper and hook coverage across v4/v5/v6. ### Writing a new test -Use `runScenarioOrThrow(scenarioFile, env)` to execute a scenario. It runs the file with `tsx`, passes your env vars, and throws on non-zero exit. Default timeout is 15s. +Most tests use `withScenarioHarness(async (harness) => { ... })`. It gives each test a fresh server plus helpers for running scenarios and reading what the server captured. The main utilities you'll use in test files: -- `createTestRunId()` — Returns a unique `e2e-{uuid}` string. 
Pass it to your scenario via env vars so you can filter events for your test. -- `getTestServerEnv(testRunId)` — Returns the env vars a scenario needs to talk to the mock server (`BRAINTRUST_API_URL`, `BRAINTRUST_API_KEY`, `TEST_RUN_ID`). -- `waitForRunEvent(testRunId, predicate)` — Polls the mock server until an event matching the test run ID and predicate arrives (5s timeout, 50ms interval). Returns the matched `CapturedLogEvent`. -- `waitForEvent(predicate)` — Same as above but without filtering by test run ID. -- `getPayloadsForRun(testRunId)` — Returns all raw `logs3` payloads for a given test run. -- `getEvents()` / `getPayloads()` — Low-level access to all captured events/payloads, with optional predicate filtering. +- `runScenario(path, timeoutMs?)` - Runs a TypeScript scenario with `tsx`. +- `runNodeScenario(path, nodeArgs?, timeoutMs?)` - Runs plain Node scenarios, used for `--import braintrust/hook.mjs`. +- `testRunEvents()` - Returns parsed events tagged with the current test run id. +- `events()`, `payloads()`, `requestCursor()`, `requestsAfter()` - Lower-level access for ingestion payloads and HTTP request flow assertions. +- `testRunId` - Useful when a scenario or assertion needs the exact run marker. -Use `normalizeEvent(event)` and `normalizePayloads(payloads)` before snapshotting. Replaces timestamps with ``, UUIDs with indexed tokens (``, ``, ``, ``), and absolute file paths with relative ones. +Use `normalizeForSnapshot(...)` before snapshotting. It replaces timestamps and ids with stable tokens and strips machine-specific paths and localhost ports. 
## Running From 62f46eebfbd562cbc570b1e632441fb465e6a630 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Fri, 6 Mar 2026 16:33:58 -0800 Subject: [PATCH 05/11] fix tests --- .../trace-primitives-basic.test.ts.snap | 40 +++++++++---------- e2e/tests/helpers/normalize.ts | 34 ++++++++++++++++ .../configs/openai.test.ts | 40 ++++++++++++++----- .../auto-instrumentations/configs/openai.ts | 16 +++++++- 4 files changed, 98 insertions(+), 32 deletions(-) diff --git a/e2e/tests/__snapshots__/trace-primitives-basic.test.ts.snap b/e2e/tests/__snapshots__/trace-primitives-basic.test.ts.snap index 0f736c84f..f58d7f990 100644 --- a/e2e/tests/__snapshots__/trace-primitives-basic.test.ts.snap +++ b/e2e/tests/__snapshots__/trace-primitives-basic.test.ts.snap @@ -158,16 +158,16 @@ exports[`trace-primitives-basic collects a minimal manual trace tree > request-f "error": "basic boom Error: basic boom - at logger.traced.name (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/e2e/scenarios/trace-primitives-basic.ts:41:27) - at /Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:7507:63 - at AsyncLocalStorage.run (node:internal/async_local_storage/async_context_frame:63:14) - at BraintrustContextManager.runInContext (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:4421:30) - at withCurrent (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:7507:38) - at /Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:5723:18 - at runCatchFinally (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:1551:17) - at _class9.traced (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:5720:17) - at main (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/e2e/scenarios/trace-primitives-basic.ts:10:16) - at runMain 
(/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/e2e/scenarios/helpers.ts:40:8)", + at logger.traced.name (/e2e/scenarios/trace-primitives-basic.ts:0:0) + at /js/dist/index.js:0:0 + at AsyncLocalStorage.run (node::0:0) + at BraintrustContextManager.runInContext (/js/dist/index.js:0:0) + at withCurrent (/js/dist/index.js:0:0) + at /js/dist/index.js:0:0 + at runCatchFinally (/js/dist/index.js:0:0) + at _class9.traced (/js/dist/index.js:0:0) + at main (/e2e/scenarios/trace-primitives-basic.ts:0:0) + at runMain (/e2e/scenarios/helpers.ts:0:0)", "id": "", "log_id": "g", "metadata": { @@ -256,16 +256,16 @@ Error: basic boom "error": "basic boom Error: basic boom - at logger.traced.name (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/e2e/scenarios/trace-primitives-basic.ts:41:27) - at /Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:7507:63 - at AsyncLocalStorage.run (node:internal/async_local_storage/async_context_frame:63:14) - at BraintrustContextManager.runInContext (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:4421:30) - at withCurrent (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:7507:38) - at /Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:5723:18 - at runCatchFinally (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:1551:17) - at _class9.traced (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/js/dist/index.js:5720:17) - at main (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/e2e/scenarios/trace-primitives-basic.ts:10:16) - at runMain (/Users/lucaforstner/conductor/workspaces/braintrust-sdk-javascript/quebec/e2e/scenarios/helpers.ts:40:8)", + at logger.traced.name (/e2e/scenarios/trace-primitives-basic.ts:0:0) + at /js/dist/index.js:0:0 + at 
AsyncLocalStorage.run (node::0:0) + at BraintrustContextManager.runInContext (/js/dist/index.js:0:0) + at withCurrent (/js/dist/index.js:0:0) + at /js/dist/index.js:0:0 + at runCatchFinally (/js/dist/index.js:0:0) + at _class9.traced (/js/dist/index.js:0:0) + at main (/e2e/scenarios/trace-primitives-basic.ts:0:0) + at runMain (/e2e/scenarios/helpers.ts:0:0)", "id": "", "log_id": "g", "metadata": { diff --git a/e2e/tests/helpers/normalize.ts b/e2e/tests/helpers/normalize.ts index eb59bbeaa..2f4c0e852 100644 --- a/e2e/tests/helpers/normalize.ts +++ b/e2e/tests/helpers/normalize.ts @@ -1,3 +1,6 @@ +import * as path from "node:path"; +import { fileURLToPath } from "node:url"; + type Primitive = null | boolean | number | string; export type Json = | Primitive @@ -24,6 +27,13 @@ const XACT_VERSION_KEYS = new Set([ "initialVersion", "version", ]); +const HELPERS_DIR = path.dirname(fileURLToPath(import.meta.url)); +const REPO_ROOT = path.resolve(HELPERS_DIR, "../../..").replace(/\\/g, "/"); +const STACK_FRAME_REPO_PATH_REGEX = + /(?:[A-Za-z]:)?[^\s)\n]*braintrust-sdk-javascript(?:[\\/](?:braintrust-sdk-javascript|[^\\/\s)\n]+))?((?:[\\/](?:e2e|js)[^:\s)\n]+)):\d+:\d+/g; +const REPO_PATH_REGEX = + /(?:[A-Za-z]:)?[^\s)\n]*braintrust-sdk-javascript(?:[\\/](?:braintrust-sdk-javascript|[^\\/\s)\n]+))?((?:[\\/](?:e2e|js)[^:\s)\n]+))/g; +const NODE_INTERNAL_FRAME_REGEX = /node:[^)\n]+:\d+:\d+/g; function normalizeCallerFilename(value: string): string { const e2eIndex = value.lastIndexOf("/e2e/"); @@ -48,6 +58,28 @@ function normalizeMockServerUrl(value: string): string | undefined { } } +function normalizeStackLikeString(value: string): string { + let normalized = value.replaceAll(REPO_ROOT, ""); + + normalized = normalized.replace( + STACK_FRAME_REPO_PATH_REGEX, + (_, suffix: string) => `${suffix.replace(/\\/g, "/")}:0:0`, + ); + normalized = normalized.replace(REPO_PATH_REGEX, (_, suffix: string) => { + return `${suffix.replace(/\\/g, "/")}`; + }); + normalized = 
normalized.replace( + /((?:\/(?:e2e|js)\/[^:\s)\n]+)):\d+:\d+/g, + "$1:0:0", + ); + normalized = normalized.replace( + NODE_INTERNAL_FRAME_REGEX, + "node::0:0", + ); + + return normalized; +} + function normalizeObject( value: { [key: string]: Json }, tokenMaps: TokenMaps, @@ -121,6 +153,8 @@ function normalizeValue( } if (typeof value === "string") { + value = normalizeStackLikeString(value); + const normalizedUrl = normalizeMockServerUrl(value); if (normalizedUrl) { return normalizedUrl; diff --git a/js/src/auto-instrumentations/configs/openai.test.ts b/js/src/auto-instrumentations/configs/openai.test.ts index 130bc7438..68d95ea0c 100644 --- a/js/src/auto-instrumentations/configs/openai.test.ts +++ b/js/src/auto-instrumentations/configs/openai.test.ts @@ -13,19 +13,37 @@ describe("OpenAI Instrumentation Configs", () => { }); it("should have chat.completions.create config", () => { - const config = openaiConfigs.find( - (c) => c.channelName === "chat.completions.create", - ); + const configs = configsForChannel("chat.completions.create"); - expect(config).toBeDefined(); - expect(config?.module.name).toBe("openai"); - expect(config?.module.versionRange).toBe(">=4.0.0"); - expect(config?.module.filePath).toBe( - "resources/chat/completions/completions.mjs", + expect(configs).toHaveLength(2); + expect(configs).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + module: expect.objectContaining({ + name: "openai", + versionRange: ">=4.0.0 <5.0.0", + filePath: "resources/chat/completions.mjs", + }), + functionQuery: expect.objectContaining({ + className: "Completions", + methodName: "create", + kind: "Async", + }), + }), + expect.objectContaining({ + module: expect.objectContaining({ + name: "openai", + versionRange: ">=5.0.0", + filePath: "resources/chat/completions/completions.mjs", + }), + functionQuery: expect.objectContaining({ + className: "Completions", + methodName: "create", + kind: "Async", + }), + }), + ]), ); - expect((config?.functionQuery as 
any).className).toBe("Completions"); - expect((config?.functionQuery as any).methodName).toBe("create"); - expect((config?.functionQuery as any).kind).toBe("Async"); }); it("should have embeddings.create config", () => { diff --git a/js/src/auto-instrumentations/configs/openai.ts b/js/src/auto-instrumentations/configs/openai.ts index 277d7aa4b..72cb77c53 100644 --- a/js/src/auto-instrumentations/configs/openai.ts +++ b/js/src/auto-instrumentations/configs/openai.ts @@ -18,7 +18,21 @@ export const openaiConfigs: InstrumentationConfig[] = [ channelName: OPENAI_CHANNEL_SUFFIX.CHAT_COMPLETIONS_CREATE, module: { name: "openai", - versionRange: ">=4.0.0", + versionRange: ">=4.0.0 <5.0.0", + filePath: "resources/chat/completions.mjs", + }, + functionQuery: { + className: "Completions", + methodName: "create", + kind: "Async", + }, + }, + + { + channelName: OPENAI_CHANNEL_SUFFIX.CHAT_COMPLETIONS_CREATE, + module: { + name: "openai", + versionRange: ">=5.0.0", filePath: "resources/chat/completions/completions.mjs", }, functionQuery: { From d726b451e60247c235f1bc41ba72c97bf99cd5d6 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Fri, 6 Mar 2026 21:43:11 -0800 Subject: [PATCH 06/11] fix tests? 
--- .../configs/openai.test.ts | 14 ++++- .../auto-instrumentations/configs/openai.ts | 14 +++++ .../fixtures/openai-e2e-test.mjs | 61 +++++++++++++++++++ 3 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 js/tests/auto-instrumentations/fixtures/openai-e2e-test.mjs diff --git a/js/src/auto-instrumentations/configs/openai.test.ts b/js/src/auto-instrumentations/configs/openai.test.ts index 68d95ea0c..824f9608d 100644 --- a/js/src/auto-instrumentations/configs/openai.test.ts +++ b/js/src/auto-instrumentations/configs/openai.test.ts @@ -15,7 +15,7 @@ describe("OpenAI Instrumentation Configs", () => { it("should have chat.completions.create config", () => { const configs = configsForChannel("chat.completions.create"); - expect(configs).toHaveLength(2); + expect(configs).toHaveLength(3); expect(configs).toEqual( expect.arrayContaining([ expect.objectContaining({ @@ -30,6 +30,18 @@ describe("OpenAI Instrumentation Configs", () => { kind: "Async", }), }), + expect.objectContaining({ + module: expect.objectContaining({ + name: "openai", + versionRange: ">=4.0.0 <5.0.0", + filePath: "resources/chat/completions/completions.mjs", + }), + functionQuery: expect.objectContaining({ + className: "Completions", + methodName: "create", + kind: "Async", + }), + }), expect.objectContaining({ module: expect.objectContaining({ name: "openai", diff --git a/js/src/auto-instrumentations/configs/openai.ts b/js/src/auto-instrumentations/configs/openai.ts index 72cb77c53..785e7a495 100644 --- a/js/src/auto-instrumentations/configs/openai.ts +++ b/js/src/auto-instrumentations/configs/openai.ts @@ -28,6 +28,20 @@ export const openaiConfigs: InstrumentationConfig[] = [ }, }, + { + channelName: OPENAI_CHANNEL_SUFFIX.CHAT_COMPLETIONS_CREATE, + module: { + name: "openai", + versionRange: ">=4.0.0 <5.0.0", + filePath: "resources/chat/completions/completions.mjs", + }, + functionQuery: { + className: "Completions", + methodName: "create", + kind: "Async", + }, + }, + { channelName: 
OPENAI_CHANNEL_SUFFIX.CHAT_COMPLETIONS_CREATE, module: { diff --git a/js/tests/auto-instrumentations/fixtures/openai-e2e-test.mjs b/js/tests/auto-instrumentations/fixtures/openai-e2e-test.mjs new file mode 100644 index 000000000..7dd0d778f --- /dev/null +++ b/js/tests/auto-instrumentations/fixtures/openai-e2e-test.mjs @@ -0,0 +1,61 @@ +import OpenAI from "openai"; +import { initLogger, _exportsForTestingOnly } from "../../../dist/index.mjs"; + +const backgroundLogger = _exportsForTestingOnly.useTestBackgroundLogger(); +await _exportsForTestingOnly.simulateLoginForTests(); + +const logger = initLogger({ + projectName: "auto-instrumentation-test", + projectId: "test-project-id", +}); + +// Create OpenAI client with mocked fetch +const mockFetch = async (url, options) => { + return { + ok: true, + status: 200, + headers: new Headers({ "content-type": "application/json" }), + json: async () => ({ + id: "chatcmpl-test123", + object: "chat.completion", + created: Date.now(), + model: "gpt-4", + choices: [ + { + index: 0, + message: { role: "assistant", content: "Test response" }, + finish_reason: "stop", + }, + ], + usage: { + prompt_tokens: 10, + completion_tokens: 5, + total_tokens: 15, + }, + }), + }; +}; + +const client = new OpenAI({ + apiKey: "test-key", + fetch: mockFetch, +}); + +try { + const completion = await client.chat.completions.create({ + model: "gpt-4", + messages: [{ role: "user", content: "Hello!" 
}], + }); + + const spans = await backgroundLogger.drain(); + + for (const span of spans) { + console.log("SPAN_DATA:", JSON.stringify(span)); + } + + console.log("SUCCESS: API call completed"); + process.exit(0); +} catch (error) { + console.error("ERROR:", error.message); + process.exit(1); +} From 47e998ca2b9d8c73358780b3c37abcc68437651b Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 10 Mar 2026 09:52:54 +0100 Subject: [PATCH 07/11] folder scenarios --- e2e/README.md | 57 +++++--- .../helpers/mock-braintrust-server.ts | 2 +- e2e/{tests => }/helpers/normalize.ts | 9 +- e2e/{tests => }/helpers/openai.ts | 8 +- e2e/{tests => }/helpers/scenario-harness.ts | 122 ++++++++++++----- e2e/helpers/scenario-installer.ts | 123 ++++++++++++++++++ .../scenario-runtime.ts} | 0 e2e/{tests => }/helpers/trace-selectors.ts | 2 +- e2e/{tests => }/helpers/trace-summary.ts | 2 +- e2e/package.json | 4 +- ...to-instrumentation-node-hook.openai-v4.mjs | 4 - ...to-instrumentation-node-hook.openai-v5.mjs | 4 - ...to-instrumentation-node-hook.openai-v6.mjs | 4 - .../__snapshots__/scenario.test.ts.snap} | 0 .../package.json | 9 ++ .../scenario.impl.mjs} | 0 .../scenario.openai-v4.mjs | 4 + .../scenario.openai-v5.mjs | 4 + .../scenario.openai-v6.mjs | 4 + .../scenario.test.ts} | 36 +++-- .../__snapshots__/scenario.test.ts.snap} | 0 .../scenario.test.ts} | 17 ++- .../scenario.ts} | 2 +- .../scenario.test.ts} | 16 ++- .../scenario.ts} | 2 +- .../__snapshots__/scenario.test.ts.snap} | 4 +- .../scenario.test.ts} | 17 ++- .../scenario.ts} | 6 +- .../__snapshots__/scenario.test.ts.snap} | 60 ++++----- .../trace-primitives-basic/scenario.test.ts} | 17 ++- .../scenario.ts} | 6 +- ...ap-openai-conversation-traces.openai-v4.ts | 5 - ...ap-openai-conversation-traces.openai-v5.ts | 5 - ...ap-openai-conversation-traces.openai-v6.ts | 5 - .../__snapshots__/scenario.test.ts.snap} | 0 .../package.json | 9 ++ .../scenario.impl.ts} | 6 +- .../scenario.openai-v4.ts | 5 + .../scenario.openai-v5.ts | 5 + 
.../scenario.openai-v6.ts | 5 + .../scenario.test.ts} | 37 ++++-- e2e/tsconfig.json | 4 +- e2e/turbo.json | 3 - e2e/vitest.config.mts | 2 +- knip.json | 23 ++++ pnpm-lock.yaml | 9 -- 46 files changed, 488 insertions(+), 180 deletions(-) rename e2e/{tests => }/helpers/mock-braintrust-server.ts (99%) rename e2e/{tests => }/helpers/normalize.ts (94%) rename e2e/{tests => }/helpers/openai.ts (88%) rename e2e/{tests => }/helpers/scenario-harness.ts (60%) create mode 100644 e2e/helpers/scenario-installer.ts rename e2e/{scenarios/helpers.ts => helpers/scenario-runtime.ts} (100%) rename e2e/{tests => }/helpers/trace-selectors.ts (96%) rename e2e/{tests => }/helpers/trace-summary.ts (99%) delete mode 100644 e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v4.mjs delete mode 100644 e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v5.mjs delete mode 100644 e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v6.mjs rename e2e/{tests/__snapshots__/openai-auto-instrumentation-node-hook.test.ts.snap => scenarios/openai-auto-instrumentation-node-hook/__snapshots__/scenario.test.ts.snap} (100%) create mode 100644 e2e/scenarios/openai-auto-instrumentation-node-hook/package.json rename e2e/scenarios/{openai-auto-instrumentation-node-hook.impl.mjs => openai-auto-instrumentation-node-hook/scenario.impl.mjs} (100%) create mode 100644 e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.openai-v4.mjs create mode 100644 e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.openai-v5.mjs create mode 100644 e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.openai-v6.mjs rename e2e/{tests/openai-auto-instrumentation-node-hook.test.ts => scenarios/openai-auto-instrumentation-node-hook/scenario.test.ts} (62%) rename e2e/{tests/__snapshots__/otel-compat-mixed-tracing.test.ts.snap => scenarios/otel-compat-mixed-tracing/__snapshots__/scenario.test.ts.snap} (100%) rename e2e/{tests/otel-compat-mixed-tracing.test.ts => 
scenarios/otel-compat-mixed-tracing/scenario.test.ts} (71%) rename e2e/scenarios/{otel-compat-mixed-tracing.ts => otel-compat-mixed-tracing/scenario.ts} (98%) rename e2e/{tests/otel-span-processor-export.test.ts => scenarios/otel-span-processor-export/scenario.test.ts} (73%) rename e2e/scenarios/{otel-span-processor-export.ts => otel-span-processor-export/scenario.ts} (96%) rename e2e/{tests/__snapshots__/trace-context-and-continuation.test.ts.snap => scenarios/trace-context-and-continuation/__snapshots__/scenario.test.ts.snap} (98%) rename e2e/{tests/trace-context-and-continuation.test.ts => scenarios/trace-context-and-continuation/scenario.test.ts} (81%) rename e2e/scenarios/{trace-context-and-continuation.ts => trace-context-and-continuation/scenario.ts} (95%) rename e2e/{tests/__snapshots__/trace-primitives-basic.test.ts.snap => scenarios/trace-primitives-basic/__snapshots__/scenario.test.ts.snap} (88%) rename e2e/{tests/trace-primitives-basic.test.ts => scenarios/trace-primitives-basic/scenario.test.ts} (75%) rename e2e/scenarios/{trace-primitives-basic.ts => trace-primitives-basic/scenario.ts} (93%) delete mode 100644 e2e/scenarios/wrap-openai-conversation-traces.openai-v4.ts delete mode 100644 e2e/scenarios/wrap-openai-conversation-traces.openai-v5.ts delete mode 100644 e2e/scenarios/wrap-openai-conversation-traces.openai-v6.ts rename e2e/{tests/__snapshots__/wrap-openai-conversation-traces.test.ts.snap => scenarios/wrap-openai-conversation-traces/__snapshots__/scenario.test.ts.snap} (100%) create mode 100644 e2e/scenarios/wrap-openai-conversation-traces/package.json rename e2e/scenarios/{wrap-openai-conversation-traces.impl.ts => wrap-openai-conversation-traces/scenario.impl.ts} (96%) create mode 100644 e2e/scenarios/wrap-openai-conversation-traces/scenario.openai-v4.ts create mode 100644 e2e/scenarios/wrap-openai-conversation-traces/scenario.openai-v5.ts create mode 100644 e2e/scenarios/wrap-openai-conversation-traces/scenario.openai-v6.ts rename 
e2e/{tests/wrap-openai-conversation-traces.test.ts => scenarios/wrap-openai-conversation-traces/scenario.test.ts} (82%) delete mode 100644 e2e/turbo.json create mode 100644 knip.json diff --git a/e2e/README.md b/e2e/README.md index e457f3d0b..a96f097e9 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -4,47 +4,74 @@ End-to-end tests that validate the Braintrust SDK by running real usage scenario ## How It Works -1. Each test uses `withScenarioHarness(...)`, which starts an isolated mock Braintrust server -2. The test spawns a scenario script as a subprocess, usually with `tsx` -3. The scenario uses the SDK normally (init, create spans, log data, flush, or OTEL / OpenAI integrations) -4. The test inspects captured events, payloads, or raw HTTP requests, then normalizes and snapshots them where useful +1. Each colocated `scenario.test.ts` file uses `withScenarioHarness(...)`, which starts an isolated mock Braintrust server. +2. The test resolves its own scenario folder and spawns a scenario entrypoint as a subprocess. +3. The scenario uses the SDK normally (init, create spans, log data, flush, or OTEL / OpenAI integrations). +4. The test inspects captured events, payloads, or raw HTTP requests, then normalizes and snapshots them where useful. Subprocess isolation keeps the SDK execution path close to production, including plain Node runs for auto-instrumentation hook coverage. 
## Structure -``` +```text e2e/ -|- scenarios/ # Standalone scripts run as subprocesses -|- tests/ -| |- helpers/ # Harness, mock server, normalization, selectors, summaries -| |- *.test.ts # Trace, OTEL, and OpenAI coverage -| `- __snapshots__/ # Vitest snapshots +|- helpers/ # Shared harness, mock server, normalization, selectors, summaries +|- scenarios/ +| `- / +| |- scenario.ts # Default tsx entrypoint +| |- scenario.mjs # Default plain-Node entrypoint when needed +| |- scenario.test.ts # Colocated Vitest suite +| |- package.json # Optional slim scenario-local deps +| `- __snapshots__/ # Colocated snapshots `- vitest.config.mts ``` -## Helpers (`tests/helpers/`) +Any extra files needed only by one scenario stay in that scenario folder. Anything reused by multiple scenarios belongs in `e2e/helpers/`. + +## Helpers (`helpers/`) -- `scenario-harness.ts` - Starts the mock server, creates a unique test run id, and runs scenarios. +- `scenario-harness.ts` - Starts the mock server, creates a unique test run id, resolves scenario directories, and runs scenario folders. +- `scenario-installer.ts` - Installs optional scenario-local dependencies from a colocated `package.json`. - `mock-braintrust-server.ts` - Captures requests, merged log payloads, and parsed span-like events. - `normalize.ts` - Makes snapshots deterministic by normalizing ids, timestamps, paths, and mock-server URLs. - `trace-selectors.ts` / `trace-summary.ts` - Helpers for finding spans and snapshotting only the relevant shape. +- `scenario-runtime.ts` - Shared runtime utilities used by scenario entrypoints. - `openai.ts` - Shared scenario lists and assertions for OpenAI wrapper and hook coverage across v4/v5/v6. ### Writing a new test -Most tests use `withScenarioHarness(async (harness) => { ... })`. It gives each test a fresh server plus helpers for running scenarios and reading what the server captured. 
+Most tests use this pattern: + +```ts +const scenarioDir = resolveScenarioDir(import.meta.url); + +beforeAll(async () => { + await installScenarioDependencies({ scenarioDir }); +}); +``` + +`installScenarioDependencies(...)` is optional and only needed when the scenario folder has its own `package.json`. + +`withScenarioHarness(async (harness) => { ... })` gives each test a fresh server plus helpers for running scenarios and reading what the server captured. The main utilities you'll use in test files: -- `runScenario(path, timeoutMs?)` - Runs a TypeScript scenario with `tsx`. -- `runNodeScenario(path, nodeArgs?, timeoutMs?)` - Runs plain Node scenarios, used for `--import braintrust/hook.mjs`. +- `resolveScenarioDir(import.meta.url)` - Resolves the folder that contains the current test. +- `installScenarioDependencies({ scenarioDir })` - Installs optional scenario-local dependencies. +- `runScenarioDir({ scenarioDir, entry?, timeoutMs? })` - Runs a TypeScript scenario with `tsx`. +- `runNodeScenarioDir({ scenarioDir, entry?, nodeArgs?, timeoutMs? })` - Runs plain Node scenarios, used for `--import braintrust/hook.mjs`. - `testRunEvents()` - Returns parsed events tagged with the current test run id. - `events()`, `payloads()`, `requestCursor()`, `requestsAfter()` - Lower-level access for ingestion payloads and HTTP request flow assertions. - `testRunId` - Useful when a scenario or assertion needs the exact run marker. Use `normalizeForSnapshot(...)` before snapshotting. It replaces timestamps and ids with stable tokens and strips machine-specific paths and localhost ports. +### Scenario-local `package.json` + +Scenario-local manifests are optional and should stay slim. They are only for scenario-specific external dependencies, such as OpenAI version matrices. Shared test tooling and workspace-local packages stay in `e2e/package.json`. + +`workspace:` dependency specs are intentionally not supported in scenario-local manifests. 
If a scenario needs a workspace package, keep that dependency in `e2e/package.json`. + ## Running ```bash diff --git a/e2e/tests/helpers/mock-braintrust-server.ts b/e2e/helpers/mock-braintrust-server.ts similarity index 99% rename from e2e/tests/helpers/mock-braintrust-server.ts rename to e2e/helpers/mock-braintrust-server.ts index 2369c3abd..1de467465 100644 --- a/e2e/tests/helpers/mock-braintrust-server.ts +++ b/e2e/helpers/mock-braintrust-server.ts @@ -55,7 +55,7 @@ export type CapturedLogEvent = { }; }; -export interface MockBraintrustServer { +interface MockBraintrustServer { apiKey: string; close: () => Promise; events: CapturedLogEvent[]; diff --git a/e2e/tests/helpers/normalize.ts b/e2e/helpers/normalize.ts similarity index 94% rename from e2e/tests/helpers/normalize.ts rename to e2e/helpers/normalize.ts index 2f4c0e852..4df975f57 100644 --- a/e2e/tests/helpers/normalize.ts +++ b/e2e/helpers/normalize.ts @@ -22,13 +22,14 @@ const UUID_SUBSTRING_REGEX = /[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}/gi; const TIME_KEYS = new Set(["created", "start", "end"]); const SPAN_ID_KEYS = new Set(["id", "span_id", "root_span_id"]); +const ZERO_NUMBER_KEYS = new Set(["caller_lineno"]); const XACT_VERSION_KEYS = new Set([ "currentVersion", "initialVersion", "version", ]); const HELPERS_DIR = path.dirname(fileURLToPath(import.meta.url)); -const REPO_ROOT = path.resolve(HELPERS_DIR, "../../..").replace(/\\/g, "/"); +const REPO_ROOT = path.resolve(HELPERS_DIR, "../..").replace(/\\/g, "/"); const STACK_FRAME_REPO_PATH_REGEX = /(?:[A-Za-z]:)?[^\s)\n]*braintrust-sdk-javascript(?:[\\/](?:braintrust-sdk-javascript|[^\\/\s)\n]+))?((?:[\\/](?:e2e|js)[^:\s)\n]+)):\d+:\d+/g; const REPO_PATH_REGEX = @@ -59,7 +60,8 @@ function normalizeMockServerUrl(value: string): string | undefined { } function normalizeStackLikeString(value: string): string { - let normalized = value.replaceAll(REPO_ROOT, ""); + let normalized = value.replaceAll("file://", ""); + 
normalized = normalized.replaceAll(REPO_ROOT, ""); normalized = normalized.replace( STACK_FRAME_REPO_PATH_REGEX, @@ -146,6 +148,9 @@ function normalizeValue( } if (typeof value === "number") { + if (currentKey && ZERO_NUMBER_KEYS.has(currentKey)) { + return 0; + } if (currentKey && TIME_KEYS.has(currentKey)) { return 0; } diff --git a/e2e/tests/helpers/openai.ts b/e2e/helpers/openai.ts similarity index 88% rename from e2e/tests/helpers/openai.ts rename to e2e/helpers/openai.ts index b9e7dbbf4..f41c21782 100644 --- a/e2e/tests/helpers/openai.ts +++ b/e2e/helpers/openai.ts @@ -1,8 +1,8 @@ import type { CapturedLogEvent } from "./mock-braintrust-server"; import type { Json } from "./normalize"; -export interface OpenAIScenario { - scenarioPath: string; +interface OpenAIScenario { + entry: string; version: string; } @@ -25,14 +25,14 @@ export const OPENAI_SCENARIO_TIMEOUT_MS = 60_000; export const OPENAI_AUTO_HOOK_SCENARIOS: OpenAIScenario[] = OPENAI_VERSIONS.map( ({ suffix, version }) => ({ - scenarioPath: `scenarios/openai-auto-instrumentation-node-hook.openai-${suffix}.mjs`, + entry: `scenario.openai-${suffix}.mjs`, version, }), ); export const WRAP_OPENAI_SCENARIOS: OpenAIScenario[] = OPENAI_VERSIONS.map( ({ suffix, version }) => ({ - scenarioPath: `scenarios/wrap-openai-conversation-traces.openai-${suffix}.ts`, + entry: `scenario.openai-${suffix}.ts`, version, }), ); diff --git a/e2e/tests/helpers/scenario-harness.ts b/e2e/helpers/scenario-harness.ts similarity index 60% rename from e2e/tests/helpers/scenario-harness.ts rename to e2e/helpers/scenario-harness.ts index 452c092ae..e4fadcb9c 100644 --- a/e2e/tests/helpers/scenario-harness.ts +++ b/e2e/helpers/scenario-harness.ts @@ -1,25 +1,31 @@ import { spawn } from "node:child_process"; import { randomUUID } from "node:crypto"; +import { createRequire } from "node:module"; import * as path from "node:path"; +import { fileURLToPath } from "node:url"; import { startMockBraintrustServer, type CapturedLogEvent, type 
CapturedLogPayload, type CapturedRequest, } from "./mock-braintrust-server"; +import { + installScenarioDependencies, + type InstallScenarioDependenciesOptions, + type InstallScenarioDependenciesResult, +} from "./scenario-installer"; -export type EventPredicate = (event: CapturedLogEvent) => boolean; -export type PayloadPredicate = (payload: CapturedLogPayload) => boolean; -export type RequestPredicate = (request: CapturedRequest) => boolean; +type EventPredicate = (event: CapturedLogEvent) => boolean; +type PayloadPredicate = (payload: CapturedLogPayload) => boolean; +type RequestPredicate = (request: CapturedRequest) => boolean; -export interface ScenarioResult { +interface ScenarioResult { exitCode: number; stdout: string; stderr: string; } -const tsxCliPath = require.resolve("tsx/cli"); -const packageRoot = process.cwd(); +const tsxCliPath = createRequire(import.meta.url).resolve("tsx/cli"); const DEFAULT_SCENARIO_TIMEOUT_MS = 15_000; function isRecord(value: unknown): value is Record { @@ -66,12 +72,13 @@ function getTestServerEnv( async function runProcess( args: string[], + cwd: string, env: Record, timeoutMs: number, ): Promise { return await new Promise((resolve, reject) => { const child = spawn(process.execPath, args, { - cwd: packageRoot, + cwd, env: { ...process.env, ...env, @@ -111,36 +118,75 @@ async function runProcess( }); } -async function runScenarioOrThrow( - relativeScenarioPath: string, +function resolveEntryPath(scenarioDir: string, entry: string): string { + return path.join(scenarioDir, entry); +} + +async function runScenarioDirOrThrow( + scenarioDir: string, env: Record, options: { + entry: string; nodeArgs?: string[]; timeoutMs?: number; useTsx?: boolean; - } = {}, + } = { + entry: "scenario.ts", + }, ): Promise { - const scenarioPath = path.join(packageRoot, relativeScenarioPath); + const scenarioPath = resolveEntryPath(scenarioDir, options.entry); const args = options.useTsx === false ? [...(options.nodeArgs ?? 
[]), scenarioPath] : [tsxCliPath, scenarioPath]; const result = await runProcess( args, + scenarioDir, env, options.timeoutMs ?? DEFAULT_SCENARIO_TIMEOUT_MS, ); if (result.exitCode !== 0) { throw new Error( - `Scenario ${relativeScenarioPath} failed with exit code ${result.exitCode}\nSTDOUT:\n${result.stdout}\nSTDERR:\n${result.stderr}`, + `Scenario ${path.join(scenarioDir, options.entry)} failed with exit code ${result.exitCode}\nSTDOUT:\n${result.stdout}\nSTDERR:\n${result.stderr}`, ); } return result; } -export interface ScenarioHarness { +export function resolveScenarioDir(importMetaUrl: string): string { + return path.dirname(fileURLToPath(importMetaUrl)); +} + +export async function runScenarioDir(options: { + env?: Record; + entry?: string; + scenarioDir: string; + timeoutMs?: number; +}): Promise { + return await runScenarioDirOrThrow(options.scenarioDir, options.env ?? {}, { + entry: options.entry ?? "scenario.ts", + timeoutMs: options.timeoutMs, + }); +} + +export async function runNodeScenarioDir(options: { + env?: Record; + entry?: string; + nodeArgs?: string[]; + scenarioDir: string; + timeoutMs?: number; +}): Promise { + return await runScenarioDirOrThrow(options.scenarioDir, options.env ?? {}, { + entry: options.entry ?? 
"scenario.mjs", + nodeArgs: options.nodeArgs, + timeoutMs: options.timeoutMs, + useTsx: false, + }); +} + +interface ScenarioHarness { events: (predicate?: EventPredicate) => CapturedLogEvent[]; payloads: (predicate?: PayloadPredicate) => CapturedLogPayload[]; requestCursor: () => number; @@ -148,15 +194,19 @@ export interface ScenarioHarness { after: number, predicate?: RequestPredicate, ) => CapturedRequest[]; - runNodeScenario: ( - relativeScenarioPath: string, - args?: string[], - timeoutMs?: number, - ) => Promise; - runScenario: ( - relativeScenarioPath: string, - timeoutMs?: number, - ) => Promise; + runNodeScenarioDir: (options: { + entry?: string; + env?: Record; + nodeArgs?: string[]; + scenarioDir: string; + timeoutMs?: number; + }) => Promise; + runScenarioDir: (options: { + entry?: string; + env?: Record; + scenarioDir: string; + timeoutMs?: number; + }) => Promise; testRunEvents: (predicate?: EventPredicate) => CapturedLogEvent[]; testRunId: string; } @@ -175,15 +225,21 @@ export async function withScenarioHarness( requestCursor: () => server.requests.length, requestsAfter: (after, predicate) => filterItems(server.requests.slice(after), predicate), - runNodeScenario: (relativeScenarioPath, args = [], timeoutMs) => - runScenarioOrThrow(relativeScenarioPath, testEnv, { - nodeArgs: args, - timeoutMs, - useTsx: false, + runNodeScenarioDir: (options) => + runNodeScenarioDir({ + ...options, + env: { + ...testEnv, + ...(options.env ?? {}), + }, }), - runScenario: (relativeScenarioPath, timeoutMs) => - runScenarioOrThrow(relativeScenarioPath, testEnv, { - timeoutMs, + runScenarioDir: (options) => + runScenarioDir({ + ...options, + env: { + ...testEnv, + ...(options.env ?? 
{}), + }, }), testRunEvents: (predicate) => filterItems( @@ -198,3 +254,9 @@ export async function withScenarioHarness( await server.close(); } } + +export { + installScenarioDependencies, + type InstallScenarioDependenciesResult, + type InstallScenarioDependenciesOptions, +}; diff --git a/e2e/helpers/scenario-installer.ts b/e2e/helpers/scenario-installer.ts new file mode 100644 index 000000000..09b965ead --- /dev/null +++ b/e2e/helpers/scenario-installer.ts @@ -0,0 +1,123 @@ +import { promises as fs } from "node:fs"; +import { spawn } from "node:child_process"; +import * as path from "node:path"; + +export type InstallScenarioDependenciesResult = + | { status: "no-manifest" } + | { status: "installed" }; + +export interface InstallScenarioDependenciesOptions { + preferOffline?: boolean; + scenarioDir: string; +} + +const PNPM_COMMAND = process.platform === "win32" ? "pnpm.cmd" : "pnpm"; + +async function fileExists(filePath: string): Promise { + try { + await fs.access(filePath); + return true; + } catch { + return false; + } +} + +async function spawnOrThrow( + command: string, + args: string[], + cwd: string, +): Promise { + return await new Promise((resolve, reject) => { + const child = spawn(command, args, { + cwd, + env: process.env, + stdio: ["ignore", "pipe", "pipe"], + }); + + let stdout = ""; + let stderr = ""; + + child.stdout.on("data", (chunk) => { + stdout += chunk.toString(); + }); + child.stderr.on("data", (chunk) => { + stderr += chunk.toString(); + }); + child.on("error", reject); + child.on("close", (code) => { + if (code === 0) { + resolve(stdout.trim()); + return; + } + + reject( + new Error( + `${command} ${args.join(" ")} failed with exit code ${code ?? 
0}\nSTDOUT:\n${stdout}\nSTDERR:\n${stderr}`, + ), + ); + }); + }); +} + +function findWorkspaceSpecs( + manifest: Record, +): Array<{ name: string; section: string; spec: string }> { + const dependencySections = [ + "dependencies", + "devDependencies", + "optionalDependencies", + "peerDependencies", + ] as const; + + return dependencySections.flatMap((section) => { + const value = manifest[section]; + if (!value || typeof value !== "object" || Array.isArray(value)) { + return []; + } + + return Object.entries(value).flatMap(([name, spec]) => { + if (typeof spec === "string" && spec.startsWith("workspace:")) { + return [{ name, section, spec }]; + } + return []; + }); + }); +} + +export async function installScenarioDependencies({ + preferOffline = true, + scenarioDir, +}: InstallScenarioDependenciesOptions): Promise { + const manifestPath = path.join(scenarioDir, "package.json"); + if (!(await fileExists(manifestPath))) { + return { status: "no-manifest" }; + } + + const manifestRaw = await fs.readFile(manifestPath, "utf8"); + const manifest = JSON.parse(manifestRaw) as Record; + const workspaceSpecs = findWorkspaceSpecs(manifest); + if (workspaceSpecs.length > 0) { + const details = workspaceSpecs + .map(({ name, section, spec }) => `${section}.${name} -> ${spec}`) + .join(", "); + throw new Error( + `Scenario package.json in ${scenarioDir} cannot use workspace: dependencies (${details}). 
Keep workspace packages in e2e/package.json or use a non-workspace spec.`, + ); + } + + const installArgs = [ + "install", + "--dir", + scenarioDir, + "--ignore-workspace", + "--no-lockfile", + "--no-frozen-lockfile", + "--strict-peer-dependencies=false", + ]; + if (preferOffline) { + installArgs.push("--prefer-offline"); + } + + await spawnOrThrow(PNPM_COMMAND, installArgs, scenarioDir); + return { status: "installed" }; +} diff --git a/e2e/scenarios/helpers.ts b/e2e/helpers/scenario-runtime.ts similarity index 100% rename from e2e/scenarios/helpers.ts rename to e2e/helpers/scenario-runtime.ts diff --git a/e2e/tests/helpers/trace-selectors.ts b/e2e/helpers/trace-selectors.ts similarity index 96% rename from e2e/tests/helpers/trace-selectors.ts rename to e2e/helpers/trace-selectors.ts index 603108f48..c4b303ed3 100644 --- a/e2e/tests/helpers/trace-selectors.ts +++ b/e2e/helpers/trace-selectors.ts @@ -1,6 +1,6 @@ import type { CapturedLogEvent } from "./mock-braintrust-server"; -export function findLatestEvent( +function findLatestEvent( events: CapturedLogEvent[], predicate: (event: CapturedLogEvent) => boolean, ): CapturedLogEvent | undefined { diff --git a/e2e/tests/helpers/trace-summary.ts b/e2e/helpers/trace-summary.ts similarity index 99% rename from e2e/tests/helpers/trace-summary.ts rename to e2e/helpers/trace-summary.ts index 8dde332dc..09906921c 100644 --- a/e2e/tests/helpers/trace-summary.ts +++ b/e2e/helpers/trace-summary.ts @@ -91,7 +91,7 @@ function otlpAttributeValue(value: unknown): Json { return null; } -export type OtlpSpanSummary = { +type OtlpSpanSummary = { attributes: Record; name: string; parentSpanId?: string; diff --git a/e2e/package.json b/e2e/package.json index 4cec858eb..5cd56b69d 100644 --- a/e2e/package.json +++ b/e2e/package.json @@ -2,6 +2,7 @@ "name": "@braintrust/js-e2e-tests", "version": "0.0.0", "private": true, + "type": "module", "scripts": { "test:e2e": "vitest run", "test:e2e:update": "vitest run --update" @@ -13,9 +14,6 @@ 
"@opentelemetry/sdk-trace-base": ">=1.9.0", "@types/node": "^20.10.5", "braintrust": "workspace:^", - "openai": "6.25.0", - "openai-v4": "npm:openai@4.104.0", - "openai-v5": "npm:openai@5.11.0", "tsx": "^3.14.0", "typescript": "5.4.4", "vitest": "^2.1.9" diff --git a/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v4.mjs b/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v4.mjs deleted file mode 100644 index fbfb90974..000000000 --- a/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v4.mjs +++ /dev/null @@ -1,4 +0,0 @@ -import OpenAI from "openai-v4"; -import { runOpenAIAutoInstrumentationNodeHookOrExit } from "./openai-auto-instrumentation-node-hook.impl.mjs"; - -runOpenAIAutoInstrumentationNodeHookOrExit(OpenAI, "4.104.0"); diff --git a/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v5.mjs b/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v5.mjs deleted file mode 100644 index 4611d8c5f..000000000 --- a/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v5.mjs +++ /dev/null @@ -1,4 +0,0 @@ -import OpenAI from "openai-v5"; -import { runOpenAIAutoInstrumentationNodeHookOrExit } from "./openai-auto-instrumentation-node-hook.impl.mjs"; - -runOpenAIAutoInstrumentationNodeHookOrExit(OpenAI, "5.11.0"); diff --git a/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v6.mjs b/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v6.mjs deleted file mode 100644 index 818be68e3..000000000 --- a/e2e/scenarios/openai-auto-instrumentation-node-hook.openai-v6.mjs +++ /dev/null @@ -1,4 +0,0 @@ -import OpenAI from "openai"; -import { runOpenAIAutoInstrumentationNodeHookOrExit } from "./openai-auto-instrumentation-node-hook.impl.mjs"; - -runOpenAIAutoInstrumentationNodeHookOrExit(OpenAI, "6.25.0"); diff --git a/e2e/tests/__snapshots__/openai-auto-instrumentation-node-hook.test.ts.snap b/e2e/scenarios/openai-auto-instrumentation-node-hook/__snapshots__/scenario.test.ts.snap similarity index 100% rename from 
e2e/tests/__snapshots__/openai-auto-instrumentation-node-hook.test.ts.snap rename to e2e/scenarios/openai-auto-instrumentation-node-hook/__snapshots__/scenario.test.ts.snap diff --git a/e2e/scenarios/openai-auto-instrumentation-node-hook/package.json b/e2e/scenarios/openai-auto-instrumentation-node-hook/package.json new file mode 100644 index 000000000..08871ee43 --- /dev/null +++ b/e2e/scenarios/openai-auto-instrumentation-node-hook/package.json @@ -0,0 +1,9 @@ +{ + "name": "@braintrust/e2e-openai-auto-instrumentation-node-hook", + "private": true, + "dependencies": { + "openai": "6.25.0", + "openai-v4": "npm:openai@4.104.0", + "openai-v5": "npm:openai@5.11.0" + } +} diff --git a/e2e/scenarios/openai-auto-instrumentation-node-hook.impl.mjs b/e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.impl.mjs similarity index 100% rename from e2e/scenarios/openai-auto-instrumentation-node-hook.impl.mjs rename to e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.impl.mjs diff --git a/e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.openai-v4.mjs b/e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.openai-v4.mjs new file mode 100644 index 000000000..18be93ee3 --- /dev/null +++ b/e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.openai-v4.mjs @@ -0,0 +1,4 @@ +import OpenAI from "openai-v4"; +import { runOpenAIAutoInstrumentationNodeHookOrExit } from "./scenario.impl.mjs"; + +runOpenAIAutoInstrumentationNodeHookOrExit(OpenAI, "4.104.0"); diff --git a/e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.openai-v5.mjs b/e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.openai-v5.mjs new file mode 100644 index 000000000..2df8abe18 --- /dev/null +++ b/e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.openai-v5.mjs @@ -0,0 +1,4 @@ +import OpenAI from "openai-v5"; +import { runOpenAIAutoInstrumentationNodeHookOrExit } from "./scenario.impl.mjs"; + +runOpenAIAutoInstrumentationNodeHookOrExit(OpenAI, 
"5.11.0"); diff --git a/e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.openai-v6.mjs b/e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.openai-v6.mjs new file mode 100644 index 000000000..617498e34 --- /dev/null +++ b/e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.openai-v6.mjs @@ -0,0 +1,4 @@ +import OpenAI from "openai"; +import { runOpenAIAutoInstrumentationNodeHookOrExit } from "./scenario.impl.mjs"; + +runOpenAIAutoInstrumentationNodeHookOrExit(OpenAI, "6.25.0"); diff --git a/e2e/tests/openai-auto-instrumentation-node-hook.test.ts b/e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.test.ts similarity index 62% rename from e2e/tests/openai-auto-instrumentation-node-hook.test.ts rename to e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.test.ts index ea77c504a..4a763d803 100644 --- a/e2e/tests/openai-auto-instrumentation-node-hook.test.ts +++ b/e2e/scenarios/openai-auto-instrumentation-node-hook/scenario.test.ts @@ -1,21 +1,35 @@ -import { expect, test } from "vitest"; -import { normalizeForSnapshot, type Json } from "./helpers/normalize"; +import { beforeAll, expect, test } from "vitest"; +import { normalizeForSnapshot, type Json } from "../../helpers/normalize"; import { OPENAI_AUTO_HOOK_SCENARIOS, OPENAI_SCENARIO_TIMEOUT_MS, summarizeOpenAIContract, -} from "./helpers/openai"; -import { withScenarioHarness } from "./helpers/scenario-harness"; -import { findLatestChildSpan, findLatestSpan } from "./helpers/trace-selectors"; +} from "../../helpers/openai"; +import { + installScenarioDependencies, + resolveScenarioDir, + withScenarioHarness, +} from "../../helpers/scenario-harness"; +import { + findLatestChildSpan, + findLatestSpan, +} from "../../helpers/trace-selectors"; + +const scenarioDir = resolveScenarioDir(import.meta.url); + +beforeAll(async () => { + await installScenarioDependencies({ scenarioDir }); +}); for (const scenario of OPENAI_AUTO_HOOK_SCENARIOS) { test(`openai 
auto-instrumentation via node hook collects traces without manual wrapping (openai ${scenario.version})`, async () => { - await withScenarioHarness(async ({ events, runNodeScenario }) => { - await runNodeScenario( - scenario.scenarioPath, - ["--import", "braintrust/hook.mjs"], - OPENAI_SCENARIO_TIMEOUT_MS, - ); + await withScenarioHarness(async ({ events, runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.entry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + scenarioDir, + timeoutMs: OPENAI_SCENARIO_TIMEOUT_MS, + }); const capturedEvents = events(); const root = findLatestSpan(capturedEvents, "openai-auto-hook-root"); diff --git a/e2e/tests/__snapshots__/otel-compat-mixed-tracing.test.ts.snap b/e2e/scenarios/otel-compat-mixed-tracing/__snapshots__/scenario.test.ts.snap similarity index 100% rename from e2e/tests/__snapshots__/otel-compat-mixed-tracing.test.ts.snap rename to e2e/scenarios/otel-compat-mixed-tracing/__snapshots__/scenario.test.ts.snap diff --git a/e2e/tests/otel-compat-mixed-tracing.test.ts b/e2e/scenarios/otel-compat-mixed-tracing/scenario.test.ts similarity index 71% rename from e2e/tests/otel-compat-mixed-tracing.test.ts rename to e2e/scenarios/otel-compat-mixed-tracing/scenario.test.ts index a280ec488..2084bb66f 100644 --- a/e2e/tests/otel-compat-mixed-tracing.test.ts +++ b/e2e/scenarios/otel-compat-mixed-tracing/scenario.test.ts @@ -1,13 +1,18 @@ import { expect, test } from "vitest"; -import { normalizeForSnapshot, type Json } from "./helpers/normalize"; -import { withScenarioHarness } from "./helpers/scenario-harness"; -import { findLatestSpan } from "./helpers/trace-selectors"; -import { extractOtelSpans, summarizeEvent } from "./helpers/trace-summary"; +import { normalizeForSnapshot, type Json } from "../../helpers/normalize"; +import { + resolveScenarioDir, + withScenarioHarness, +} from "../../helpers/scenario-harness"; +import { findLatestSpan } from "../../helpers/trace-selectors"; +import { extractOtelSpans, 
summarizeEvent } from "../../helpers/trace-summary"; + +const scenarioDir = resolveScenarioDir(import.meta.url); test("otel-compat-mixed-tracing unifies Braintrust and OTEL spans into one trace", async () => { await withScenarioHarness( - async ({ requestsAfter, runScenario, testRunEvents }) => { - await runScenario("scenarios/otel-compat-mixed-tracing.ts"); + async ({ requestsAfter, runScenarioDir, testRunEvents }) => { + await runScenarioDir({ scenarioDir }); const btEvents = testRunEvents(); const btRoot = findLatestSpan(btEvents, "bt-root"); diff --git a/e2e/scenarios/otel-compat-mixed-tracing.ts b/e2e/scenarios/otel-compat-mixed-tracing/scenario.ts similarity index 98% rename from e2e/scenarios/otel-compat-mixed-tracing.ts rename to e2e/scenarios/otel-compat-mixed-tracing/scenario.ts index 0601265a9..f3a2d4bb1 100644 --- a/e2e/scenarios/otel-compat-mixed-tracing.ts +++ b/e2e/scenarios/otel-compat-mixed-tracing/scenario.ts @@ -7,7 +7,7 @@ import { getTestRunId, runMain, scopedName, -} from "./helpers"; +} from "../../helpers/scenario-runtime"; async function main() { const testRunId = getTestRunId(); diff --git a/e2e/tests/otel-span-processor-export.test.ts b/e2e/scenarios/otel-span-processor-export/scenario.test.ts similarity index 73% rename from e2e/tests/otel-span-processor-export.test.ts rename to e2e/scenarios/otel-span-processor-export/scenario.test.ts index f344f3097..cc1d38d24 100644 --- a/e2e/tests/otel-span-processor-export.test.ts +++ b/e2e/scenarios/otel-span-processor-export/scenario.test.ts @@ -1,11 +1,19 @@ import { expect, test } from "vitest"; -import { withScenarioHarness } from "./helpers/scenario-harness"; -import { extractOtelSpans, summarizeRequest } from "./helpers/trace-summary"; +import { + resolveScenarioDir, + withScenarioHarness, +} from "../../helpers/scenario-harness"; +import { + extractOtelSpans, + summarizeRequest, +} from "../../helpers/trace-summary"; + +const scenarioDir = resolveScenarioDir(import.meta.url); 
test("otel-span-processor-export sends filtered OTLP traces to Braintrust", async () => { await withScenarioHarness( - async ({ requestsAfter, runScenario, testRunId }) => { - await runScenario("scenarios/otel-span-processor-export.ts"); + async ({ requestsAfter, runScenarioDir, testRunId }) => { + await runScenarioDir({ scenarioDir }); const requests = requestsAfter( 0, diff --git a/e2e/scenarios/otel-span-processor-export.ts b/e2e/scenarios/otel-span-processor-export/scenario.ts similarity index 96% rename from e2e/scenarios/otel-span-processor-export.ts rename to e2e/scenarios/otel-span-processor-export/scenario.ts index af487fb56..c7d972ee7 100644 --- a/e2e/scenarios/otel-span-processor-export.ts +++ b/e2e/scenarios/otel-span-processor-export/scenario.ts @@ -5,7 +5,7 @@ import { getTestRunId, runMain, scopedName, -} from "./helpers"; +} from "../../helpers/scenario-runtime"; async function main() { const testRunId = getTestRunId(); diff --git a/e2e/tests/__snapshots__/trace-context-and-continuation.test.ts.snap b/e2e/scenarios/trace-context-and-continuation/__snapshots__/scenario.test.ts.snap similarity index 98% rename from e2e/tests/__snapshots__/trace-context-and-continuation.test.ts.snap rename to e2e/scenarios/trace-context-and-continuation/__snapshots__/scenario.test.ts.snap index 2c327a559..022911646 100644 --- a/e2e/tests/__snapshots__/trace-context-and-continuation.test.ts.snap +++ b/e2e/scenarios/trace-context-and-continuation/__snapshots__/scenario.test.ts.snap @@ -4,9 +4,9 @@ exports[`trace-context-and-continuation supports reattachment and late span upda [ { "context": { - "caller_filename": "/e2e/scenarios/trace-context-and-continuation.ts", + "caller_filename": "/e2e/scenarios/trace-context-and-continuation/scenario.ts", "caller_functionname": "main", - "caller_lineno": 70, + "caller_lineno": 0, }, "created": "", "id": "", diff --git a/e2e/tests/trace-context-and-continuation.test.ts b/e2e/scenarios/trace-context-and-continuation/scenario.test.ts 
similarity index 81% rename from e2e/tests/trace-context-and-continuation.test.ts rename to e2e/scenarios/trace-context-and-continuation/scenario.test.ts index 638e9b33c..12a3b2bfb 100644 --- a/e2e/tests/trace-context-and-continuation.test.ts +++ b/e2e/scenarios/trace-context-and-continuation/scenario.test.ts @@ -1,13 +1,18 @@ import { expect, test } from "vitest"; -import { normalizeForSnapshot, type Json } from "./helpers/normalize"; -import { withScenarioHarness } from "./helpers/scenario-harness"; -import { findLatestSpan } from "./helpers/trace-selectors"; -import { summarizeEvent } from "./helpers/trace-summary"; +import { normalizeForSnapshot, type Json } from "../../helpers/normalize"; +import { + resolveScenarioDir, + withScenarioHarness, +} from "../../helpers/scenario-harness"; +import { findLatestSpan } from "../../helpers/trace-selectors"; +import { summarizeEvent } from "../../helpers/trace-summary"; + +const scenarioDir = resolveScenarioDir(import.meta.url); test("trace-context-and-continuation supports reattachment and late span updates", async () => { await withScenarioHarness( - async ({ payloads, runScenario, testRunEvents, testRunId }) => { - await runScenario("scenarios/trace-context-and-continuation.ts"); + async ({ payloads, runScenarioDir, testRunEvents, testRunId }) => { + await runScenarioDir({ scenarioDir }); const capturedEvents = testRunEvents(); const root = findLatestSpan(capturedEvents, "context-root"); diff --git a/e2e/scenarios/trace-context-and-continuation.ts b/e2e/scenarios/trace-context-and-continuation/scenario.ts similarity index 95% rename from e2e/scenarios/trace-context-and-continuation.ts rename to e2e/scenarios/trace-context-and-continuation/scenario.ts index 156787469..c0bb88ec3 100644 --- a/e2e/scenarios/trace-context-and-continuation.ts +++ b/e2e/scenarios/trace-context-and-continuation/scenario.ts @@ -7,7 +7,11 @@ import { withCurrent, withParent, } from "braintrust"; -import { getTestRunId, runMain, scopedName } 
from "./helpers"; +import { + getTestRunId, + runMain, + scopedName, +} from "../../helpers/scenario-runtime"; async function main() { const testRunId = getTestRunId(); diff --git a/e2e/tests/__snapshots__/trace-primitives-basic.test.ts.snap b/e2e/scenarios/trace-primitives-basic/__snapshots__/scenario.test.ts.snap similarity index 88% rename from e2e/tests/__snapshots__/trace-primitives-basic.test.ts.snap rename to e2e/scenarios/trace-primitives-basic/__snapshots__/scenario.test.ts.snap index f58d7f990..d62c9d559 100644 --- a/e2e/tests/__snapshots__/trace-primitives-basic.test.ts.snap +++ b/e2e/scenarios/trace-primitives-basic/__snapshots__/scenario.test.ts.snap @@ -40,9 +40,9 @@ exports[`trace-primitives-basic collects a minimal manual trace tree > request-f { "_is_merge": false, "context": { - "caller_filename": "/e2e/scenarios/trace-primitives-basic.ts", + "caller_filename": "/e2e/scenarios/trace-primitives-basic/scenario.ts", "caller_functionname": "main", - "caller_lineno": 10, + "caller_lineno": 0, }, "created": "", "id": "", @@ -78,9 +78,9 @@ exports[`trace-primitives-basic collects a minimal manual trace tree > request-f { "_is_merge": false, "context": { - "caller_filename": "/e2e/scenarios/trace-primitives-basic.ts", + "caller_filename": "/e2e/scenarios/trace-primitives-basic/scenario.ts", "caller_functionname": "main", - "caller_lineno": 10, + "caller_lineno": 0, }, "created": "", "id": "", @@ -115,9 +115,9 @@ exports[`trace-primitives-basic collects a minimal manual trace tree > request-f "rows": [ { "context": { - "caller_filename": "/e2e/scenarios/trace-primitives-basic.ts", + "caller_filename": "/e2e/scenarios/trace-primitives-basic/scenario.ts", "caller_functionname": "logger.traced.name", - "caller_lineno": 12, + "caller_lineno": 0, }, "created": "", "id": "", @@ -150,24 +150,24 @@ exports[`trace-primitives-basic collects a minimal manual trace tree > request-f }, { "context": { - "caller_filename": "/e2e/scenarios/trace-primitives-basic.ts", + 
"caller_filename": "/e2e/scenarios/trace-primitives-basic/scenario.ts", "caller_functionname": "logger.traced.name", - "caller_lineno": 32, + "caller_lineno": 0, }, "created": "", "error": "basic boom Error: basic boom - at logger.traced.name (/e2e/scenarios/trace-primitives-basic.ts:0:0) - at /js/dist/index.js:0:0 + at logger.traced.name (/e2e/scenarios/trace-primitives-basic/scenario.ts:0:0) + at /js/dist/index.mjs:0:0 at AsyncLocalStorage.run (node::0:0) - at BraintrustContextManager.runInContext (/js/dist/index.js:0:0) - at withCurrent (/js/dist/index.js:0:0) - at /js/dist/index.js:0:0 - at runCatchFinally (/js/dist/index.js:0:0) - at _class9.traced (/js/dist/index.js:0:0) - at main (/e2e/scenarios/trace-primitives-basic.ts:0:0) - at runMain (/e2e/scenarios/helpers.ts:0:0)", + at BraintrustContextManager.runInContext (/js/dist/index.mjs:0:0) + at withCurrent (/js/dist/index.mjs:0:0) + at /js/dist/index.mjs:0:0 + at runCatchFinally (/js/dist/index.mjs:0:0) + at Logger.traced (/js/dist/index.mjs:0:0) + at main (/e2e/scenarios/trace-primitives-basic/scenario.ts:0:0) + at runMain (/e2e/helpers/scenario-runtime.ts:0:0)", "id": "", "log_id": "g", "metadata": { @@ -213,9 +213,9 @@ Error: basic boom "rows": [ { "context": { - "caller_filename": "/e2e/scenarios/trace-primitives-basic.ts", + "caller_filename": "/e2e/scenarios/trace-primitives-basic/scenario.ts", "caller_functionname": "logger.traced.name", - "caller_lineno": 12, + "caller_lineno": 0, }, "created": "", "id": "", @@ -248,24 +248,24 @@ Error: basic boom }, { "context": { - "caller_filename": "/e2e/scenarios/trace-primitives-basic.ts", + "caller_filename": "/e2e/scenarios/trace-primitives-basic/scenario.ts", "caller_functionname": "logger.traced.name", - "caller_lineno": 32, + "caller_lineno": 0, }, "created": "", "error": "basic boom Error: basic boom - at logger.traced.name (/e2e/scenarios/trace-primitives-basic.ts:0:0) - at /js/dist/index.js:0:0 + at logger.traced.name 
(/e2e/scenarios/trace-primitives-basic/scenario.ts:0:0) + at /js/dist/index.mjs:0:0 at AsyncLocalStorage.run (node::0:0) - at BraintrustContextManager.runInContext (/js/dist/index.js:0:0) - at withCurrent (/js/dist/index.js:0:0) - at /js/dist/index.js:0:0 - at runCatchFinally (/js/dist/index.js:0:0) - at _class9.traced (/js/dist/index.js:0:0) - at main (/e2e/scenarios/trace-primitives-basic.ts:0:0) - at runMain (/e2e/scenarios/helpers.ts:0:0)", + at BraintrustContextManager.runInContext (/js/dist/index.mjs:0:0) + at withCurrent (/js/dist/index.mjs:0:0) + at /js/dist/index.mjs:0:0 + at runCatchFinally (/js/dist/index.mjs:0:0) + at Logger.traced (/js/dist/index.mjs:0:0) + at main (/e2e/scenarios/trace-primitives-basic/scenario.ts:0:0) + at runMain (/e2e/helpers/scenario-runtime.ts:0:0)", "id": "", "log_id": "g", "metadata": { diff --git a/e2e/tests/trace-primitives-basic.test.ts b/e2e/scenarios/trace-primitives-basic/scenario.test.ts similarity index 75% rename from e2e/tests/trace-primitives-basic.test.ts rename to e2e/scenarios/trace-primitives-basic/scenario.test.ts index 0c9e7242e..ac20a6c2c 100644 --- a/e2e/tests/trace-primitives-basic.test.ts +++ b/e2e/scenarios/trace-primitives-basic/scenario.test.ts @@ -1,15 +1,20 @@ import { expect, test } from "vitest"; -import { normalizeForSnapshot, type Json } from "./helpers/normalize"; -import { withScenarioHarness } from "./helpers/scenario-harness"; -import { findLatestSpan } from "./helpers/trace-selectors"; -import { summarizeEvent, summarizeRequest } from "./helpers/trace-summary"; +import { normalizeForSnapshot, type Json } from "../../helpers/normalize"; +import { + resolveScenarioDir, + withScenarioHarness, +} from "../../helpers/scenario-harness"; +import { findLatestSpan } from "../../helpers/trace-selectors"; +import { summarizeEvent, summarizeRequest } from "../../helpers/trace-summary"; + +const scenarioDir = resolveScenarioDir(import.meta.url); test("trace-primitives-basic collects a minimal manual trace 
tree", async () => { await withScenarioHarness( - async ({ requestCursor, requestsAfter, runScenario, testRunEvents }) => { + async ({ requestCursor, requestsAfter, runScenarioDir, testRunEvents }) => { const cursor = requestCursor(); - await runScenario("scenarios/trace-primitives-basic.ts"); + await runScenarioDir({ scenarioDir }); const capturedEvents = testRunEvents(); const root = findLatestSpan(capturedEvents, "trace-primitives-root"); diff --git a/e2e/scenarios/trace-primitives-basic.ts b/e2e/scenarios/trace-primitives-basic/scenario.ts similarity index 93% rename from e2e/scenarios/trace-primitives-basic.ts rename to e2e/scenarios/trace-primitives-basic/scenario.ts index e3850d11b..442ce6518 100644 --- a/e2e/scenarios/trace-primitives-basic.ts +++ b/e2e/scenarios/trace-primitives-basic/scenario.ts @@ -1,5 +1,9 @@ import { initLogger, logError, startSpan } from "braintrust"; -import { getTestRunId, runMain, scopedName } from "./helpers"; +import { + getTestRunId, + runMain, + scopedName, +} from "../../helpers/scenario-runtime"; async function main() { const testRunId = getTestRunId(); diff --git a/e2e/scenarios/wrap-openai-conversation-traces.openai-v4.ts b/e2e/scenarios/wrap-openai-conversation-traces.openai-v4.ts deleted file mode 100644 index 7ab124a56..000000000 --- a/e2e/scenarios/wrap-openai-conversation-traces.openai-v4.ts +++ /dev/null @@ -1,5 +0,0 @@ -import OpenAI from "openai-v4"; -import { runMain } from "./helpers"; -import { runWrapOpenAIConversationTraces } from "./wrap-openai-conversation-traces.impl"; - -runMain(() => runWrapOpenAIConversationTraces(OpenAI, "4.104.0")); diff --git a/e2e/scenarios/wrap-openai-conversation-traces.openai-v5.ts b/e2e/scenarios/wrap-openai-conversation-traces.openai-v5.ts deleted file mode 100644 index 31b35745e..000000000 --- a/e2e/scenarios/wrap-openai-conversation-traces.openai-v5.ts +++ /dev/null @@ -1,5 +0,0 @@ -import OpenAI from "openai-v5"; -import { runMain } from "./helpers"; -import { 
runWrapOpenAIConversationTraces } from "./wrap-openai-conversation-traces.impl"; - -runMain(() => runWrapOpenAIConversationTraces(OpenAI, "5.11.0")); diff --git a/e2e/scenarios/wrap-openai-conversation-traces.openai-v6.ts b/e2e/scenarios/wrap-openai-conversation-traces.openai-v6.ts deleted file mode 100644 index 10f70bac2..000000000 --- a/e2e/scenarios/wrap-openai-conversation-traces.openai-v6.ts +++ /dev/null @@ -1,5 +0,0 @@ -import OpenAI from "openai"; -import { runMain } from "./helpers"; -import { runWrapOpenAIConversationTraces } from "./wrap-openai-conversation-traces.impl"; - -runMain(() => runWrapOpenAIConversationTraces(OpenAI, "6.25.0")); diff --git a/e2e/tests/__snapshots__/wrap-openai-conversation-traces.test.ts.snap b/e2e/scenarios/wrap-openai-conversation-traces/__snapshots__/scenario.test.ts.snap similarity index 100% rename from e2e/tests/__snapshots__/wrap-openai-conversation-traces.test.ts.snap rename to e2e/scenarios/wrap-openai-conversation-traces/__snapshots__/scenario.test.ts.snap diff --git a/e2e/scenarios/wrap-openai-conversation-traces/package.json b/e2e/scenarios/wrap-openai-conversation-traces/package.json new file mode 100644 index 000000000..122bbbca6 --- /dev/null +++ b/e2e/scenarios/wrap-openai-conversation-traces/package.json @@ -0,0 +1,9 @@ +{ + "name": "@braintrust/e2e-wrap-openai-conversation-traces", + "private": true, + "dependencies": { + "openai": "6.25.0", + "openai-v4": "npm:openai@4.104.0", + "openai-v5": "npm:openai@5.11.0" + } +} diff --git a/e2e/scenarios/wrap-openai-conversation-traces.impl.ts b/e2e/scenarios/wrap-openai-conversation-traces/scenario.impl.ts similarity index 96% rename from e2e/scenarios/wrap-openai-conversation-traces.impl.ts rename to e2e/scenarios/wrap-openai-conversation-traces/scenario.impl.ts index 9ad5fd501..4f380daf2 100644 --- a/e2e/scenarios/wrap-openai-conversation-traces.impl.ts +++ b/e2e/scenarios/wrap-openai-conversation-traces/scenario.impl.ts @@ -1,5 +1,9 @@ import { initLogger, 
startSpan, withCurrent, wrapOpenAI } from "braintrust"; -import { collectAsync, getTestRunId, scopedName } from "./helpers"; +import { + collectAsync, + getTestRunId, + scopedName, +} from "../../helpers/scenario-runtime"; const OPENAI_MODEL = "gpt-4o-mini"; diff --git a/e2e/scenarios/wrap-openai-conversation-traces/scenario.openai-v4.ts b/e2e/scenarios/wrap-openai-conversation-traces/scenario.openai-v4.ts new file mode 100644 index 000000000..6ac3405ca --- /dev/null +++ b/e2e/scenarios/wrap-openai-conversation-traces/scenario.openai-v4.ts @@ -0,0 +1,5 @@ +import OpenAI from "openai-v4"; +import { runMain } from "../../helpers/scenario-runtime"; +import { runWrapOpenAIConversationTraces } from "./scenario.impl"; + +runMain(() => runWrapOpenAIConversationTraces(OpenAI, "4.104.0")); diff --git a/e2e/scenarios/wrap-openai-conversation-traces/scenario.openai-v5.ts b/e2e/scenarios/wrap-openai-conversation-traces/scenario.openai-v5.ts new file mode 100644 index 000000000..ac4ee2541 --- /dev/null +++ b/e2e/scenarios/wrap-openai-conversation-traces/scenario.openai-v5.ts @@ -0,0 +1,5 @@ +import OpenAI from "openai-v5"; +import { runMain } from "../../helpers/scenario-runtime"; +import { runWrapOpenAIConversationTraces } from "./scenario.impl"; + +runMain(() => runWrapOpenAIConversationTraces(OpenAI, "5.11.0")); diff --git a/e2e/scenarios/wrap-openai-conversation-traces/scenario.openai-v6.ts b/e2e/scenarios/wrap-openai-conversation-traces/scenario.openai-v6.ts new file mode 100644 index 000000000..58701e96a --- /dev/null +++ b/e2e/scenarios/wrap-openai-conversation-traces/scenario.openai-v6.ts @@ -0,0 +1,5 @@ +import OpenAI from "openai"; +import { runMain } from "../../helpers/scenario-runtime"; +import { runWrapOpenAIConversationTraces } from "./scenario.impl"; + +runMain(() => runWrapOpenAIConversationTraces(OpenAI, "6.25.0")); diff --git a/e2e/tests/wrap-openai-conversation-traces.test.ts b/e2e/scenarios/wrap-openai-conversation-traces/scenario.test.ts similarity index 
82% rename from e2e/tests/wrap-openai-conversation-traces.test.ts rename to e2e/scenarios/wrap-openai-conversation-traces/scenario.test.ts index c569d9dec..a63eb3572 100644 --- a/e2e/tests/wrap-openai-conversation-traces.test.ts +++ b/e2e/scenarios/wrap-openai-conversation-traces/scenario.test.ts @@ -1,22 +1,37 @@ -import { expect, test } from "vitest"; -import { normalizeForSnapshot, type Json } from "./helpers/normalize"; +import { beforeAll, expect, test } from "vitest"; +import { normalizeForSnapshot, type Json } from "../../helpers/normalize"; import { OPENAI_SCENARIO_TIMEOUT_MS, WRAP_OPENAI_SCENARIOS, summarizeOpenAIContract, -} from "./helpers/openai"; -import { withScenarioHarness } from "./helpers/scenario-harness"; -import { findLatestChildSpan, findLatestSpan } from "./helpers/trace-selectors"; +} from "../../helpers/openai"; +import { + installScenarioDependencies, + resolveScenarioDir, + withScenarioHarness, +} from "../../helpers/scenario-harness"; +import { + findLatestChildSpan, + findLatestSpan, +} from "../../helpers/trace-selectors"; + +const scenarioDir = resolveScenarioDir(import.meta.url); + +beforeAll(async () => { + await installScenarioDependencies({ scenarioDir }); +}); test.each( - WRAP_OPENAI_SCENARIOS.map( - ({ scenarioPath, version }) => [version, scenarioPath] as const, - ), + WRAP_OPENAI_SCENARIOS.map(({ entry, version }) => [version, entry] as const), )( "wrap-openai-conversation-traces logs wrapped chat and responses traces (openai %s)", - async (version, scenarioPath) => { - await withScenarioHarness(async ({ events, runScenario }) => { - await runScenario(scenarioPath, OPENAI_SCENARIO_TIMEOUT_MS); + async (version, entry) => { + await withScenarioHarness(async ({ events, runScenarioDir }) => { + await runScenarioDir({ + entry, + scenarioDir, + timeoutMs: OPENAI_SCENARIO_TIMEOUT_MS, + }); const capturedEvents = events(); diff --git a/e2e/tsconfig.json b/e2e/tsconfig.json index e7a09c397..fefc39848 100644 --- a/e2e/tsconfig.json 
+++ b/e2e/tsconfig.json @@ -1,9 +1,9 @@ { "compilerOptions": { "lib": ["es2022"], - "module": "nodenext", + "module": "es2022", "target": "es2022", - "moduleResolution": "nodenext", + "moduleResolution": "bundler", "strict": true, "esModuleInterop": true, "skipLibCheck": true, diff --git a/e2e/turbo.json b/e2e/turbo.json deleted file mode 100644 index 3adcb89ef..000000000 --- a/e2e/turbo.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "extends": ["//"] -} diff --git a/e2e/vitest.config.mts b/e2e/vitest.config.mts index 391c4255c..e41a1d0ec 100644 --- a/e2e/vitest.config.mts +++ b/e2e/vitest.config.mts @@ -3,7 +3,7 @@ import { defineConfig } from "vitest/config"; export default defineConfig({ test: { hookTimeout: 20_000, - include: ["tests/**/*.test.ts"], + include: ["scenarios/**/*.test.ts"], testTimeout: 20_000, }, }); diff --git a/knip.json b/knip.json new file mode 100644 index 000000000..9b953f173 --- /dev/null +++ b/knip.json @@ -0,0 +1,23 @@ +{ + "workspaces": { + "e2e": { + "entry": [ + "package.json", + "vitest.config.mts", + "helpers/**/*.ts", + "scenarios/**/*.test.ts", + "scenarios/**/package.json", + "scenarios/**/*.ts", + "scenarios/**/*.mjs" + ], + "project": [ + "helpers/**/*.ts", + "scenarios/**/*.test.ts", + "scenarios/**/package.json", + "scenarios/**/*.ts", + "scenarios/**/*.mjs" + ], + "ignoreDependencies": ["openai", "openai-v4", "openai-v5"] + } + } +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ec34c30e5..1801d2de2 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -50,15 +50,6 @@ importers: braintrust: specifier: workspace:^ version: link:../js - openai: - specifier: 6.25.0 - version: 6.25.0(ws@8.18.3)(zod@3.25.76) - openai-v4: - specifier: npm:openai@4.104.0 - version: openai@4.104.0(ws@8.18.3)(zod@3.25.76) - openai-v5: - specifier: npm:openai@5.11.0 - version: openai@5.11.0(ws@8.18.3)(zod@3.25.76) tsx: specifier: ^3.14.0 version: 3.14.0 From c0bc242ad153cd1d3fc2ceb38398b0ff170eb451 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 10 
Mar 2026 10:44:49 +0100 Subject: [PATCH 08/11] update knip --- knip.json | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/knip.json b/knip.json index 9b953f173..93e4304bc 100644 --- a/knip.json +++ b/knip.json @@ -17,6 +17,14 @@ "scenarios/**/*.ts", "scenarios/**/*.mjs" ], + "ignore": ["runScenarioDir", "runNodeScenarioDir"], + "ignoreFiles": [ + "helpers/scenario-runtime.ts", + "scenarios/**/scenario.ts", + "scenarios/**/scenario.mjs", + "scenarios/**/scenario.*.ts", + "scenarios/**/scenario.*.mjs" + ], "ignoreDependencies": ["openai", "openai-v4", "openai-v5"] } } From dfa79da2394e8d232c8d6e0f512cc348b0648c4f Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 10 Mar 2026 12:50:37 +0100 Subject: [PATCH 09/11] knip jsonc --- knip.json | 31 ------------------------------- knip.jsonc | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 31 deletions(-) delete mode 100644 knip.json diff --git a/knip.json b/knip.json deleted file mode 100644 index 93e4304bc..000000000 --- a/knip.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "workspaces": { - "e2e": { - "entry": [ - "package.json", - "vitest.config.mts", - "helpers/**/*.ts", - "scenarios/**/*.test.ts", - "scenarios/**/package.json", - "scenarios/**/*.ts", - "scenarios/**/*.mjs" - ], - "project": [ - "helpers/**/*.ts", - "scenarios/**/*.test.ts", - "scenarios/**/package.json", - "scenarios/**/*.ts", - "scenarios/**/*.mjs" - ], - "ignore": ["runScenarioDir", "runNodeScenarioDir"], - "ignoreFiles": [ - "helpers/scenario-runtime.ts", - "scenarios/**/scenario.ts", - "scenarios/**/scenario.mjs", - "scenarios/**/scenario.*.ts", - "scenarios/**/scenario.*.mjs" - ], - "ignoreDependencies": ["openai", "openai-v4", "openai-v5"] - } - } -} diff --git a/knip.jsonc b/knip.jsonc index 7f8b7b0e4..e7d364a45 100644 --- a/knip.jsonc +++ b/knip.jsonc @@ -10,6 +10,33 @@ "**/generated_types.ts": ["exports", "types"], }, "workspaces": { + "e2e": { + "entry": [ + "package.json", + "vitest.config.mts", + 
"helpers/**/*.ts", + "scenarios/**/*.test.ts", + "scenarios/**/package.json", + "scenarios/**/*.ts", + "scenarios/**/*.mjs", + ], + "project": [ + "helpers/**/*.ts", + "scenarios/**/*.test.ts", + "scenarios/**/package.json", + "scenarios/**/*.ts", + "scenarios/**/*.mjs", + ], + "ignore": ["runScenarioDir", "runNodeScenarioDir"], + "ignoreFiles": [ + "helpers/scenario-runtime.ts", + "scenarios/**/scenario.ts", + "scenarios/**/scenario.mjs", + "scenarios/**/scenario.*.ts", + "scenarios/**/scenario.*.mjs", + ], + "ignoreDependencies": ["openai", "openai-v4", "openai-v5"], + }, "js": { "entry": [ "src/auto-instrumentations/bundler/*.ts", From f43516c820ae5f38174fb333a5277951fc41b9e7 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 10 Mar 2026 13:04:29 +0100 Subject: [PATCH 10/11] add dead code check --- .github/workflows/lint.yaml | 2 +- package.json | 3 ++- pnpm-lock.yaml | 6 ++++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index b6732911d..82a0d4097 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -32,4 +32,4 @@ jobs: - name: Install dependencies run: pnpm install --frozen-lockfile - name: Check dead code - run: pnpm run knip + run: pnpm run check:dead-code diff --git a/package.json b/package.json index f42773d7b..6fa3075e6 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,8 @@ "watch": "turbo run watch", "start": "turbo run start", "clean": "turbo run clean", - "knip": "knip --config knip.jsonc --no-config-hints", + "check:dead-code": "knip --config knip.jsonc --no-config-hints", + "knip": "pnpm run check:dead-code", "test": "dotenv -e .env -- turbo run test --filter=\"!@braintrust/otel\"", "test:e2e": "dotenv -e .env -- turbo run test:e2e", "test:e2e:update": "dotenv -e .env -- turbo run test:e2e:update", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1801d2de2..058557ecf 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -5775,6 +5775,10 
@@ packages: engines: {node: '>=0.8.0'} hasBin: true + unbash@2.2.0: + resolution: {integrity: sha512-X2wH19RAPZE3+ldGicOkoj/SIA83OIxcJ6Cuaw23hf8Xc6fQpvZXY0SftE2JgS0QhYLUG4uwodSI3R53keyh7w==} + engines: {node: '>=14'} + undici-types@5.26.5: resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==} @@ -12429,6 +12433,8 @@ snapshots: uglify-js@3.19.3: optional: true + unbash@2.2.0: {} + undici-types@5.26.5: {} undici-types@6.21.0: {} From 809c54ff875418b4a3eaa29200dc5db79c11ad2d Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 10 Mar 2026 17:12:43 +0100 Subject: [PATCH 11/11] fix --- .github/workflows/lint.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 942f207d1..5d2713cb4 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -46,4 +46,4 @@ jobs: - name: Install dependencies run: pnpm install --frozen-lockfile - name: Check dead code - run: pnpm run check:dead-code + run: pnpm run knip