Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# End-to-end test workflow: runs the SDK e2e suite on pull requests that
# touch the JS packages or workspace config, and on every push to main.
name: e2e

on:
  pull_request:
    paths:
      - "e2e/**"
      - "js/**"
      - ".github/workflows/e2e.yaml"
      - "package.json"
      - "pnpm-lock.yaml"
      - "pnpm-workspace.yaml"
      - "turbo.json"
  push:
    branches: [main]

# The job only needs to read repository contents.
permissions:
  contents: read

jobs:
  test:
    runs-on: ubuntu-latest
    timeout-minutes: 20

    env:
      # API keys forwarded from repo secrets for the harness/scenarios.
      BRAINTRUST_API_KEY: ${{ secrets.BRAINTRUST_API_KEY }}
      ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
      GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

    steps:
      - uses: actions/checkout@v4

      # pnpm must be installed before setup-node so that setup-node's
      # pnpm cache resolution can find the pnpm binary.
      - uses: pnpm/action-setup@v4

      - uses: actions/setup-node@v4
        with:
          node-version: 20
          # Cache the pnpm store keyed on pnpm-lock.yaml to speed up installs.
          cache: pnpm

      - name: Install dependencies
        run: pnpm install --frozen-lockfile

      - name: Run e2e tests
        run: pnpm test:e2e
53 changes: 53 additions & 0 deletions e2e/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# E2E Tests

End-to-end tests that validate the Braintrust SDK by running real usage scenarios against a mock Braintrust server.

## How It Works

1. Each test uses `withScenarioHarness(...)`, which starts an isolated mock Braintrust server
2. The test spawns a scenario script as a subprocess, usually with `tsx`
3. The scenario uses the SDK normally (init, create spans, log data, flush, or OTEL / OpenAI integrations)
4. The test inspects captured events, payloads, or raw HTTP requests, then normalizes and snapshots them where useful

Subprocess isolation keeps the SDK execution path close to production, including plain Node runs for auto-instrumentation hook coverage.

## Structure

```
e2e/
|- scenarios/ # Standalone scripts run as subprocesses
|- tests/
| |- helpers/ # Harness, mock server, normalization, selectors, summaries
| |- *.test.ts # Trace, OTEL, and OpenAI coverage
| `- __snapshots__/ # Vitest snapshots
`- vitest.config.mts
```

## Helpers (`tests/helpers/`)

- `scenario-harness.ts` - Starts the mock server, creates a unique test run id, and runs scenarios.
- `mock-braintrust-server.ts` - Captures requests, merged log payloads, and parsed span-like events.
- `normalize.ts` - Makes snapshots deterministic by normalizing ids, timestamps, paths, and mock-server URLs.
- `trace-selectors.ts` / `trace-summary.ts` - Helpers for finding spans and snapshotting only the relevant shape.
- `openai.ts` - Shared scenario lists and assertions for OpenAI wrapper and hook coverage across v4/v5/v6.

### Writing a new test

Most tests use `withScenarioHarness(async (harness) => { ... })`. It gives each test a fresh server plus helpers for running scenarios and reading what the server captured.

The main utilities you'll use in test files:

- `runScenario(path, timeoutMs?)` - Runs a TypeScript scenario with `tsx`.
- `runNodeScenario(path, nodeArgs?, timeoutMs?)` - Runs plain Node scenarios, used for `--import braintrust/hook.mjs`.
- `testRunEvents()` - Returns parsed events tagged with the current test run id.
- `events()`, `payloads()`, `requestCursor()`, `requestsAfter()` - Lower-level access for ingestion payloads and HTTP request flow assertions.
- `testRunId` - Useful when a scenario or assertion needs the exact run marker.

Use `normalizeForSnapshot(...)` before snapshotting. It replaces timestamps and ids with stable tokens and strips machine-specific paths and localhost ports.

## Running

```bash
pnpm run test:e2e # Run tests
pnpm run test:e2e:update # Run tests and update snapshots
```
23 changes: 23 additions & 0 deletions e2e/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"name": "@braintrust/js-e2e-tests",
"version": "0.0.0",
"private": true,
"scripts": {
"test:e2e": "vitest run",
"test:e2e:update": "vitest run --update"
},
"devDependencies": {
"@braintrust/otel": "workspace:^",
"@opentelemetry/api": ">=1.9.0",
"@opentelemetry/context-async-hooks": ">=1.9.0",
"@opentelemetry/sdk-trace-base": ">=1.9.0",
"@types/node": "^20.10.5",
"braintrust": "workspace:^",
"openai": "6.25.0",
"openai-v4": "npm:openai@4.104.0",
"openai-v5": "npm:openai@5.11.0",
"tsx": "^3.14.0",
"typescript": "5.4.4",
"vitest": "^2.1.9"
}
}
44 changes: 44 additions & 0 deletions e2e/scenarios/helpers.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import { BasicTracerProvider } from "@opentelemetry/sdk-trace-base";

/**
 * Drains an async iterable into an array.
 *
 * @param records - Any async-iterable source of values.
 * @returns All yielded values, in iteration order.
 */
export async function collectAsync<T>(records: AsyncIterable<T>): Promise<T[]> {
  const collected: T[] = [];
  const iterator = records[Symbol.asyncIterator]();
  for (
    let step = await iterator.next();
    !step.done;
    step = await iterator.next()
  ) {
    collected.push(step.value);
  }
  return collected;
}

/**
 * Reads the unique id for the current e2e test run.
 *
 * The harness sets BRAINTRUST_E2E_RUN_ID before spawning scenario
 * subprocesses. The original non-null assertion (`!`) is erased at
 * runtime, so a missing variable surfaced later as a cryptic
 * "cannot read property of undefined" in callers such as scopedName;
 * failing fast here gives a clear, actionable error instead.
 *
 * @returns The run id from the environment.
 * @throws Error when BRAINTRUST_E2E_RUN_ID is unset or empty.
 */
export function getTestRunId(): string {
  const runId = process.env.BRAINTRUST_E2E_RUN_ID;
  if (!runId) {
    throw new Error(
      "BRAINTRUST_E2E_RUN_ID is not set; scenarios must be launched through the e2e harness",
    );
  }
  return runId;
}

/**
 * Builds a name unique to the current test run by appending a sanitized
 * form of the run id to `base`.
 *
 * @param base - Human-readable prefix (e.g. a project name).
 * @param testRunId - Run id to scope by; defaults to the env-provided id.
 * @returns `base` plus the lowercased, dash-sanitized run id.
 */
export function scopedName(base: string, testRunId = getTestRunId()): string {
  const sanitized = testRunId
    .toLowerCase()
    .replace(/[^a-z0-9-]/g, "-");
  return [base, sanitized].join("-");
}

/**
 * Creates a BasicTracerProvider with the given span processors attached,
 * working across OTEL SDK versions:
 *  - older SDKs expose `addSpanProcessor(processor)` on the instance;
 *  - newer SDKs take a `spanProcessors` constructor option instead.
 *
 * Fix vs. the original: the legacy path constructed a throwaway probe
 * provider and then a *second* provider to attach processors to, leaving
 * the probe instance discarded. We now probe and reuse the same instance.
 *
 * @param processors - Span processors to attach (typed loosely so both
 *   old- and new-style processor interfaces are accepted).
 * @returns A provider with all processors attached.
 */
export function createTracerProvider(processors: unknown[]) {
  const provider = new BasicTracerProvider() as BasicTracerProvider & {
    addSpanProcessor?: (processor: unknown) => void;
  };

  if (typeof provider.addSpanProcessor === "function") {
    // Legacy API: attach processors to the instance we just probed.
    for (const processor of processors) {
      provider.addSpanProcessor(processor);
    }
    return provider;
  }

  // Modern API: processors must be supplied at construction time.
  return new BasicTracerProvider({
    spanProcessors: processors as never,
  });
}

/**
 * Entry-point runner for scenario scripts: executes `main` and converts a
 * rejection into an error log plus a non-zero exit code, without ever
 * throwing synchronously.
 */
export function runMain(main: () => Promise<void>): void {
  const handleFailure = (error: unknown): void => {
    console.error(error);
    process.exitCode = 1;
  };
  void main().catch(handleFailure);
}
66 changes: 66 additions & 0 deletions e2e/scenarios/openai-auto-instrumentation-node-hook.impl.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import { initLogger } from "braintrust";

// Model requested by the scenario's chat completion call.
const OPENAI_MODEL = "gpt-4o-mini";

/**
 * Reads the unique id for the current e2e test run.
 *
 * The harness sets BRAINTRUST_E2E_RUN_ID before spawning this scenario.
 * Previously a missing variable surfaced later as
 * "Cannot read properties of undefined (reading 'toLowerCase')" inside
 * scopedName; failing fast here gives a clear error instead.
 *
 * @returns The run id from the environment.
 * @throws Error when BRAINTRUST_E2E_RUN_ID is unset or empty.
 */
function getTestRunId() {
  const runId = process.env.BRAINTRUST_E2E_RUN_ID;
  if (!runId) {
    throw new Error(
      "BRAINTRUST_E2E_RUN_ID is not set; scenarios must be launched through the e2e harness",
    );
  }
  return runId;
}

/**
 * Builds a name unique to the current test run by appending a sanitized
 * form of the run id to `base`.
 *
 * @param base - Human-readable prefix (e.g. a project name).
 * @param testRunId - Run id to scope by; defaults to the env-provided id.
 * @returns `base` plus the lowercased, dash-sanitized run id.
 */
function scopedName(base, testRunId = getTestRunId()) {
  const sanitized = testRunId.toLowerCase().replace(/[^a-z0-9-]/g, "-");
  return [base, sanitized].join("-");
}

export async function runOpenAIAutoInstrumentationNodeHook(
OpenAI,
openaiSdkVersion,
) {
const testRunId = getTestRunId();
const logger = initLogger({
projectName: scopedName("e2e-openai-auto-instrumentation-hook", testRunId),
});
const client = new OpenAI({
apiKey: process.env.OPENAI_API_KEY,
baseURL: process.env.OPENAI_BASE_URL,
});

await logger.traced(
async () => {
await client.chat.completions.create({
model: OPENAI_MODEL,
messages: [
{
role: "user",
content: "Auto-instrument this request.",
},
],
max_tokens: 8,
temperature: 0,
});
},
{
name: "openai-auto-hook-root",
event: {
metadata: {
scenario: "openai-auto-instrumentation-node-hook",
openaiSdkVersion,
testRunId,
},
},
},
);

await logger.flush();
}

export function runOpenAIAutoInstrumentationNodeHookOrExit(
OpenAI,
openaiSdkVersion,
) {
void runOpenAIAutoInstrumentationNodeHook(OpenAI, openaiSdkVersion).catch(
(error) => {
console.error(error);
process.exitCode = 1;
},
);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Scenario entry point: runs the auto-instrumentation hook flow against
// the OpenAI v4 SDK (installed under the "openai-v4" npm alias, pinned
// to 4.104.0 in e2e/package.json).
import OpenAI from "openai-v4";
import { runOpenAIAutoInstrumentationNodeHookOrExit } from "./openai-auto-instrumentation-node-hook.impl.mjs";

runOpenAIAutoInstrumentationNodeHookOrExit(OpenAI, "4.104.0");
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Scenario entry point: runs the auto-instrumentation hook flow against
// the OpenAI v5 SDK (installed under the "openai-v5" npm alias, pinned
// to 5.11.0 in e2e/package.json).
import OpenAI from "openai-v5";
import { runOpenAIAutoInstrumentationNodeHookOrExit } from "./openai-auto-instrumentation-node-hook.impl.mjs";

runOpenAIAutoInstrumentationNodeHookOrExit(OpenAI, "5.11.0");
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Scenario entry point: runs the auto-instrumentation hook flow against
// the current OpenAI SDK (the unaliased "openai" package, pinned to
// 6.25.0 in e2e/package.json).
import OpenAI from "openai";
import { runOpenAIAutoInstrumentationNodeHookOrExit } from "./openai-auto-instrumentation-node-hook.impl.mjs";

runOpenAIAutoInstrumentationNodeHookOrExit(OpenAI, "6.25.0");
75 changes: 75 additions & 0 deletions e2e/scenarios/otel-compat-mixed-tracing.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import { context as otelContext, trace } from "@opentelemetry/api";
import { AsyncHooksContextManager } from "@opentelemetry/context-async-hooks";
import { BraintrustSpanProcessor, setupOtelCompat } from "@braintrust/otel";
import { getContextManager, initLogger } from "braintrust";
import {
createTracerProvider,
getTestRunId,
runMain,
scopedName,
} from "./helpers";

/**
 * Scenario: Braintrust and OpenTelemetry spans interleaved in one trace.
 *
 * Builds the nesting bt-root -> otel-middle -> bt-child-under-otel so the
 * e2e test can inspect how the compat layer stitches the two SDKs'
 * contexts together.
 */
async function main() {
  const testRunId = getTestRunId();
  // Install Braintrust's OTEL compatibility layer before any spans start.
  setupOtelCompat();

  // OTEL needs an async-hooks context manager so active-span context
  // propagates across awaits; register it globally.
  const contextManager = new AsyncHooksContextManager();
  contextManager.enable();
  otelContext.setGlobalContextManager(contextManager);

  try {
    // Exports OTEL spans to the Braintrust API endpoint from the env
    // (NOTE(review): in e2e runs this presumably points at the harness's
    // mock server — confirm against scenario-harness). `parent` scopes
    // exported spans to a run-unique project.
    const processor = new BraintrustSpanProcessor({
      apiKey: process.env.BRAINTRUST_API_KEY!,
      apiUrl: process.env.BRAINTRUST_API_URL!,
      parent: `project_name:${scopedName("e2e-otel-compat-mixed-tracing", testRunId)}`,
    });
    const provider = createTracerProvider([processor]);
    trace.setGlobalTracerProvider(provider);

    const tracer = trace.getTracer("e2e-otel-compat");
    const logger = initLogger({
      projectName: scopedName("e2e-otel-compat-mixed-tracing", testRunId),
    });
    // Root span created via the Braintrust SDK; testRunId in metadata lets
    // the test filter events belonging to this run.
    const btRoot = logger.startSpan({
      name: "bt-root",
      event: {
        metadata: {
          scenario: "otel-compat-mixed-tracing",
          testRunId,
        },
      },
    });
    const contextManagerFacade = getContextManager();

    // Run an OTEL span inside the Braintrust root's context, then a
    // Braintrust child while the OTEL span is active — the mixed-tracing
    // shape under test.
    await contextManagerFacade.runInContext(btRoot, async () => {
      await tracer.startActiveSpan("otel-middle", async (otelSpan) => {
        const btChild = logger.startSpan({
          name: "bt-child-under-otel",
          event: {
            metadata: {
              kind: "bt-child-under-otel",
              testRunId,
            },
          },
        });
        btChild.log({
          output: {
            source: "otel-child-context",
          },
        });
        btChild.end();
        otelSpan.end();
      });
    });
    btRoot.end();

    // Flush both pipelines so every event reaches the server before exit.
    await logger.flush();
    await processor.forceFlush();
    // `shutdown` is optional across provider versions; call it if present.
    await (provider as { shutdown?: () => Promise<void> }).shutdown?.();
  } finally {
    // Always detach global OTEL state, even on failure, so the process
    // can exit cleanly.
    otelContext.disable();
    contextManager.disable();
  }
}

runMain(main);
33 changes: 33 additions & 0 deletions e2e/scenarios/otel-span-processor-export.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import { context, trace } from "@opentelemetry/api";
import { BraintrustSpanProcessor } from "@braintrust/otel";
import {
createTracerProvider,
getTestRunId,
runMain,
scopedName,
} from "./helpers";

/**
 * Scenario: export OTEL spans through BraintrustSpanProcessor with AI-span
 * filtering enabled.
 *
 * Creates a plain root span and a `gen_ai.completion` child; with
 * `filterAISpans: true` the test presumably expects only the AI span (and
 * whatever the processor keeps for context) to be exported — confirm
 * against the corresponding test's snapshot.
 */
async function main() {
  const testRunId = getTestRunId();
  // Exports spans to the Braintrust API endpoint from the env
  // (NOTE(review): in e2e runs this presumably points at the harness's
  // mock server). `parent` scopes exported spans to a run-unique project.
  const processor = new BraintrustSpanProcessor({
    apiKey: process.env.BRAINTRUST_API_KEY!,
    apiUrl: process.env.BRAINTRUST_API_URL!,
    filterAISpans: true,
    parent: `project_name:${scopedName("e2e-otel-span-processor-export", testRunId)}`,
  });
  const provider = createTracerProvider([processor]);
  trace.setGlobalTracerProvider(provider);

  const tracer = trace.getTracer("e2e-otel-export");
  // Parent the AI span under the root explicitly via context (no global
  // context manager is installed in this scenario).
  const rootSpan = tracer.startSpan("root-operation");
  const rootContext = trace.setSpan(context.active(), rootSpan);
  const aiSpan = tracer.startSpan("gen_ai.completion", undefined, rootContext);
  aiSpan.setAttribute("gen_ai.system", "openai");
  aiSpan.end();
  rootSpan.end();

  // Flush so every span reaches the server before exit; `shutdown` is
  // optional across provider versions.
  await processor.forceFlush();
  await (provider as { shutdown?: () => Promise<void> }).shutdown?.();
}

runMain(main);
Loading
Loading