Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,12 @@ jobs:
- name: Run TypeScript tests
run: make test-typescript

- name: Install payloads dependencies
run: cd payloads && pnpm install

- name: Run payloads tests
run: cd payloads && pnpm test

test-python:
runs-on: ubuntu-latest

Expand Down
32 changes: 16 additions & 16 deletions crates/lingua/src/universal/response.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ pub struct UniversalUsage {
/// Tokens written to cache during this request
pub prompt_cache_creation_tokens: Option<i64>,

/// Tokens used for reasoning in completion (OpenAI: completion_tokens_details.reasoning_tokens, Google: thoughtsTokenCount)
/// Reasoning/thinking tokens used in the completion.
/// `Some(n)` only when `n > 0`; otherwise `None`.
pub completion_reasoning_tokens: Option<i64>,
}

Expand Down Expand Up @@ -215,10 +216,12 @@ impl UniversalUsage {
.and_then(|d| d.get("cached_tokens"))
.and_then(Value::as_i64),
prompt_cache_creation_tokens: None, // OpenAI doesn't report cache creation tokens
// Treat 0 as None: 0 reasoning tokens means "no reasoning" = semantically None
completion_reasoning_tokens: usage
.get("completion_tokens_details")
.and_then(|d| d.get("reasoning_tokens"))
.and_then(Value::as_i64),
.and_then(Value::as_i64)
.filter(|&v| v > 0),
},
ProviderFormat::Responses => Self {
prompt_tokens: usage.get("input_tokens").and_then(Value::as_i64),
Expand All @@ -228,10 +231,12 @@ impl UniversalUsage {
.and_then(|d| d.get("cached_tokens"))
.and_then(Value::as_i64),
prompt_cache_creation_tokens: None,
// Treat 0 as None: 0 reasoning tokens means "no reasoning" = semantically None
completion_reasoning_tokens: usage
.get("output_tokens_details")
.and_then(|d| d.get("reasoning_tokens"))
.and_then(Value::as_i64),
.and_then(Value::as_i64)
.filter(|&v| v > 0),
},
ProviderFormat::Anthropic => Self {
prompt_tokens: usage.get("input_tokens").and_then(Value::as_i64),
Expand Down Expand Up @@ -319,19 +324,14 @@ impl UniversalUsage {
serde_json::json!(prompt + completion),
);

if let Some(cached_tokens) = self.prompt_cached_tokens {
map.insert(
"input_tokens_details".into(),
serde_json::json!({ "cached_tokens": cached_tokens }),
);
}

if let Some(reasoning_tokens) = self.completion_reasoning_tokens {
map.insert(
"output_tokens_details".into(),
serde_json::json!({ "reasoning_tokens": reasoning_tokens }),
);
}
map.insert(
"input_tokens_details".into(),
serde_json::json!({ "cached_tokens": self.prompt_cached_tokens.unwrap_or(0) }),
);
Comment on lines +327 to +330
Copy link
Contributor Author

@knjiang knjiang Jan 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

caught this from the validate_response_json js binding. responses require input_token_details and output_token_details

map.insert(
"output_tokens_details".into(),
serde_json::json!({ "reasoning_tokens": self.completion_reasoning_tokens.unwrap_or(0) }),
);

Value::Object(map)
}
Expand Down
7 changes: 6 additions & 1 deletion payloads/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,16 @@
"format": "prettier --write .",
"format:check": "prettier --check .",
"test": "vitest run",
"test:watch": "vitest"
"test:watch": "vitest",
"test:transforms": "vitest run scripts/transforms",
"test:transforms:update": "vitest run scripts/transforms -u",
"test:transforms:watch": "vitest scripts/transforms",
"lingua-capture": "tsx scripts/transforms/lingua-capture.ts"
},
"dependencies": {
"@anthropic-ai/sdk": "^0.71.2",
"@aws-sdk/client-bedrock-runtime": "^3.700.0",
"@braintrust/lingua-wasm": "workspace:*",
"@google/genai": "^1.34.0",
"openai": "^6.16.0"
},
Expand Down
51 changes: 48 additions & 3 deletions payloads/scripts/capture.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env tsx

import { mkdirSync } from "fs";
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
import { join } from "path";
import { needsRegeneration, updateCache } from "./cache-utils";
import { saveAllFiles } from "./file-manager";
Expand All @@ -25,6 +25,7 @@ const allProviders = [
interface CaptureOptions {
list: boolean;
force: boolean;
failing: boolean;
filter?: string;
providers?: string[];
cases?: string[];
Expand All @@ -36,6 +37,7 @@ function parseArguments(): CaptureOptions {
const options: CaptureOptions = {
list: false,
force: false,
failing: false,
};

for (let i = 0; i < args.length; i++) {
Expand All @@ -48,6 +50,9 @@ function parseArguments(): CaptureOptions {
case "--force":
options.force = true;
break;
case "--failing":
options.failing = true;
break;
case "--filter":
if (i + 1 < args.length) {
options.filter = args[i + 1];
Expand Down Expand Up @@ -87,7 +92,7 @@ function parseArguments(): CaptureOptions {
if (arg.startsWith("--")) {
console.error(`Unknown option: ${arg}`);
console.error(
"Available options: --list, --force, --filter, --providers, --cases, --stream"
"Available options: --list, --force, --failing, --filter, --providers, --cases, --stream"
);
process.exit(1);
}
Expand All @@ -104,6 +109,22 @@ interface CaseToRun {
executor: ProviderExecutor<unknown, unknown, unknown>;
}

const FAILURES_FILE = ".failures.json";

/**
 * Load the set of "provider/caseName" keys that failed on the previous run.
 *
 * Best-effort: the failures file is only a retry cache, so a missing,
 * malformed, or wrongly-shaped file yields an empty set instead of throwing.
 *
 * @param outputDir directory containing the failures file
 * @returns keys of previously failed cases
 */
function loadFailures(outputDir: string): Set<string> {
  const failuresPath = join(outputDir, FAILURES_FILE);
  if (!existsSync(failuresPath)) {
    return new Set();
  }
  try {
    // JSON.parse returns untyped data and the file may have been hand-edited;
    // validate the shape before trusting it as string[].
    const data: unknown = JSON.parse(readFileSync(failuresPath, "utf-8"));
    if (!Array.isArray(data)) {
      return new Set();
    }
    return new Set(data.filter((k): k is string => typeof k === "string"));
  } catch {
    // Corrupt cache file — treat as "no recorded failures".
    return new Set();
  }
}

/**
 * Persist the list of failed case keys so a later `--failing` run can retry
 * exactly those cases.
 *
 * @param outputDir directory to write the failures file into
 * @param failures "provider/caseName" keys that failed this run
 */
function saveFailures(outputDir: string, failures: string[]): void {
  const failuresPath = join(outputDir, FAILURES_FILE);
  writeFileSync(failuresPath, JSON.stringify(failures, null, 2));
}

function getAllCases(options: CaptureOptions): CaseToRun[] {
const cases: CaseToRun[] = [];

Expand Down Expand Up @@ -157,13 +178,28 @@ async function main() {
const outputDir = join(__dirname, "..", "snapshots");
mkdirSync(outputDir, { recursive: true });

// Filter to only failing cases if --failing is passed
let filteredCases = allCases;
if (options.failing) {
const previousFailures = loadFailures(outputDir);
if (previousFailures.size === 0) {
console.log("No previous failures recorded.");
return;
}
filteredCases = allCases.filter((c) =>
previousFailures.has(`${c.provider}/${c.caseName}`)
);
console.log(`Retrying ${filteredCases.length} previously failed cases...`);
}

// Filter cases that need to be run
const casesToRun: CaseToRun[] = [];
const skippedCases: CaseToRun[] = [];

for (const case_ of allCases) {
for (const case_ of filteredCases) {
if (
!options.force &&
!options.failing &&
!needsRegeneration(
outputDir,
case_.provider,
Expand Down Expand Up @@ -267,6 +303,15 @@ async function main() {
}
}

// Save failures for --failing retry
const failureKeys = failed.map(
(f) => `${f.case_.provider}/${f.case_.caseName}`
);
saveFailures(outputDir, failureKeys);
if (failed.length > 0) {
console.log(`\n💡 Run with --failing to retry failed cases`);
}

console.log(`\nCapture complete! Results saved to: ${outputDir}`);
}

Expand Down
Loading