From 86e02fcf3da6330c4f68e5901be2733c625ca16a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=99=88=E6=BA=90=E6=B3=89?=
 <chenyuanquan@chenyuanquandeMac-mini.local>
Date: Fri, 15 May 2026 00:54:40 -0700
Subject: [PATCH] fix(ai/recipes): declare max_batch_tokens on google embedding
 recipe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

google was the only first-party embedding recipe still missing
max_batch_tokens after v0.32 #779 landed the once-per-process startup
warning. Operators routing through google:gemini-embedding-001 (the
default-provider path after v0.27 native gateway) saw the warning on
every `gbrain query`, every kos-compat-api / MCP `/ingest` response,
and every cron `gbrain` invocation. For CJK-dense or large-payload
batches the absent field also forced the gateway to discover Google's
per-request token cap reactively via recursive halving instead of
pre-splitting.

Declared:
- max_batch_tokens: 20_000 — Google's per-text cap is 2048 tokens;
  ~20k tokens/request is the soft cap before gemini-embedding-001
  starts emitting 429s.
- chars_per_token: 2 — CJK density on mixed corpora (English averages
  ~4, CJK ~1.5; 2 keeps pre-split safe for both).
- safety_factor left at gateway default 0.8 → pre-split budget is
  20_000 × 0.8 = 16 000 estimated tokens/batch (~32 000 chars at
  2 chars/token), under the ~20k tokens/request soft cap above.

Two existing regression tests pinned google as the canary "real
provider with no cap declared":

- test/ai/no-batch-cap-suppression.serial.test.ts assumed google
  STILL warned (the comment explicitly called it a fixed-cap model
  waiting for someone to cap it). With this patch google joins the
  capped set, so the test flips to assert the strong invariant: NO
  first-party recipe warns, because every native and openai-compat
  recipe now declares either max_batch_tokens or no_batch_cap.

- test/ai/adaptive-embed-batch.test.ts asserted
  `contractMatch.length > 0`. After this patch the canary set is
  empty, so the assertion becomes `toBe(0)`. The once-per-process
  suppression mechanism is still exercised by the `firstCallCount`
  stability check earlier in the same test.

Validation:
- bun run typecheck clean
- bun test test/ai/ — 144 pass / 0 fail (142 pass / 2 fail with only
  the recipe change applied; the two failures were the tests above)
---
 src/core/ai/recipes/google.ts                   |  7 +++++++
 test/ai/adaptive-embed-batch.test.ts            | 10 ++++++++--
 test/ai/no-batch-cap-suppression.serial.test.ts | 15 ++++++++++-----
 3 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/src/core/ai/recipes/google.ts b/src/core/ai/recipes/google.ts
index 58e47cab3..bc3b9c670 100644
--- a/src/core/ai/recipes/google.ts
+++ b/src/core/ai/recipes/google.ts
@@ -14,6 +14,13 @@ export const google: Recipe = {
       models: ['gemini-embedding-001'],
       default_dims: 768,
       dims_options: [768, 1536, 3072],
+      // Per-text cap is 2048 tokens; per-request total ~20k tokens before
+      // gemini-embedding-001 starts 429-ing. chars_per_token: 2 covers
+      // CJK-dense corpora (English averages ~4, CJK ~1.5); safety_factor
+      // stays at the gateway default 0.8 so pre-split lands at ~16 000
+      // estimated tokens (~32 000 chars) per batch, under the ~20k cap.
+      max_batch_tokens: 20_000,
+      chars_per_token: 2,
       cost_per_1m_tokens_usd: 0.15,
       price_last_verified: '2026-04-20',
     },
diff --git a/test/ai/adaptive-embed-batch.test.ts b/test/ai/adaptive-embed-batch.test.ts
index 6cd8d06ab..adb78dc0f 100644
--- a/test/ai/adaptive-embed-batch.test.ts
+++ b/test/ai/adaptive-embed-batch.test.ts
@@ -375,16 +375,22 @@ describe('startup warning for recipes missing max_batch_tokens', () => {
       console.warn = original;
     }
 
-    // The warning text should match the documented contract.
+    // The warning text contract is still documented; after v0.34.5 (google
+    // declared max_batch_tokens) every first-party native/openai-compat
+    // recipe is capped, so the canary set is empty. The mechanism itself
+    // is still exercised by the once-per-process suppression check above
+    // (firstCallCount stability across re-configure).
     const contractMatch = warnings.filter(w =>
       w.includes('[ai.gateway]') && w.includes('declares an embedding touchpoint'),
     );
-    expect(contractMatch.length).toBeGreaterThan(0);
+    expect(contractMatch.length).toBe(0);
 
     // Voyage declares max_batch_tokens → suppressed. OpenAI is the
     // canonical fast-path recipe → also suppressed by id. Both must be
     // absent from the warnings.
     expect(warnings.find(w => w.includes('"voyage"'))).toBeUndefined();
     expect(warnings.find(w => w.includes('"openai"'))).toBeUndefined();
+    // After v0.34.5 google also declares max_batch_tokens.
+    expect(warnings.find(w => w.includes('"google"'))).toBeUndefined();
   });
 });
diff --git a/test/ai/no-batch-cap-suppression.serial.test.ts b/test/ai/no-batch-cap-suppression.serial.test.ts
index 9bd3e69b7..433420e6e 100644
--- a/test/ai/no-batch-cap-suppression.serial.test.ts
+++ b/test/ai/no-batch-cap-suppression.serial.test.ts
@@ -52,15 +52,20 @@ describe('v0.32 #779: no_batch_cap suppresses the missing-max_batch_tokens warni
     }
   });
 
-  test('configureGateway STILL warns for google (real provider, no cap declared)', () => {
+  test('configureGateway does not warn for any first-party recipe (every embedding recipe declares max_batch_tokens or no_batch_cap)', () => {
     warnSpy.mockClear();
     resetGateway();
     configureGateway({ env: {} });
     const messages = warnSpy.mock.calls.map(c => String(c[0] ?? ''));
-    expect(
-      messages.some(m => m.includes('"google"') && m.includes('without max_batch_tokens')),
-      'google should warn (it has fixed-cap models)',
-    ).toBe(true);
+    // Pre-v0.34.5 this test asserted that google STILL warned (waiting for
+    // someone to cap it). google now declares max_batch_tokens, so the
+    // contract flips: every native/openai-compat recipe is capped, and the
+    // missing-cap warning is silent for the entire first-party set.
+    const missingCapWarnings = messages.filter(m =>
+      m.includes('declares an embedding touchpoint') &&
+      m.includes('without max_batch_tokens'),
+    );
+    expect(missingCapWarnings).toHaveLength(0);
   });
 
   test('every recipe with empty models[] declares user_provided_models OR has openai-fast-path', () => {