From 86e02fcf3da6330c4f68e5901be2733c625ca16a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=99=88=E6=BA=90=E6=B3=89?= Date: Fri, 15 May 2026 00:54:40 -0700 Subject: [PATCH] fix(ai/recipes): declare max_batch_tokens on google embedding recipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit google was the only first-party embedding recipe still missing max_batch_tokens after v0.32 #779 landed the once-per-process startup warning. Operators routing through google:gemini-embedding-001 (the default-provider path after v0.27 native gateway) saw the warning on every `gbrain query`, every kos-compat-api / MCP `/ingest` response, and every cron `gbrain` invocation. For CJK-dense or large-payload batches the absent field also forced the gateway to discover Google's per-request token cap reactively via recursive halving instead of pre-splitting. Declared: - max_batch_tokens: 20_000 — Google's per-text cap is 2048 tokens; ~20k tokens/request is the soft cap before gemini-embedding-001 starts emitting 429s. - chars_per_token: 2 — CJK density on mixed corpora (English averages ~4, CJK ~1.5; 2 is a conservative estimate for mixed text). - safety_factor left at gateway default 0.8 → the pre-split budget is 20 000 × 0.8 = 16 000 estimated tokens, i.e. ~32 000 chars/batch at 2 chars/token (assuming the gateway estimates tokens as chars / chars_per_token). That stays under the ~20k-token request cap for text averaging at least 1.6 chars/token. Two existing regression tests pinned google as the canary "real provider with no cap declared": - test/ai/no-batch-cap-suppression.serial.test.ts assumed google STILL warned (the comment explicitly called it a fixed-cap model waiting for someone to cap it). With this patch google joins the capped set, so the test flips to assert the strong invariant: NO first-party recipe warns, because every native and openai-compat recipe now declares either max_batch_tokens or no_batch_cap. - test/ai/adaptive-embed-batch.test.ts checked `contractMatch.length > 0`. After this patch the canary set is empty, so the assertion becomes `toBe(0)`. 
The once-per-process suppression mechanism is still exercised by the `firstCallCount` stability check earlier in the same test. Validation: - bun run typecheck clean - bun test test/ai/ — 144 pass / 0 fail (was 142 pass / 2 fail pre-patch, expected: the two tests above) --- src/core/ai/recipes/google.ts | 7 +++++++ test/ai/adaptive-embed-batch.test.ts | 10 ++++++++-- test/ai/no-batch-cap-suppression.serial.test.ts | 15 ++++++++++----- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/core/ai/recipes/google.ts b/src/core/ai/recipes/google.ts index 58e47cab3..bc3b9c670 100644 --- a/src/core/ai/recipes/google.ts +++ b/src/core/ai/recipes/google.ts @@ -14,6 +14,13 @@ export const google: Recipe = { models: ['gemini-embedding-001'], default_dims: 768, dims_options: [768, 1536, 3072], + // Per-text cap is 2048 tokens; per-request total ~20k tokens before + // gemini-embedding-001 starts 429-ing. chars_per_token: 2 is a conservative + // estimate for mixed corpora (English averages ~4, CJK ~1.5). With the + // gateway default safety_factor 0.8 the pre-split budget is 16 000 estimated + // tokens, ~32 000 chars/batch (assumes tokens ≈ chars / chars_per_token). + max_batch_tokens: 20_000, + chars_per_token: 2, cost_per_1m_tokens_usd: 0.15, price_last_verified: '2026-04-20', }, diff --git a/test/ai/adaptive-embed-batch.test.ts b/test/ai/adaptive-embed-batch.test.ts index 6cd8d06ab..adb78dc0f 100644 --- a/test/ai/adaptive-embed-batch.test.ts +++ b/test/ai/adaptive-embed-batch.test.ts @@ -375,16 +375,22 @@ describe('startup warning for recipes missing max_batch_tokens', () => { console.warn = original; } - // The warning text should match the documented contract. + // The warning text contract is still documented; after v0.34.5 (google + // declared max_batch_tokens) every first-party native/openai-compat + // recipe is capped, so the canary set is empty. 
The mechanism itself + // is still exercised by the once-per-process suppression check above + // (firstCallCount stability across re-configure). const contractMatch = warnings.filter(w => w.includes('[ai.gateway]') && w.includes('declares an embedding touchpoint'), ); - expect(contractMatch.length).toBeGreaterThan(0); + expect(contractMatch.length).toBe(0); // Voyage declares max_batch_tokens → suppressed. OpenAI is the // canonical fast-path recipe → also suppressed by id. Both must be // absent from the warnings. expect(warnings.find(w => w.includes('"voyage"'))).toBeUndefined(); expect(warnings.find(w => w.includes('"openai"'))).toBeUndefined(); + // After v0.34.5 google also declares max_batch_tokens. + expect(warnings.find(w => w.includes('"google"'))).toBeUndefined(); }); }); diff --git a/test/ai/no-batch-cap-suppression.serial.test.ts b/test/ai/no-batch-cap-suppression.serial.test.ts index 9bd3e69b7..433420e6e 100644 --- a/test/ai/no-batch-cap-suppression.serial.test.ts +++ b/test/ai/no-batch-cap-suppression.serial.test.ts @@ -52,15 +52,20 @@ describe('v0.32 #779: no_batch_cap suppresses the missing-max_batch_tokens warni } }); - test('configureGateway STILL warns for google (real provider, no cap declared)', () => { + test('configureGateway does not warn for any first-party recipe (every embedding recipe declares max_batch_tokens or no_batch_cap)', () => { warnSpy.mockClear(); resetGateway(); configureGateway({ env: {} }); const messages = warnSpy.mock.calls.map(c => String(c[0] ?? '')); - expect( - messages.some(m => m.includes('"google"') && m.includes('without max_batch_tokens')), - 'google should warn (it has fixed-cap models)', - ).toBe(true); + // Pre-v0.34.5 this test asserted that google STILL warned (waiting for + // someone to cap it). google now declares max_batch_tokens, so the + // contract flips: every native/openai-compat recipe is capped, and the + // missing-cap warning is silent for the entire first-party set. 
+ const missingCapWarnings = messages.filter(m => + m.includes('declares an embedding touchpoint') && + m.includes('without max_batch_tokens'), + ); + expect(missingCapWarnings).toHaveLength(0); }); test('every recipe with empty models[] declares user_provided_models OR has openai-fast-path', () => {