diff --git a/src/core/ai/recipes/google.ts b/src/core/ai/recipes/google.ts index 58e47cab3..bc3b9c670 100644 --- a/src/core/ai/recipes/google.ts +++ b/src/core/ai/recipes/google.ts @@ -14,6 +14,13 @@ export const google: Recipe = { models: ['gemini-embedding-001'], default_dims: 768, dims_options: [768, 1536, 3072], + // Per-text cap is 2048 tokens; per-request total ~20k tokens before + // gemini-embedding-001 starts 429-ing. chars_per_token: 2 covers + // CJK-dense corpora (English averages ~4, CJK ~1.5); safety_factor + // stays at the gateway default 0.8 so pre-split lands at ~8 000 + // chars/batch — well under any per-request floor Google publishes. + max_batch_tokens: 20_000, + chars_per_token: 2, cost_per_1m_tokens_usd: 0.15, price_last_verified: '2026-04-20', }, diff --git a/test/ai/adaptive-embed-batch.test.ts b/test/ai/adaptive-embed-batch.test.ts index 6cd8d06ab..adb78dc0f 100644 --- a/test/ai/adaptive-embed-batch.test.ts +++ b/test/ai/adaptive-embed-batch.test.ts @@ -375,16 +375,22 @@ describe('startup warning for recipes missing max_batch_tokens', () => { console.warn = original; } - // The warning text should match the documented contract. + // The warning text contract is still documented; after v0.34.5 (google + // declared max_batch_tokens) every first-party native/openai-compat + // recipe is capped, so the canary set is empty. The mechanism itself + // is still exercised by the once-per-process suppression check above + // (firstCallCount stability across re-configure). const contractMatch = warnings.filter(w => w.includes('[ai.gateway]') && w.includes('declares an embedding touchpoint'), ); - expect(contractMatch.length).toBeGreaterThan(0); + expect(contractMatch.length).toBe(0); // Voyage declares max_batch_tokens → suppressed. OpenAI is the // canonical fast-path recipe → also suppressed by id. Both must be // absent from the warnings. expect(warnings.find(w => w.includes('"voyage"'))).toBeUndefined(); expect(warnings.find(w => w.includes('"openai"'))).toBeUndefined(); + // After v0.34.5 google also declares max_batch_tokens. + expect(warnings.find(w => w.includes('"google"'))).toBeUndefined(); }); }); diff --git a/test/ai/no-batch-cap-suppression.serial.test.ts b/test/ai/no-batch-cap-suppression.serial.test.ts index 9bd3e69b7..433420e6e 100644 --- a/test/ai/no-batch-cap-suppression.serial.test.ts +++ b/test/ai/no-batch-cap-suppression.serial.test.ts @@ -52,15 +52,20 @@ describe('v0.32 #779: no_batch_cap suppresses the missing-max_batch_tokens warni } }); - test('configureGateway STILL warns for google (real provider, no cap declared)', () => { + test('configureGateway does not warn for any first-party recipe (every embedding recipe declares max_batch_tokens or no_batch_cap)', () => { warnSpy.mockClear(); resetGateway(); configureGateway({ env: {} }); const messages = warnSpy.mock.calls.map(c => String(c[0] ?? '')); - expect( - messages.some(m => m.includes('"google"') && m.includes('without max_batch_tokens')), - 'google should warn (it has fixed-cap models)', - ).toBe(true); + // Pre-v0.34.5 this test asserted that google STILL warned (waiting for + // someone to cap it). google now declares max_batch_tokens, so the + // contract flips: every native/openai-compat recipe is capped, and the + // missing-cap warning is silent for the entire first-party set. + const missingCapWarnings = messages.filter(m => + m.includes('declares an embedding touchpoint') && + m.includes('without max_batch_tokens'), + ); + expect(missingCapWarnings).toHaveLength(0); }); test('every recipe with empty models[] declares user_provided_models OR has openai-fast-path', () => {