Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/core/ai/recipes/google.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@ export const google: Recipe = {
models: ['gemini-embedding-001'],
default_dims: 768,
dims_options: [768, 1536, 3072],
// Per-text cap is 2048 tokens; per-request total is ~20k tokens before
// gemini-embedding-001 starts returning HTTP 429. chars_per_token: 2 covers
// CJK-dense corpora (English averages ~4, CJK ~1.5); safety_factor
// stays at the gateway default 0.8 so pre-split lands at ~8 000
// chars/batch — well under the lowest per-request limit Google publishes.
max_batch_tokens: 20_000,
chars_per_token: 2,
cost_per_1m_tokens_usd: 0.15,
price_last_verified: '2026-04-20',
},
Expand Down
10 changes: 8 additions & 2 deletions test/ai/adaptive-embed-batch.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -375,16 +375,22 @@ describe('startup warning for recipes missing max_batch_tokens', () => {
console.warn = original;
}

// The warning text should match the documented contract.
// The warning text contract is still documented; after v0.34.5 (google
// declared max_batch_tokens) every first-party native/openai-compat
// recipe is capped, so the canary set is empty. The mechanism itself
// is still exercised by the once-per-process suppression check above
// (firstCallCount stability across re-configure).
const contractMatch = warnings.filter(w =>
w.includes('[ai.gateway]') && w.includes('declares an embedding touchpoint'),
);
expect(contractMatch.length).toBeGreaterThan(0);
expect(contractMatch.length).toBe(0);

// Voyage declares max_batch_tokens → suppressed. OpenAI is the
// canonical fast-path recipe → also suppressed by id. Both must be
// absent from the warnings.
expect(warnings.find(w => w.includes('"voyage"'))).toBeUndefined();
expect(warnings.find(w => w.includes('"openai"'))).toBeUndefined();
// After v0.34.5 google also declares max_batch_tokens.
expect(warnings.find(w => w.includes('"google"'))).toBeUndefined();
});
});
15 changes: 10 additions & 5 deletions test/ai/no-batch-cap-suppression.serial.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,20 @@ describe('v0.32 #779: no_batch_cap suppresses the missing-max_batch_tokens warni
}
});

test('configureGateway STILL warns for google (real provider, no cap declared)', () => {
test('configureGateway does not warn for any first-party recipe (every embedding recipe declares max_batch_tokens or no_batch_cap)', () => {
warnSpy.mockClear();
resetGateway();
configureGateway({ env: {} });
const messages = warnSpy.mock.calls.map(c => String(c[0] ?? ''));
expect(
messages.some(m => m.includes('"google"') && m.includes('without max_batch_tokens')),
'google should warn (it has fixed-cap models)',
).toBe(true);
// Pre-v0.34.5 this test asserted that google STILL warned (waiting for
// someone to cap it). google now declares max_batch_tokens, so the
// contract flips: every native/openai-compat recipe is capped, and the
// missing-cap warning is silent for the entire first-party set.
const missingCapWarnings = messages.filter(m =>
m.includes('declares an embedding touchpoint') &&
m.includes('without max_batch_tokens'),
);
expect(missingCapWarnings).toHaveLength(0);
});

test('every recipe with empty models[] declares user_provided_models OR has openai-fast-path', () => {
Expand Down