garrytan · alexandreroumieu-codeapprentice · May 9, 2026
diff --git a/src/core/ai/gateway.ts b/src/core/ai/gateway.ts
@@ -105,24 +105,32 @@ export function configureGateway(config: AIGatewayConfig): void {
  */
 const _warnedRecipes = new Set<string>();
 
+function getUsableMaxBatchTokens(maxBatchTokens: number | undefined): number | undefined {
+  return typeof maxBatchTokens === 'number' && maxBatchTokens > 0 ? maxBatchTokens : undefined; // only a positive number counts as a cap; undefined, 0, negatives and NaN all map to undefined
+}
+
 /**
  * Walk every registered recipe with an `embedding` touchpoint. Each one
  * missing `max_batch_tokens` gets exactly one stderr line per process for
- * its first appearance. Recipes WITH the field stay quiet. The
- * recursive-halving safety net only fires when `max_batch_tokens` is set,
- * so a recipe that forgets it has no protection if the provider has a
- * batch cap. Loud-fail over silent-skip per CLAUDE.md; a future
- * Cohere/Mistral/Jina recipe that inherits the embedding-touchpoint
- * pattern but forgets the cap re-creates the v0.27 Voyage backfill loop.
- * The warning calls that out before production traffic hits it.
+ * its first appearance. Recipes WITH the field stay quiet. A `0` value is
+ * an explicit sentinel for model-specific or proxy-defined limits where the
+ * shipped recipe cannot truthfully advertise one static cap.
+ *
+ * The recursive-halving safety net only fires after a token-limit miss, so a
+ * recipe that accidentally omits a known cap loses the proactive pre-split.
+ * Loud-fail over silent-skip per CLAUDE.md; a future Cohere/Mistral/Jina
+ * recipe that inherits the embedding-touchpoint pattern but forgets the cap
+ * re-creates the v0.27 Voyage backfill loop. The warning calls that out
+ * before production traffic hits it.
  */
 function warnRecipesMissingBatchTokens(): void {
   for (const recipe of listRecipes()) {
     const embedding = recipe.touchpoints?.embedding;
-    if (!embedding || embedding.max_batch_tokens !== undefined) continue;
+    const maxBatchTokens = embedding?.max_batch_tokens;
+    if (!embedding || getUsableMaxBatchTokens(maxBatchTokens) !== undefined || maxBatchTokens === 0) continue;
     // OpenAI is the canonical "no cap declared, fast path is intentional"
-    // recipe; suppress the warning for it. Every other recipe missing the
-    // field is suspicious.
+    // recipe; suppress the warning for it. Any other recipe that omits the
+    // field (and does not set the explicit `0` sentinel) is suspicious.
     if (recipe.id === 'openai') continue;
     if (_warnedRecipes.has(recipe.id)) continue;
     _warnedRecipes.add(recipe.id);
@@ -357,7 +365,7 @@ export async function embed(texts: string[]): Promise<Float32Array[]> {
   const expected = cfg.embedding_dimensions ?? DEFAULT_EMBEDDING_DIMENSIONS;
 
   const embedding = recipe.touchpoints?.embedding;
-  const maxBatchTokens = embedding?.max_batch_tokens;
+  const maxBatchTokens = getUsableMaxBatchTokens(embedding?.max_batch_tokens);
   const charsPerToken = embedding?.chars_per_token ?? DEFAULT_CHARS_PER_TOKEN;
 
   // Pre-split is gated on max_batch_tokens. Recipes without it (e.g. OpenAI)

diff --git a/src/core/ai/recipes/google.ts b/src/core/ai/recipes/google.ts
@@ -16,6 +16,10 @@ export const google: Recipe = {
       dims_options: [768, 1536, 3072],
       cost_per_1m_tokens_usd: 0.15,
       price_last_verified: '2026-04-20',
+      // Google documents gemini-embedding-001 at 250 input texts / 20K
+      // tokens per request. Use the raw cap here; gateway.ts applies the
+      // recipe-level safety factor before pre-splitting.
+      max_batch_tokens: 20_000,
     },
     expansion: {
       models: ['gemini-2.0-flash', 'gemini-2.0-flash-lite'],

diff --git a/src/core/ai/recipes/litellm-proxy.ts b/src/core/ai/recipes/litellm-proxy.ts
@@ -26,6 +26,10 @@ export const litellmProxy: Recipe = {
       default_dims: 0, // user must declare --embedding-dimensions explicitly
       cost_per_1m_tokens_usd: undefined,
       price_last_verified: '2026-04-20',
+      // Sentinel: LiteLLM proxies arbitrary upstream embedding models, so the
+      // real batch cap lives in the proxy/model config (`max_input_tokens`),
+      // not in a single truthful static recipe constant.
+      max_batch_tokens: 0,
     },
   },
   setup_hint: 'Run LiteLLM (https://docs.litellm.ai) in front of any provider; set LITELLM_BASE_URL + pass --embedding-model litellm:<model> and --embedding-dimensions <N>.',

diff --git a/src/core/ai/recipes/ollama.ts b/src/core/ai/recipes/ollama.ts
@@ -17,6 +17,10 @@ export const ollama: Recipe = {
       default_dims: 768, // nomic-embed-text native dim
       cost_per_1m_tokens_usd: 0,
       price_last_verified: '2026-04-20',
+      // Sentinel: Ollama's embed cap is model- and runtime-specific (it
+      // depends on `num_ctx` / the context window, and `truncate` defaults to
+      // true), so no static provider-wide max_batch_tokens value is truthful.
+      max_batch_tokens: 0,
     },
   },
   setup_hint: 'Install Ollama from https://ollama.ai, then `ollama pull nomic-embed-text` and `ollama serve`.',

diff --git a/test/ai/adaptive-embed-batch.test.ts b/test/ai/adaptive-embed-batch.test.ts
@@ -39,6 +39,7 @@ import {
   __getShrinkStateForTests,
 } from '../../src/core/ai/gateway.ts';
 import { AIConfigError, AITransientError } from '../../src/core/ai/errors.ts';
+import { getRecipe } from '../../src/core/ai/recipes/index.ts';
 
 // --------- Test helpers ---------
 
@@ -357,34 +358,28 @@ describe('shrink-on-miss adaptive cache', () => {
 
 // --------- 7. Startup warning (D9-B) ---------
 
-describe('startup warning for recipes missing max_batch_tokens', () => {
+describe('startup warning hygiene for shipped recipes', () => {
   beforeEach(() => resetGateway());
 
-  test('first configureGateway call warns about each missing-cap recipe; subsequent calls suppressed', () => {
+  test('shipped embedding recipes are either capped or explicitly opt out', () => {
+    expect(getRecipe('google')!.touchpoints.embedding!.max_batch_tokens).toBe(20_000);
+    expect(getRecipe('ollama')!.touchpoints.embedding!.max_batch_tokens).toBe(0);
+    expect(getRecipe('litellm')!.touchpoints.embedding!.max_batch_tokens).toBe(0);
+  });
+
+  test('configureGateway stays quiet for the shipped registry; subsequent calls remain quiet', () => {
     const warnings: string[] = [];
     const original = console.warn;
     console.warn = (msg: string) => warnings.push(String(msg));
     try {
       configureOpenAI();
       const firstCallCount = warnings.length;
-      // Reconfigure: the warning should NOT re-fire for the same recipes
-      // within one process (we already told the operator).
       configureOpenAI();
       expect(warnings.length).toBe(firstCallCount);
     } finally {
       console.warn = original;
     }
 
-    // The warning text should match the documented contract.
-    const contractMatch = warnings.filter(w =>
-      w.includes('[ai.gateway]') && w.includes('declares an embedding touchpoint'),
-    );
-    expect(contractMatch.length).toBeGreaterThan(0);
-
-    // Voyage declares max_batch_tokens → suppressed. OpenAI is the
-    // canonical fast-path recipe → also suppressed by id. Both must be
-    // absent from the warnings.
-    expect(warnings.find(w => w.includes('"voyage"'))).toBeUndefined();
-    expect(warnings.find(w => w.includes('"openai"'))).toBeUndefined();
+    expect(warnings).toEqual([]);
   });
 });