vectorize-io · DK09876 · May 19, 2026 · May 22, 2026 · May 22, 2026
diff --git a/hindsight-api-slim/hindsight_api/config.py b/hindsight-api-slim/hindsight_api/config.py
@@ -200,6 +200,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
 ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
 ENV_EMBEDDINGS_OPENAI_BASE_URL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_BASE_URL"
 ENV_EMBEDDINGS_OPENAI_BATCH_SIZE = "HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE"
+ENV_EMBEDDINGS_OPENAI_DIMENSIONS = "HINDSIGHT_API_EMBEDDINGS_OPENAI_DIMENSIONS"
 
 # Gemini/Vertex AI embeddings configuration
 ENV_EMBEDDINGS_GEMINI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_GEMINI_API_KEY"
@@ -792,6 +793,13 @@ def _parse_positive_int(name: str, raw: str | None, default: int) -> int:
     return parsed
 
 
+def _parse_optional_positive_int(name: str, raw: str | None) -> int | None:
+    """Return ``None`` for an unset or empty env var; otherwise parse it as a positive integer."""
+    # Non-empty values are delegated to ``_parse_positive_int``; the ``1`` only fills its
+    # required ``default`` parameter (presumably unused for a non-empty ``raw`` — confirm).
+    return None if not raw else _parse_positive_int(name, raw, 1)
+
+
 def _validate_extraction_mode(mode: str) -> str:
     """Validate and normalize extraction mode."""
     mode_lower = mode.lower()
@@ -1179,6 +1187,7 @@ class HindsightConfig:
     # Defaulted fields (source-compatible additions — existing direct constructor callers keep working).
     # Keep at the end of the dataclass; Python forbids non-default fields after default fields.
     embeddings_openai_batch_size: int = DEFAULT_EMBEDDINGS_OPENAI_BATCH_SIZE
+    embeddings_openai_dimensions: int | None = None
 
     # Class-level sets for configuration categorization
 
@@ -1538,6 +1547,10 @@ def from_env(cls) -> "HindsightConfig":
                 os.getenv(ENV_EMBEDDINGS_OPENAI_BATCH_SIZE),
                 DEFAULT_EMBEDDINGS_OPENAI_BATCH_SIZE,
             ),
+            embeddings_openai_dimensions=_parse_optional_positive_int(
+                ENV_EMBEDDINGS_OPENAI_DIMENSIONS,
+                os.getenv(ENV_EMBEDDINGS_OPENAI_DIMENSIONS),
+            ),
             # Cohere embeddings (with backward-compatible fallback to shared API key)
             embeddings_cohere_api_key=os.getenv(ENV_EMBEDDINGS_COHERE_API_KEY) or os.getenv(ENV_COHERE_API_KEY),
             embeddings_cohere_model=os.getenv(ENV_EMBEDDINGS_COHERE_MODEL, DEFAULT_EMBEDDINGS_COHERE_MODEL),

diff --git a/hindsight-api-slim/hindsight_api/engine/embeddings.py b/hindsight-api-slim/hindsight_api/engine/embeddings.py
@@ -385,6 +385,7 @@ def __init__(
         model: str = DEFAULT_EMBEDDINGS_OPENAI_MODEL,
         base_url: str | None = None,
         batch_size: int = 100,
+        dimensions: int | None = None,
         max_retries: int = 3,
     ):
         """
@@ -395,12 +396,14 @@ def __init__(
             model: OpenAI embedding model name (default: text-embedding-3-small)
             base_url: Custom base URL for OpenAI-compatible API (e.g., Azure OpenAI endpoint)
             batch_size: Maximum batch size for embedding requests (default: 100)
+            dimensions: Optional requested output dimensions for OpenAI text-embedding-3 models
             max_retries: Maximum number of retries for failed requests (default: 3)
         """
         self.api_key = api_key
         self.model = model
         self.base_url = base_url
         self.batch_size = batch_size
+        self.dimensions = dimensions
         self.max_retries = max_retries
         self._client = None
         self._dimension: int | None = None
@@ -445,7 +448,9 @@ async def initialize(self) -> None:
         self._client = OpenAI(**client_kwargs)
 
         # Try to get dimension from known models, otherwise do a test embedding
-        if self.model in self.MODEL_DIMENSIONS:
+        if self.dimensions is not None:
+            self._dimension = self.dimensions
+        elif self.model in self.MODEL_DIMENSIONS:
             self._dimension = self.MODEL_DIMENSIONS[self.model]
         else:
             # Do a test embedding to detect dimension
@@ -480,10 +485,14 @@ def encode(self, texts: list[str]) -> list[list[float]]:
         for i in range(0, len(texts), self.batch_size):
             batch = texts[i : i + self.batch_size]
 
-            response = self._client.embeddings.create(
-                model=self.model,
-                input=batch,
-            )
+            request = {
+                "model": self.model,
+                "input": batch,
+            }
+            if self.dimensions is not None:
+                request["dimensions"] = self.dimensions
+
+            response = self._client.embeddings.create(**request)
 
             # Sort by index to ensure correct order
             batch_embeddings = sorted(response.data, key=lambda x: x.index)
@@ -492,6 +501,75 @@ def encode(self, texts: list[str]) -> list[list[float]]:
         return all_embeddings
 
 
+class CodexOAuthEmbeddings(OpenAIEmbeddings):
+    """
+    OpenAI embeddings using the Codex/ChatGPT OAuth token from ``~/.codex/auth.json``.
+
+    Codex OAuth is an LLM-provider auth path in Hindsight, but the same bearer token
+    can also authenticate against the standard OpenAI embeddings endpoint. This keeps
+    embeddings on the user's existing Codex subscription/OAuth path without requiring
+    a separate OpenAI/OpenRouter/Gemini/Cohere API key.
+
+    Token refresh is handled automatically: the manager proactively refreshes the
+    access_token before it expires and reactively refreshes on 401 responses from
+    the embeddings API.
+    """
+
+    def __init__(
+        self,
+        model: str = DEFAULT_EMBEDDINGS_OPENAI_MODEL,
+        batch_size: int = 100,
+        dimensions: int | None = None,
+        max_retries: int = 3,
+    ):
+        from .providers.codex_auth import CodexAuthManager
+
+        self._auth_manager = CodexAuthManager.from_file()
+        super().__init__(
+            api_key=self._auth_manager.access_token,
+            model=model,
+            base_url="https://api.openai.com/v1",
+            batch_size=batch_size,
+            dimensions=dimensions,
+            max_retries=max_retries,
+        )
+
+    @property
+    def provider_name(self) -> str:
+        return "openai-codex"
+
+    def _sync_access_token(self) -> None:
+        """Copy the auth manager's current access token onto this instance and its client."""
+        token = self._auth_manager.access_token
+        self.api_key = token
+        # ``_client`` stays ``None`` until ``initialize()`` builds it; nothing to update before then.
+        if self._client is not None:
+            self._client.api_key = token
+
+    def encode(self, texts: list[str]) -> list[list[float]]:
+        """Generate embeddings, refreshing the OAuth token if needed.
+
+        Proactively refreshes before the call when the token is near expiry,
+        and reactively refreshes once on a 401 from the OpenAI embeddings API.
+        A second authentication failure after the forced refresh is not retried
+        and propagates to the caller.
+        """
+        from openai import AuthenticationError
+
+        # Proactive refresh — cheap when fresh (JWT exp decode + compare).
+        self._auth_manager.ensure_fresh_token()
+        self._sync_access_token()
+
+        try:
+            return super().encode(texts)
+        except AuthenticationError:
+            # Reactive refresh — token was valid by the JWT clock but the
+            # server rejected it (rotated server-side, race, etc.).
+            self._auth_manager.refresh_tokens(reason="reactive (401 from embeddings API)", force=True)
+            self._sync_access_token()
+            return super().encode(texts)
+
+
 class CohereEmbeddings(Embeddings):
     """
     Cohere embeddings implementation using the Cohere API.
@@ -1140,6 +1218,14 @@ def create_embeddings_from_env() -> Embeddings:
             model=model,
             base_url=base_url,
             batch_size=config.embeddings_openai_batch_size,
+            dimensions=config.embeddings_openai_dimensions,
+        )
+    elif provider == "openai-codex":
+        model = os.environ.get(ENV_EMBEDDINGS_OPENAI_MODEL, DEFAULT_EMBEDDINGS_OPENAI_MODEL)
+        return CodexOAuthEmbeddings(
+            model=model,
+            batch_size=config.embeddings_openai_batch_size,
+            dimensions=config.embeddings_openai_dimensions,
         )
     elif provider == "openrouter":
         api_key = config.embeddings_openrouter_api_key
@@ -1153,6 +1239,7 @@ def create_embeddings_from_env() -> Embeddings:
             model=config.embeddings_openrouter_model,
             base_url="https://openrouter.ai/api/v1",
             batch_size=config.embeddings_openai_batch_size,
+            dimensions=config.embeddings_openai_dimensions,
         )
     elif provider == "cohere":
         api_key = config.embeddings_cohere_api_key
@@ -1206,5 +1293,6 @@ def create_embeddings_from_env() -> Embeddings:
     else:
         raise ValueError(
             f"Unknown embeddings provider: {provider}. "
-            f"Supported: 'local', 'tei', 'openai', 'cohere', 'google', 'litellm', 'litellm-sdk'"
+            f"Supported: 'local', 'tei', 'openai', 'openai-codex', 'openrouter', 'cohere', 'google', "
+            f"'litellm', 'litellm-sdk'"
         )