Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions hindsight-api-slim/hindsight_api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
ENV_EMBEDDINGS_OPENAI_MODEL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_MODEL"
ENV_EMBEDDINGS_OPENAI_BASE_URL = "HINDSIGHT_API_EMBEDDINGS_OPENAI_BASE_URL"
ENV_EMBEDDINGS_OPENAI_BATCH_SIZE = "HINDSIGHT_API_EMBEDDINGS_OPENAI_BATCH_SIZE"
ENV_EMBEDDINGS_OPENAI_DIMENSIONS = "HINDSIGHT_API_EMBEDDINGS_OPENAI_DIMENSIONS"

# Gemini/Vertex AI embeddings configuration
ENV_EMBEDDINGS_GEMINI_API_KEY = "HINDSIGHT_API_EMBEDDINGS_GEMINI_API_KEY"
Expand Down Expand Up @@ -792,6 +793,13 @@ def _parse_positive_int(name: str, raw: str | None, default: int) -> int:
return parsed


def _parse_optional_positive_int(name: str, raw: str | None) -> int | None:
"""Parse an optional env var that must be a positive integer when set."""
if raw is None or raw == "":
return None
return _parse_positive_int(name, raw, 1)


def _validate_extraction_mode(mode: str) -> str:
"""Validate and normalize extraction mode."""
mode_lower = mode.lower()
Expand Down Expand Up @@ -1179,6 +1187,7 @@ class HindsightConfig:
# Defaulted fields (source-compatible additions — existing direct constructor callers keep working).
# Keep at the end of the dataclass; Python forbids non-default fields after default fields.
embeddings_openai_batch_size: int = DEFAULT_EMBEDDINGS_OPENAI_BATCH_SIZE
embeddings_openai_dimensions: int | None = None

# Class-level sets for configuration categorization

Expand Down Expand Up @@ -1538,6 +1547,10 @@ def from_env(cls) -> "HindsightConfig":
os.getenv(ENV_EMBEDDINGS_OPENAI_BATCH_SIZE),
DEFAULT_EMBEDDINGS_OPENAI_BATCH_SIZE,
),
embeddings_openai_dimensions=_parse_optional_positive_int(
ENV_EMBEDDINGS_OPENAI_DIMENSIONS,
os.getenv(ENV_EMBEDDINGS_OPENAI_DIMENSIONS),
),
# Cohere embeddings (with backward-compatible fallback to shared API key)
embeddings_cohere_api_key=os.getenv(ENV_EMBEDDINGS_COHERE_API_KEY) or os.getenv(ENV_COHERE_API_KEY),
embeddings_cohere_model=os.getenv(ENV_EMBEDDINGS_COHERE_MODEL, DEFAULT_EMBEDDINGS_COHERE_MODEL),
Expand Down
100 changes: 94 additions & 6 deletions hindsight-api-slim/hindsight_api/engine/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,7 @@ def __init__(
model: str = DEFAULT_EMBEDDINGS_OPENAI_MODEL,
base_url: str | None = None,
batch_size: int = 100,
dimensions: int | None = None,
max_retries: int = 3,
):
"""
Expand All @@ -395,12 +396,14 @@ def __init__(
model: OpenAI embedding model name (default: text-embedding-3-small)
base_url: Custom base URL for OpenAI-compatible API (e.g., Azure OpenAI endpoint)
batch_size: Maximum batch size for embedding requests (default: 100)
dimensions: Optional requested output dimensions for OpenAI text-embedding-3 models
max_retries: Maximum number of retries for failed requests (default: 3)
"""
self.api_key = api_key
self.model = model
self.base_url = base_url
self.batch_size = batch_size
self.dimensions = dimensions
self.max_retries = max_retries
self._client = None
self._dimension: int | None = None
Expand Down Expand Up @@ -445,7 +448,9 @@ async def initialize(self) -> None:
self._client = OpenAI(**client_kwargs)

# Try to get dimension from known models, otherwise do a test embedding
if self.model in self.MODEL_DIMENSIONS:
if self.dimensions is not None:
self._dimension = self.dimensions
elif self.model in self.MODEL_DIMENSIONS:
self._dimension = self.MODEL_DIMENSIONS[self.model]
else:
# Do a test embedding to detect dimension
Expand Down Expand Up @@ -480,10 +485,14 @@ def encode(self, texts: list[str]) -> list[list[float]]:
for i in range(0, len(texts), self.batch_size):
batch = texts[i : i + self.batch_size]

response = self._client.embeddings.create(
model=self.model,
input=batch,
)
request = {
"model": self.model,
"input": batch,
}
if self.dimensions is not None:
request["dimensions"] = self.dimensions

response = self._client.embeddings.create(**request)

# Sort by index to ensure correct order
batch_embeddings = sorted(response.data, key=lambda x: x.index)
Expand All @@ -492,6 +501,75 @@ def encode(self, texts: list[str]) -> list[list[float]]:
return all_embeddings


class CodexOAuthEmbeddings(OpenAIEmbeddings):
    """
    OpenAI embeddings using the Codex/ChatGPT OAuth token from ``~/.codex/auth.json``.

    Codex OAuth is an LLM-provider auth path in Hindsight, but the same bearer token
    can also authenticate against the standard OpenAI embeddings endpoint. This keeps
    embeddings on the user's existing Codex subscription/OAuth path without requiring
    a separate OpenAI/OpenRouter/Gemini/Cohere API key.

    Token refresh is handled automatically: the manager proactively refreshes the
    access_token before it expires and reactively refreshes on 401 responses from
    the embeddings API.
    """

    def __init__(
        self,
        model: str = DEFAULT_EMBEDDINGS_OPENAI_MODEL,
        batch_size: int = 100,
        dimensions: int | None = None,
        max_retries: int = 3,
    ):
        """
        Load the Codex auth manager and configure the parent OpenAI embeddings.

        Args:
            model: OpenAI embedding model name.
            batch_size: Maximum batch size for embedding requests (default: 100)
            dimensions: Optional requested output dimensions, forwarded to the parent.
            max_retries: Maximum number of retries for failed requests (default: 3)
        """
        # Imported at call time so the auth module is only loaded when this
        # provider is actually constructed.
        from .providers.codex_auth import CodexAuthManager

        # The manager must exist before the parent constructor runs because its
        # current access token is what the parent receives as the API key.
        self._auth_manager = CodexAuthManager.from_file()
        super().__init__(
            api_key=self._auth_manager.access_token,
            model=model,
            base_url="https://api.openai.com/v1",
            batch_size=batch_size,
            dimensions=dimensions,
            max_retries=max_retries,
        )

    @property
    def provider_name(self) -> str:
        """Return the provider identifier for this embeddings backend."""
        return "openai-codex"

    def _sync_access_token(self) -> None:
        """Copy the manager's current access token onto this instance and its client."""
        token = self._auth_manager.access_token
        self.api_key = token
        # The client is None until it has been created; in that case only the
        # stored api_key needs updating.
        if self._client is not None:
            self._client.api_key = token

    def encode(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings, refreshing the OAuth token if needed.

        Proactively refreshes before the call when the token is near expiry,
        and reactively refreshes once on a 401 from the OpenAI embeddings API.

        Args:
            texts: Texts to embed; passed unchanged to the parent ``encode``.

        Returns:
            The embeddings returned by the parent ``encode``.

        Raises:
            openai.AuthenticationError: If the retry after the forced refresh is
                rejected as well. Exceptions raised by the auth manager while
                refreshing are not caught here.
        """
        from openai import AuthenticationError

        # Proactive refresh — cheap when fresh (JWT exp decode + compare).
        self._auth_manager.ensure_fresh_token()
        if self._auth_manager.access_token != self.api_key:
            self._sync_access_token()

        try:
            return super().encode(texts)
        except AuthenticationError:
            # Reactive refresh — token was valid by the JWT clock but the
            # server rejected it (rotated server-side, race, etc.).
            self._auth_manager.refresh_tokens(
                reason="reactive (401 from embeddings API)",
                force=True,
            )
            self._sync_access_token()
            # Single retry only: a second AuthenticationError propagates.
            return super().encode(texts)


class CohereEmbeddings(Embeddings):
"""
Cohere embeddings implementation using the Cohere API.
Expand Down Expand Up @@ -1140,6 +1218,14 @@ def create_embeddings_from_env() -> Embeddings:
model=model,
base_url=base_url,
batch_size=config.embeddings_openai_batch_size,
dimensions=config.embeddings_openai_dimensions,
)
elif provider == "openai-codex":
model = os.environ.get(ENV_EMBEDDINGS_OPENAI_MODEL, DEFAULT_EMBEDDINGS_OPENAI_MODEL)
return CodexOAuthEmbeddings(
model=model,
batch_size=config.embeddings_openai_batch_size,
dimensions=config.embeddings_openai_dimensions,
)
elif provider == "openrouter":
api_key = config.embeddings_openrouter_api_key
Expand All @@ -1153,6 +1239,7 @@ def create_embeddings_from_env() -> Embeddings:
model=config.embeddings_openrouter_model,
base_url="https://openrouter.ai/api/v1",
batch_size=config.embeddings_openai_batch_size,
dimensions=config.embeddings_openai_dimensions,
)
elif provider == "cohere":
api_key = config.embeddings_cohere_api_key
Expand Down Expand Up @@ -1206,5 +1293,6 @@ def create_embeddings_from_env() -> Embeddings:
else:
raise ValueError(
f"Unknown embeddings provider: {provider}. "
f"Supported: 'local', 'tei', 'openai', 'cohere', 'google', 'litellm', 'litellm-sdk'"
f"Supported: 'local', 'tei', 'openai', 'openai-codex', 'openrouter', 'cohere', 'google', "
f"'litellm', 'litellm-sdk'"
)
Loading
Loading