Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion code_puppy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,7 @@ def get_openai_reasoning_summary() -> str:
- detailed: fuller reasoning summaries
"""
allowed_values = {"auto", "concise", "detailed"}
configured = (get_value("openai_reasoning_summary") or "auto").strip().lower()
configured = (get_value("openai_reasoning_summary") or "detailed").strip().lower()
if configured not in allowed_values:
return "auto"
return configured
Expand Down
1 change: 1 addition & 0 deletions code_puppy/model_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ def make_model_settings(

uses_responses_api = (
model_type == "chatgpt_oauth"
or model_type == "azure_foundry_openai"
or (model_type == "openai" and "codex" in model_name)
or (model_type == "custom_openai" and "codex" in model_name)
)
Expand Down
40 changes: 40 additions & 0 deletions code_puppy/plugins/azure_foundry/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,46 @@
"haiku": 200000, # 200K tokens for Haiku models
}

# Context lengths for OpenAI models (Azure doesn't expose this in the catalog API).
# Prefixes are matched longest-first against the model name.
OPENAI_CONTEXT_LENGTHS: dict[str, int] = {
    "gpt-5.4": 1000000,
    "gpt-5.4-mini": 1000000,
    "gpt-5.3-codex": 1000000,
    "gpt-5.3": 1000000,
    "gpt-5.2-codex": 1000000,
    "gpt-5.2": 1000000,
    "gpt-5.1-codex-max": 1000000,
    "gpt-5.1-codex-mini": 1000000,
    "gpt-5.1-codex": 1000000,
    "gpt-5.1": 1000000,
    "gpt-5-codex": 1000000,
    "gpt-5": 1000000,
    "gpt-4.1": 1000000,
    "gpt-4.1-mini": 1000000,
    "gpt-4.1-nano": 1000000,
    "o4-mini": 200000,
    "o3": 200000,
    "o3-mini": 200000,
    "o1": 200000,
    "o1-mini": 128000,
    "codex-mini": 200000,
}
DEFAULT_OPENAI_CONTEXT_LENGTH = 128000

# Prefixes sorted longest-first, computed once at import time so the lookup
# below doesn't re-sort the key set on every call.
_OPENAI_PREFIXES_LONGEST_FIRST: list[str] = sorted(
    OPENAI_CONTEXT_LENGTHS, key=len, reverse=True
)


def get_openai_context_length(model_name: str) -> int:
    """Look up the context length for an OpenAI model by name.

    Matches the longest prefix first so 'gpt-5.4-mini' matches before 'gpt-5.4'.
    Falls back to DEFAULT_OPENAI_CONTEXT_LENGTH if no match.

    Args:
        model_name: Deployment/model name, e.g. "gpt-4.1-mini" or "o1-mini-2024".

    Returns:
        Maximum context window size in tokens.
    """
    for prefix in _OPENAI_PREFIXES_LONGEST_FIRST:
        if model_name.startswith(prefix):
            return OPENAI_CONTEXT_LENGTHS[prefix]
    return DEFAULT_OPENAI_CONTEXT_LENGTH


# Default deployment name patterns (can be overridden by user)
DEFAULT_DEPLOYMENT_NAMES: dict[str, str] = {
"opus": "claude-opus-4-6",
Expand Down
189 changes: 189 additions & 0 deletions code_puppy/plugins/azure_foundry/discovery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
"""Azure AI Services deployment discovery.

Queries the Azure Management API to find AI Services accounts and
list their model deployments. Works with any AIServices account
hosting Anthropic, OpenAI, or other model formats.
"""

from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import Any

logger = logging.getLogger(__name__)

# OAuth scope used to request tokens for the Azure management plane.
AZURE_MANAGEMENT_SCOPE = "https://management.azure.com/.default"
# Base URL for all Azure Management REST API requests in this module.
MANAGEMENT_BASE = "https://management.azure.com"
# API version for the generic resource-listing endpoint (find_account).
RESOURCE_API_VERSION = "2021-04-01"
# API version for the accounts/{name}/deployments endpoint (list_deployments).
DEPLOYMENT_API_VERSION = "2024-10-01"


@dataclass
class AzureAccount:
    """Discovered Azure AI Services account."""

    resource_id: str  # Full ARM resource ID (/subscriptions/.../accounts/{name})
    name: str  # Account name as searched for / returned by the catalog
    location: str  # Azure region, e.g. "eastus"; "" when absent in the response
    resource_group: str  # Parsed from the resource ID; "" when not found
    subscription_id: str  # GUID of the subscription the account belongs to


@dataclass
class AzureDeployment:
    """Discovered model deployment on an Azure AI Services account."""

    name: str  # Deployment name (what API requests are addressed to)
    model_name: str  # Underlying model name, from properties.model.name
    model_format: str  # "Anthropic", "OpenAI", etc.
    model_version: str  # Model version string, "" when missing
    provisioning_state: str  # e.g. "Succeeded"; callers filter as needed
    sku_name: str  # SKU/tier name from the deployment's sku block
    capacity: int  # Provisioned capacity units; 0 when missing


def _get_management_token() -> str | None:
    """Get a token for the Azure Management API using az login credentials.

    Returns:
        Bearer token string, or None if auth fails.
    """
    try:
        from azure.identity import AzureCliCredential
    except Exception as exc:
        logger.warning("Failed to get management token: %s", exc)
        return None
    try:
        return AzureCliCredential().get_token(AZURE_MANAGEMENT_SCOPE).token
    except Exception as exc:
        logger.warning("Failed to get management token: %s", exc)
        return None


def _management_get(token: str, url: str) -> dict[str, Any] | None:
    """Make a GET request to the Azure Management API.

    Args:
        token: Bearer token for authentication.
        url: Full URL to GET.

    Returns:
        Parsed JSON response, or None on error.
    """
    auth_headers = {"Authorization": f"Bearer {token}"}
    try:
        import httpx

        response = httpx.get(url, headers=auth_headers, timeout=30)
        if response.status_code != 200:
            logger.warning("Management API %s returned %d", url, response.status_code)
            return None
        # json() stays inside the try: a malformed body is logged, not raised.
        return response.json()
    except Exception as e:
        logger.warning("Management API request failed: %s", e)
        return None


def find_account(resource_name: str) -> AzureAccount | None:
    """Find an Azure AI Services account by name across all accessible subscriptions.

    Iterates every enabled subscription visible to the CLI credential and
    returns the first Microsoft.CognitiveServices account whose name matches.

    Args:
        resource_name: The account name to search for.

    Returns:
        AzureAccount if found, None otherwise.
    """
    from urllib.parse import quote

    token = _get_management_token()
    if not token:
        return None

    # List subscriptions
    subs_url = f"{MANAGEMENT_BASE}/subscriptions?api-version=2022-12-01"
    subs_resp = _management_get(token, subs_url)
    if not subs_resp:
        return None

    subscriptions = subs_resp.get("value", [])

    for sub in subscriptions:
        sub_id = sub.get("subscriptionId", "")
        if sub.get("state") != "Enabled":
            continue

        # Search for the resource by name and type. The OData filter contains
        # spaces and single quotes, so it is percent-encoded before being
        # embedded in the query string.
        filter_str = (
            f"name eq '{resource_name}' and "
            f"resourceType eq 'Microsoft.CognitiveServices/accounts'"
        )
        resources_url = (
            f"{MANAGEMENT_BASE}/subscriptions/{sub_id}/resources"
            f"?$filter={quote(filter_str)}&api-version={RESOURCE_API_VERSION}"
        )
        resources_resp = _management_get(token, resources_url)
        if not resources_resp:
            continue

        for resource in resources_resp.get("value", []):
            rid = resource.get("id", "")
            parts = rid.split("/")
            # Resource ID shape:
            # /subscriptions/{sub}/resourceGroups/{rg}/providers/.../accounts/{name}
            rg_idx = next(
                (i for i, p in enumerate(parts) if p.lower() == "resourcegroups"),
                None,
            )
            rg = parts[rg_idx + 1] if rg_idx is not None else ""

            # Return the first matching resource.
            return AzureAccount(
                resource_id=rid,
                name=resource_name,
                location=resource.get("location", ""),
                resource_group=rg,
                subscription_id=sub_id,
            )

    return None


def list_deployments(account: AzureAccount) -> list[AzureDeployment]:
    """List all model deployments on an Azure AI Services account.

    Args:
        account: The account to query.

    Returns:
        List of deployments (all states, caller filters as needed).
    """
    token = _get_management_token()
    if not token:
        return []

    url = (
        f"{MANAGEMENT_BASE}{account.resource_id}"
        f"/deployments?api-version={DEPLOYMENT_API_VERSION}"
    )
    payload = _management_get(token, url)
    if not payload:
        return []

    def _to_deployment(entry: dict[str, Any]) -> AzureDeployment:
        # Flatten the nested properties/model/sku sub-objects into one record.
        props = entry.get("properties", {})
        model = props.get("model", {})
        sku = entry.get("sku", {})
        return AzureDeployment(
            name=entry.get("name", ""),
            model_name=model.get("name", ""),
            model_format=model.get("format", ""),
            model_version=model.get("version", ""),
            provisioning_state=props.get("provisioningState", ""),
            sku_name=sku.get("name", ""),
            capacity=sku.get("capacity", 0),
        )

    return [_to_deployment(entry) for entry in payload.get("value", [])]
Loading