diff --git a/packages/cli/src/adapters/codex-api-format.ts b/packages/cli/src/adapters/codex-api-format.ts index 81ba952a..af8b6dad 100644 --- a/packages/cli/src/adapters/codex-api-format.ts +++ b/packages/cli/src/adapters/codex-api-format.ts @@ -1,18 +1,20 @@ /** - * CodexAPIFormat — Layer 1 wire format for the OpenAI Responses API (Codex models). + * CodexAPIFormat — Layer 1 wire format for the OpenAI Responses API. * - * The Codex Responses API is a distinct wire format from Chat Completions: + * The Responses API is a distinct wire format from Chat Completions: * - Uses 'input' instead of 'messages' * - Uses 'instructions' instead of 'system' messages * - Uses 'max_output_tokens' instead of 'max_tokens' * - Tools are flattened (no 'function' wrapper) * - SSE events use different event names (response.output_text.delta etc.) * - * This format handles Codex models only. All other OpenAI models use OpenAIAPIFormat. + * This format handles Codex-family and Responses-only OpenAI models. */ import { BaseAPIFormat, type AdapterResult, matchesModelFamily } from "./base-api-format.js"; import type { StreamFormat } from "../providers/transport/types.js"; +import { log } from "../logger.js"; +import { resolveOpenAIReasoningEffort } from "./openai-reasoning.js"; export class CodexAPIFormat extends BaseAPIFormat { constructor(modelId: string) { @@ -39,6 +41,18 @@ export class CodexAPIFormat extends BaseAPIFormat { return "openai-responses-sse"; } + override prepareRequest(request: any, originalRequest: any): any { + const reasoning = resolveOpenAIReasoningEffort(this.modelId, originalRequest); + if (reasoning) { + request.reasoning = { effort: reasoning.effort }; + delete request.thinking; + log(`[CodexAPIFormat] Mapped ${reasoning.source} -> reasoning.effort: ${reasoning.effort}`); + } + + this.truncateToolNames(request); + return request; + } + override getContextWindow(): number { // Codex models: use a safe default return 200_000; @@ -61,6 +75,12 @@ export class CodexAPIFormat extends BaseAPIFormat { payload.max_output_tokens = Math.max(16, claudeRequest.max_tokens); } + const reasoning = resolveOpenAIReasoningEffort(this.modelId, claudeRequest); + if (reasoning) { + payload.reasoning = { effort: reasoning.effort }; + log(`[CodexAPIFormat] Mapped ${reasoning.source} -> reasoning.effort: ${reasoning.effort}`); + } + if (tools.length > 0) { payload.tools = tools.map((tool: any) => { if (tool.type === "function" && tool.function) { diff --git a/packages/cli/src/adapters/openai-api-format.ts b/packages/cli/src/adapters/openai-api-format.ts index edefdaef..f3d7e0f1 100644 --- a/packages/cli/src/adapters/openai-api-format.ts +++ b/packages/cli/src/adapters/openai-api-format.ts @@ -2,18 +2,22 @@ * OpenAIAPIFormat — Layer 1 wire format for OpenAI Chat Completions API. * * Handles: - * - Context window detection for OpenAI models (gpt-*, o1, o3, codex) - * - Mapping 'thinking.budget_tokens' to 'reasoning_effort' for o1/o3 models + * - Context window detection for OpenAI models (gpt-*, o1, o3) + * - Mapping Claude thinking/output_config to OpenAI reasoning_effort * - max_completion_tokens vs max_tokens for newer models - * - Codex Responses API message conversion and payload building * - Tool choice mapping * - * Also serves as Layer 2 ModelDialect for OpenAI-native models (o1/o3 reasoning params). + * Also serves as Layer 2 ModelDialect for OpenAI-native chat models. */ import { BaseAPIFormat, type AdapterResult } from "./base-api-format.js"; import { log } from "../logger.js"; import type { StreamFormat } from "../providers/transport/types.js"; +import { + isOpenAIChatModel, + mapBudgetTokensToReasoningEffort, + resolveOpenAIReasoningEffort, +} from "./openai-reasoning.js"; export class OpenAIAPIFormat extends BaseAPIFormat { constructor(modelId: string) { @@ -36,17 +40,21 @@ export class OpenAIAPIFormat extends BaseAPIFormat { * Handle request preparation — reasoning parameters and tool name truncation */ override prepareRequest(request: any, originalRequest: any): any { - // Map thinking.budget_tokens -> reasoning_effort for o1/o3 models - if (originalRequest.thinking && this.isReasoningModel()) { - const { budget_tokens } = originalRequest.thinking; - let effort = "medium"; - if (budget_tokens < 4000) effort = "minimal"; - else if (budget_tokens < 16000) effort = "low"; - else if (budget_tokens >= 32000) effort = "high"; - + const reasoning = resolveOpenAIReasoningEffort(this.modelId, originalRequest); + if (reasoning) { + request.reasoning_effort = reasoning.effort; + delete request.thinking; + log(`[OpenAIAPIFormat] Mapped ${reasoning.source} -> reasoning_effort: ${reasoning.effort}`); + } else if (originalRequest.thinking?.budget_tokens !== undefined && this.isReasoningModel()) { + const effort = mapBudgetTokensToReasoningEffort(originalRequest.thinking.budget_tokens); request.reasoning_effort = effort; delete request.thinking; - log(`[OpenAIAPIFormat] Mapped budget ${budget_tokens} -> reasoning_effort: ${effort}`); + log( + `[OpenAIAPIFormat] Mapped thinking.budget_tokens ${originalRequest.thinking.budget_tokens} -> reasoning_effort: ${effort}` + ); + } else if (request.thinking && isOpenAIChatModel(this.modelId)) { + delete request.thinking; + log(`[OpenAIAPIFormat] Stripped unsupported thinking params for ${this.modelId}`); } // Truncate tool names if model has a limit @@ -59,7 +67,8 @@ export class OpenAIAPIFormat extends BaseAPIFormat { } shouldHandle(modelId: string): boolean { - return modelId.startsWith("oai/") || modelId.includes("o1") || modelId.includes("o3"); + const model = modelId.toLowerCase(); + return isOpenAIChatModel(modelId) || model.includes("o1") || model.includes("o3"); } getName(): string { @@ -130,16 +139,15 @@ export class OpenAIAPIFormat extends BaseAPIFormat { } } - // Reasoning params handled in prepareRequest instead - if (claudeRequest.thinking && this.isReasoningModel()) { - const { budget_tokens } = claudeRequest.thinking; - let effort = "medium"; - if (budget_tokens < 4000) effort = "minimal"; - else if (budget_tokens < 16000) effort = "low"; - else if (budget_tokens >= 32000) effort = "high"; + const reasoning = resolveOpenAIReasoningEffort(this.modelId, claudeRequest); + if (reasoning) { + payload.reasoning_effort = reasoning.effort; + log(`[OpenAIAPIFormat] Mapped ${reasoning.source} -> reasoning_effort: ${reasoning.effort}`); + } else if (claudeRequest.thinking?.budget_tokens !== undefined && this.isReasoningModel()) { + const effort = mapBudgetTokensToReasoningEffort(claudeRequest.thinking.budget_tokens); payload.reasoning_effort = effort; log( - `[OpenAIAPIFormat] Mapped thinking.budget_tokens ${budget_tokens} -> reasoning_effort: ${effort}` + `[OpenAIAPIFormat] Mapped thinking.budget_tokens ${claudeRequest.thinking.budget_tokens} -> reasoning_effort: ${effort}` ); } diff --git a/packages/cli/src/adapters/openai-reasoning.ts b/packages/cli/src/adapters/openai-reasoning.ts new file mode 100644 index 00000000..8009a088 --- /dev/null +++ b/packages/cli/src/adapters/openai-reasoning.ts @@ -0,0 +1,147 @@ +export type OpenAIReasoningEffort = + | "none" + | "minimal" + | "low" + | "medium" + | "high" + | "xhigh"; + +interface OpenAIReasoningProfile { + supported: OpenAIReasoningEffort[]; + transport: "chat" | "responses"; +} + +const REASONING_ORDER: OpenAIReasoningEffort[] = [ + "none", + "minimal", + "low", + "medium", + "high", + "xhigh", +]; + +const GPT5_REASONING_PROFILES: Record = { + "gpt-5": { supported: ["minimal", "low", "medium", "high"], transport: "chat" }, + "gpt-5-mini": { supported: ["low", "medium", "high"], transport: "chat" }, + "gpt-5-nano": { supported: [], transport: "chat" }, + "gpt-5-pro": { supported: ["high"], transport: "responses" }, + "gpt-5-codex": { supported: ["low", "medium", "high"], transport: "responses" }, + "gpt-5.1": { supported: ["none", "low", "medium", "high"], transport: "chat" }, + "gpt-5.1-codex": { supported: ["none", "low", "medium", "high"], transport: "responses" }, + "gpt-5.1-codex-mini": { + supported: ["none", "low", "medium", "high"], + transport: "responses", + }, + "gpt-5.1-codex-max": { + supported: ["none", "low", "medium", "high", "xhigh"], + transport: "responses", + }, + "gpt-5.2": { supported: ["none", "low", "medium", "high", "xhigh"], transport: "chat" }, + "gpt-5.2-pro": { supported: ["medium", "high", "xhigh"], transport: "responses" }, + "gpt-5.2-codex": { supported: ["low", "medium", "high", "xhigh"], transport: "responses" }, + "gpt-5.3-codex": { supported: ["low", "medium", "high", "xhigh"], transport: "responses" }, + "gpt-5.4": { supported: ["none", "low", "medium", "high", "xhigh"], transport: "chat" }, + "gpt-5.4-pro": { supported: ["medium", "high", "xhigh"], transport: "responses" }, + "gpt-5.4-mini": { supported: ["none", "low", "medium", "high"], transport: "chat" }, + "gpt-5.4-nano": { supported: [], transport: "chat" }, +}; + +function normalizeModelId(modelId: string): string { + const lower = modelId.toLowerCase(); + const bare = lower.split("/").pop() || lower; + + const knownModelIds = [ + "gpt-5.4-pro", + "gpt-5.4-mini", + "gpt-5.4-nano", + "gpt-5.3-codex", + "gpt-5.2-codex", + "gpt-5.2-pro", + "gpt-5.2", + "gpt-5.1-codex-max", + "gpt-5.1-codex-mini", + "gpt-5.1-codex", + "gpt-5.1", + "gpt-5-codex", + "gpt-5-mini", + "gpt-5-nano", + "gpt-5-pro", + "gpt-5", + ]; + + return knownModelIds.find((id) => bare === id || bare.startsWith(`${id}-`)) || bare; +} + +function getReasoningProfile(modelId: string): OpenAIReasoningProfile | null { + return GPT5_REASONING_PROFILES[normalizeModelId(modelId)] || null; +} + +function clampReasoningEffortUpward( + desired: OpenAIReasoningEffort, + supported: OpenAIReasoningEffort[] +): OpenAIReasoningEffort | null { + if (supported.length === 0) return null; + + const startIndex = REASONING_ORDER.indexOf(desired); + for (let index = startIndex; index < REASONING_ORDER.length; index++) { + const candidate = REASONING_ORDER[index]; + if (supported.includes(candidate)) return candidate; + } + + if (supported.includes("high")) return "high"; + return supported[supported.length - 1] || null; +} + +export function mapBudgetTokensToReasoningEffort(budgetTokens: number): OpenAIReasoningEffort { + let effort: OpenAIReasoningEffort = "medium"; + if (budgetTokens < 4000) effort = "minimal"; + else if (budgetTokens < 16000) effort = "low"; + else if (budgetTokens >= 32000) effort = "high"; + return effort; +} + +export function isOpenAIChatModel(modelId: string): boolean { + return getReasoningProfile(modelId)?.transport === "chat"; +} + +export function isOpenAIResponsesModel(modelId: string): boolean { + return getReasoningProfile(modelId)?.transport === "responses"; +} + +export function resolveOpenAIReasoningEffort( + modelId: string, + claudeRequest: any +): { effort: OpenAIReasoningEffort; source: string } | null { + const profile = getReasoningProfile(modelId); + if (!profile || profile.supported.length === 0) return null; + + if (claudeRequest?.thinking?.type === "disabled") { + const effort = clampReasoningEffortUpward("none", profile.supported); + return effort ? { effort, source: 'thinking.type="disabled"' } : null; + } + + const effortParam = claudeRequest?.output_config?.effort; + if (effortParam === "low" || effortParam === "medium" || effortParam === "high") { + const effort = clampReasoningEffortUpward(effortParam, profile.supported); + return effort ? { effort, source: `output_config.effort=${effortParam}` } : null; + } + + if (effortParam === "max") { + const effort = clampReasoningEffortUpward("xhigh", profile.supported); + return effort ? { effort, source: "output_config.effort=max" } : null; + } + + const budgetTokens = claudeRequest?.thinking?.budget_tokens; + if (typeof budgetTokens === "number") { + const desired = mapBudgetTokensToReasoningEffort(budgetTokens); + const effort = clampReasoningEffortUpward(desired, profile.supported); + return effort ? { effort, source: `thinking.budget_tokens=${budgetTokens}` } : null; + } + + if (claudeRequest?.thinking?.type === "adaptive") { + const effort = clampReasoningEffortUpward("high", profile.supported); + return effort ? { effort, source: 'thinking.type="adaptive"' } : null; + } + + return null; +} diff --git a/packages/cli/src/format-translation.test.ts b/packages/cli/src/format-translation.test.ts index 907ab188..2f7605ed 100644 --- a/packages/cli/src/format-translation.test.ts +++ b/packages/cli/src/format-translation.test.ts @@ -441,6 +441,136 @@ describe("Model Adapter Quirks", () => { expect(request.thinking).toBeUndefined(); }); + test("OpenAIAdapter: output_config.effort=max → reasoning_effort for gpt-5.4", async () => { + const { OpenAIAPIFormat } = await import("./adapters/openai-api-format.js"); + const adapter = new OpenAIAPIFormat("gpt-5.4"); + + const request: any = { model: "gpt-5.4", messages: [] }; + const original = { output_config: { effort: "max" } }; + + adapter.prepareRequest(request, original); + expect(request.reasoning_effort).toBe("xhigh"); + }); + + test("OpenAIAdapter: output_config.effort=max → reasoning_effort for gpt-5.4-mini", async () => { + const { OpenAIAPIFormat } = await import("./adapters/openai-api-format.js"); + const adapter = new OpenAIAPIFormat("gpt-5.4-mini"); + + const request: any = { model: "gpt-5.4-mini", messages: [] }; + const original = { output_config: { effort: "max" } }; + + adapter.prepareRequest(request, original); + expect(request.reasoning_effort).toBe("high"); + }); + + test('OpenAIAdapter: thinking.type="disabled" → reasoning_effort for gpt-5', async () => { + const { OpenAIAPIFormat } = await import("./adapters/openai-api-format.js"); + const adapter = new OpenAIAPIFormat("gpt-5"); + + const request: any = { model: "gpt-5", messages: [] }; + const original = { thinking: { type: "disabled" } }; + + adapter.prepareRequest(request, original); + expect(request.reasoning_effort).toBe("minimal"); + }); + + test('OpenAIAdapter: thinking.type="disabled" → reasoning_effort for gpt-5-mini', async () => { + const { OpenAIAPIFormat } = await import("./adapters/openai-api-format.js"); + const adapter = new OpenAIAPIFormat("gpt-5-mini"); + + const request: any = { model: "gpt-5-mini", messages: [] }; + const original = { thinking: { type: "disabled" } }; + + adapter.prepareRequest(request, original); + expect(request.reasoning_effort).toBe("low"); + }); + + test('OpenAIAdapter: thinking.type="disabled" → reasoning_effort for gpt-5.1', async () => { + const { OpenAIAPIFormat } = await import("./adapters/openai-api-format.js"); + const adapter = new OpenAIAPIFormat("gpt-5.1"); + + const request: any = { model: "gpt-5.1", messages: [] }; + const original = { thinking: { type: "disabled" } }; + + adapter.prepareRequest(request, original); + expect(request.reasoning_effort).toBe("none"); + }); + + test('OpenAIAdapter: thinking.type="adaptive" → reasoning_effort for gpt-5.4', async () => { + const { OpenAIAPIFormat } = await import("./adapters/openai-api-format.js"); + const adapter = new OpenAIAPIFormat("gpt-5.4"); + + const request: any = { model: "gpt-5.4", messages: [] }; + const original = { thinking: { type: "adaptive" } }; + + adapter.prepareRequest(request, original); + expect(request.reasoning_effort).toBe("high"); + }); + + test("OpenAIAdapter: omits reasoning_effort for gpt-5.4-nano", async () => { + const { OpenAIAPIFormat } = await import("./adapters/openai-api-format.js"); + const adapter = new OpenAIAPIFormat("gpt-5.4-nano"); + + const request: any = { model: "gpt-5.4-nano", messages: [], thinking: { type: "adaptive" } }; + const original = { thinking: { type: "adaptive" } }; + + adapter.prepareRequest(request, original); + expect(request.reasoning_effort).toBeUndefined(); + expect(request.thinking).toBeUndefined(); + }); + + test("CodexAPIFormat: output_config.effort=low → reasoning.effort for gpt-5.4-pro", async () => { + const { CodexAPIFormat } = await import("./adapters/codex-api-format.js"); + const adapter = new CodexAPIFormat("gpt-5.4-pro"); + + const payload = adapter.buildPayload( + { max_tokens: 4096, output_config: { effort: "low" } }, + [], + [] + ); + + expect(payload.reasoning).toEqual({ effort: "medium" }); + }); + + test('CodexAPIFormat: thinking.type="disabled" → reasoning.effort for gpt-5.4-pro', async () => { + const { CodexAPIFormat } = await import("./adapters/codex-api-format.js"); + const adapter = new CodexAPIFormat("gpt-5.4-pro"); + + const payload = adapter.buildPayload( + { max_tokens: 4096, thinking: { type: "disabled" } }, + [], + [] + ); + + expect(payload.reasoning).toEqual({ effort: "medium" }); + }); + + test("CodexAPIFormat: output_config.effort=max → reasoning.effort for gpt-5.3-codex", async () => { + const { CodexAPIFormat } = await import("./adapters/codex-api-format.js"); + const adapter = new CodexAPIFormat("gpt-5.3-codex"); + + const payload = adapter.buildPayload( + { max_tokens: 4096, output_config: { effort: "max" } }, + [], + [] + ); + + expect(payload.reasoning).toEqual({ effort: "xhigh" }); + }); + + test("CodexAPIFormat: output_config.effort=low → reasoning.effort for gpt-5-pro", async () => { + const { CodexAPIFormat } = await import("./adapters/codex-api-format.js"); + const adapter = new CodexAPIFormat("gpt-5-pro"); + + const payload = adapter.buildPayload( + { max_tokens: 4096, output_config: { effort: "low" } }, + [], + [] + ); + + expect(payload.reasoning).toEqual({ effort: "high" }); + }); + test("GLMAdapter: strips thinking params", async () => { const { GLMModelDialect } = await import("./adapters/glm-model-dialect.js"); const adapter = new GLMModelDialect("glm-5"); diff --git a/packages/cli/src/providers/provider-profiles.ts b/packages/cli/src/providers/provider-profiles.ts index 2661924f..6f891a49 100644 --- a/packages/cli/src/providers/provider-profiles.ts +++ b/packages/cli/src/providers/provider-profiles.ts @@ -25,6 +25,7 @@ import { GeminiCodeAssistProviderTransport } from "./transport/gemini-codeassist import { GeminiAPIFormat } from "../adapters/gemini-api-format.js"; import { OpenAIProviderTransport } from "./transport/openai.js"; import { OpenAIAPIFormat } from "../adapters/openai-api-format.js"; +import { CodexAPIFormat } from "../adapters/codex-api-format.js"; import { AnthropicProviderTransport } from "./transport/anthropic-compat.js"; import { AnthropicAPIFormat } from "../adapters/anthropic-api-format.js"; import { OllamaProviderTransport } from "./transport/ollamacloud.js"; @@ -39,6 +40,7 @@ import { getVertexConfig, validateVertexOAuthConfig } from "../auth/vertex-auth. import { log, logStderr } from "../logger.js"; import { resolveApiKeyProvenance, formatProvenanceLog } from "./api-key-provenance.js"; import type { ModelHandler } from "../handlers/types.js"; +import { isOpenAIResponsesModel } from "../adapters/openai-reasoning.js"; // --------------------------------------------------------------------------- // Types @@ -83,6 +85,12 @@ export interface ProviderProfile { // Profile implementations // --------------------------------------------------------------------------- +function createOpenAIAdapter(modelName: string): BaseModelAdapter { + const usesResponsesAPI = + isOpenAIResponsesModel(modelName) || modelName.toLowerCase().includes("codex"); + return usesResponsesAPI ? new CodexAPIFormat(modelName) : new OpenAIAPIFormat(modelName); +} + const geminiProfile: ProviderProfile = { createHandler(ctx) { const transport = new GeminiProviderTransport(ctx.provider, ctx.modelName, ctx.apiKey); @@ -113,7 +121,7 @@ const geminiCodeAssistProfile: ProviderProfile = { const openaiProfile: ProviderProfile = { createHandler(ctx) { const transport = new OpenAIProviderTransport(ctx.provider, ctx.modelName, ctx.apiKey); - const adapter = new OpenAIAPIFormat(ctx.modelName); + const adapter = createOpenAIAdapter(ctx.modelName); const handler = new ComposedHandler(transport, ctx.targetModel, ctx.modelName, ctx.port, { adapter, tokenStrategy: "delta-aware", @@ -163,7 +171,7 @@ const glmProfile: ProviderProfile = { * * Model routing inside the profile: * - MiniMax models → AnthropicProviderTransport + AnthropicAPIFormat - * - All other models → OpenAIProviderTransport + OpenAIAPIFormat (delta-aware) + * - All other models → OpenAIProviderTransport + OpenAI/Codex adapter (delta-aware) */ const openCodeZenProfile: ProviderProfile = { createHandler(ctx) { @@ -184,7 +192,7 @@ const openCodeZenProfile: ProviderProfile = { } const transport = new OpenAIProviderTransport(ctx.provider, ctx.modelName, zenApiKey); - const adapter = new OpenAIAPIFormat(ctx.modelName); + const adapter = createOpenAIAdapter(ctx.modelName); const handler = new ComposedHandler(transport, ctx.targetModel, ctx.modelName, ctx.port, { adapter, tokenStrategy: "delta-aware", diff --git a/packages/cli/src/providers/provider-routing.test.ts b/packages/cli/src/providers/provider-routing.test.ts index e5887f50..eb725d9a 100644 --- a/packages/cli/src/providers/provider-routing.test.ts +++ b/packages/cli/src/providers/provider-routing.test.ts @@ -226,6 +226,21 @@ describe("DialectManager — correct dialect selection", () => { expect(adapter).toBeInstanceOf(OpenAIAPIFormat); }); + test("gpt-5.4 → OpenAIAPIFormat", () => { + const adapter = new DialectManager("gpt-5.4").getAdapter(); + expect(adapter).toBeInstanceOf(OpenAIAPIFormat); + }); + + test("openai/gpt-5.4 → OpenAIAPIFormat", () => { + const adapter = new DialectManager("openai/gpt-5.4").getAdapter(); + expect(adapter).toBeInstanceOf(OpenAIAPIFormat); + }); + + test("openrouter/openai/gpt-5.4-mini → OpenAIAPIFormat", () => { + const adapter = new DialectManager("openrouter/openai/gpt-5.4-mini").getAdapter(); + expect(adapter).toBeInstanceOf(OpenAIAPIFormat); + }); + test("unknown-model → DefaultAPIFormat", () => { const adapter = new DialectManager("unknown-model").getAdapter(); expect(adapter).toBeInstanceOf(DefaultAPIFormat); diff --git a/packages/cli/src/providers/transport/openai.ts b/packages/cli/src/providers/transport/openai.ts index 18a7cdda..661d5e02 100644 --- a/packages/cli/src/providers/transport/openai.ts +++ b/packages/cli/src/providers/transport/openai.ts @@ -9,6 +9,7 @@ import type { ProviderTransport, StreamFormat } from "./types.js"; import type { RemoteProvider } from "../../handlers/shared/remote-provider-types.js"; import { log } from "../../logger.js"; +import { isOpenAIResponsesModel } from "../../adapters/openai-reasoning.js"; export class OpenAIProviderTransport implements ProviderTransport { readonly name: string; @@ -26,19 +27,21 @@ export class OpenAIProviderTransport implements ProviderTransport { this.name = provider.name; this.displayName = OpenAIProviderTransport.formatDisplayName(provider.name); - // Codex models use the Responses API which has a different streaming format - this.streamFormat = modelName.toLowerCase().includes("codex") - ? "openai-responses-sse" - : "openai-sse"; + // Responses models use a different streaming format from Chat Completions. + this.streamFormat = this.usesResponsesAPI() ? "openai-responses-sse" : "openai-sse"; } getEndpoint(): string { - if (this.modelName.toLowerCase().includes("codex")) { + if (this.usesResponsesAPI()) { return `${this.provider.baseUrl}/v1/responses`; } return `${this.provider.baseUrl}${this.provider.apiPath}`; } + private usesResponsesAPI(): boolean { + return isOpenAIResponsesModel(this.modelName) || this.modelName.toLowerCase().includes("codex"); + } + async getHeaders(): Promise> { const headers: Record = {}; if (this.apiKey) {