diff --git a/apps/api/.env.example b/apps/api/.env.example index 9f176f555..776f34392 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -463,6 +463,7 @@ BASE_DOMAIN=workspaces.example.com # AI_PROXY_ENABLED=true # Kill switch: set "false" to disable (default: enabled) # AI_PROXY_DEFAULT_MODEL=@cf/meta/llama-4-scout-17b-16e-instruct # Default model for OpenCode (override via admin UI or env var) # AI_PROXY_DEFAULT_ANTHROPIC_MODEL=claude-sonnet-4-6 # Default model for Claude Code proxy fallback +# AI_PROXY_DEFAULT_OPENAI_MODEL=gpt-4.1 # Default model for Codex proxy fallback # AI_PROXY_ALLOWED_MODELS=@cf/meta/llama-4-scout-17b-16e-instruct,claude-haiku-4-5-20251001,@cf/qwen/qwen3-30b-a3b-fp8,@cf/google/gemma-3-12b-it # AI_PROXY_DAILY_INPUT_TOKEN_LIMIT=500000 # Per-user daily input token cap # AI_PROXY_DAILY_OUTPUT_TOKEN_LIMIT=200000 # Per-user daily output token cap diff --git a/apps/api/src/env.ts b/apps/api/src/env.ts index 0949260ed..e53597138 100644 --- a/apps/api/src/env.ts +++ b/apps/api/src/env.ts @@ -504,6 +504,7 @@ export interface Env { AI_PROXY_ENABLED?: string; // Kill switch: "false" to disable (default: enabled) AI_PROXY_DEFAULT_MODEL?: string; // Default model for OpenCode (default: claude-haiku-4-5-20251001) AI_PROXY_DEFAULT_ANTHROPIC_MODEL?: string; // Default model for Claude Code proxy (default: claude-sonnet-4-6) + AI_PROXY_DEFAULT_OPENAI_MODEL?: string; // Default model for Codex proxy (default: gpt-4.1) AI_PROXY_ALLOWED_MODELS?: string; // Comma-separated allowed models AI_PROXY_DAILY_INPUT_TOKEN_LIMIT?: string; // Per-user daily input token cap (default: 500000) AI_PROXY_DAILY_OUTPUT_TOKEN_LIMIT?: string; // Per-user daily output token cap (default: 200000) diff --git a/apps/api/src/routes/workspaces/runtime.ts b/apps/api/src/routes/workspaces/runtime.ts index 1d7dd7034..09a281c70 100644 --- a/apps/api/src/routes/workspaces/runtime.ts +++ b/apps/api/src/routes/workspaces/runtime.ts @@ -1,4 +1,4 @@ -import { 
AI_PROXY_DEFAULT_MODEL_KV_KEY, type AIProxyConfig, type BootstrapTokenData, DEFAULT_AI_PROXY_ANTHROPIC_MODEL, DEFAULT_AI_PROXY_MODEL, getAgentDefinition, isValidAgentType } from '@simple-agent-manager/shared'; +import { AI_PROXY_DEFAULT_MODEL_KV_KEY, type AIProxyConfig, type BootstrapTokenData, DEFAULT_AI_PROXY_ANTHROPIC_MODEL, DEFAULT_AI_PROXY_MODEL, DEFAULT_AI_PROXY_OPENAI_MODEL, getAgentDefinition, isValidAgentType } from '@simple-agent-manager/shared'; import { and, eq, isNull } from 'drizzle-orm'; import { drizzle } from 'drizzle-orm/d1'; import { Hono } from 'hono'; @@ -27,6 +27,9 @@ import { verifyWorkspaceCallbackAuth, } from './_helpers'; +/** Agent types eligible for AI proxy credential fallback (module-scope for isolate reuse). */ +const PROXY_ELIGIBLE_AGENTS: ReadonlySet<string> = new Set(['opencode', 'claude-code', 'openai-codex']); + const runtimeRoutes = new Hono<{ Bindings: Env }>(); runtimeRoutes.post('/:id/agent-key', jsonValidator(AgentTypeBodySchema), async (c) => { @@ -74,22 +77,39 @@ runtimeRoutes.post('/:id/agent-key', jsonValidator(AgentTypeBodySchema), async ( // AI proxy fallback: if no user credential and the AI proxy is enabled, // return platform inference config so the VM agent can use the proxy. - // Applies to OpenCode (openai-compatible format) and Claude Code (native Anthropic format). + // Applies to OpenCode (openai-compatible format), Claude Code (native Anthropic format), + // and Codex (openai-proxy format via OPENAI_BASE_URL/OPENAI_API_KEY). const aiProxyEnabled = (c.env.AI_PROXY_ENABLED ??
'true') !== 'false'; - if (!credentialData && (body.agentType === 'opencode' || body.agentType === 'claude-code') && aiProxyEnabled) { + if (!credentialData && PROXY_ELIGIBLE_AGENTS.has(body.agentType) && aiProxyEnabled) { const baseDomain = c.env.BASE_DOMAIN; - // Agent-specific proxy config: OpenCode uses openai-compatible, Claude Code uses native Anthropic + // Agent-specific proxy config: + // - Claude Code: native Anthropic format via anthropic-proxy + // - Codex: OpenAI format via openai-proxy (OPENAI_BASE_URL + OPENAI_API_KEY) + // - OpenCode: openai-compatible format via platform provider config const isClaudeCode = body.agentType === 'claude-code'; - const proxyBaseUrl = isClaudeCode - ? `https://api.${baseDomain}/ai/anthropic` - : `https://api.${baseDomain}/ai/v1`; - const proxyProvider = isClaudeCode ? 'anthropic-proxy' : 'openai-compatible'; + const isCodex = body.agentType === 'openai-codex'; + let proxyBaseUrl: string; + let proxyProvider: string; + if (isClaudeCode) { + proxyBaseUrl = `https://api.${baseDomain}/ai/anthropic`; + proxyProvider = 'anthropic-proxy'; + } else if (isCodex) { + proxyBaseUrl = `https://api.${baseDomain}/ai/v1`; + proxyProvider = 'openai-proxy'; + } else { + proxyBaseUrl = `https://api.${baseDomain}/ai/v1`; + proxyProvider = 'openai-compatible'; + } // Resolve default model: KV (admin-set) > env var > shared constant let defaultModel: string; if (isClaudeCode) { defaultModel = c.env.AI_PROXY_DEFAULT_ANTHROPIC_MODEL ?? DEFAULT_AI_PROXY_ANTHROPIC_MODEL; + } else if (isCodex) { + // Note: Codex model is not overridable via the admin AI proxy UI (KV). + // Use AI_PROXY_DEFAULT_OPENAI_MODEL env var to change the default. + defaultModel = c.env.AI_PROXY_DEFAULT_OPENAI_MODEL ?? DEFAULT_AI_PROXY_OPENAI_MODEL; } else { defaultModel = c.env.AI_PROXY_DEFAULT_MODEL ?? 
DEFAULT_AI_PROXY_MODEL; try { diff --git a/apps/api/tests/unit/routes/codex-proxy-fallback.test.ts b/apps/api/tests/unit/routes/codex-proxy-fallback.test.ts new file mode 100644 index 000000000..2b9db431d --- /dev/null +++ b/apps/api/tests/unit/routes/codex-proxy-fallback.test.ts @@ -0,0 +1,252 @@ +/** + * Tests for Codex (openai-codex) agent key fallback to AI proxy. + * + * When agentType === 'openai-codex' and no dedicated agent credential exists, + * the agent-key endpoint falls back to the platform AI proxy with + * inferenceConfig { provider: 'openai-proxy' }. + */ +import { drizzle } from 'drizzle-orm/d1'; +import { Hono } from 'hono'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +import type { Env } from '../../../src/env'; +import { workspacesRoutes } from '../../../src/routes/workspaces'; + +vi.mock('drizzle-orm/d1'); +vi.mock('../../../src/middleware/auth', () => ({ + requireAuth: () => vi.fn((_c: unknown, next: () => Promise<void>) => next()), + requireApproved: () => vi.fn((_c: unknown, next: () => Promise<void>) => next()), + getUserId: () => 'test-user-id', + getAuth: () => ({ userId: 'test-user-id' }), +})); +vi.mock('../../../src/services/jwt', () => ({ + verifyCallbackToken: vi.fn().mockResolvedValue({ workspace: 'ws-123', type: 'callback', scope: 'workspace' }), + signCallbackToken: vi.fn(), +})); +vi.mock('../../../src/services/encryption', () => ({ + encrypt: vi.fn(), + decrypt: vi.fn(), +})); + +const { decrypt } = await import('../../../src/services/encryption'); +const mockDecrypt = vi.mocked(decrypt); + +describe('POST /workspaces/:id/agent-key — Codex AI proxy fallback', () => { + let app: Hono<{ Bindings: Env }>; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + let mockDB: any; + + const mockEnv = { + DATABASE: {} as D1Database, + ENCRYPTION_KEY: 'test-key', + JWT_PUBLIC_KEY: 'test-public-key', + CALLBACK_TOKEN_AUDIENCE: 'test-audience', + CALLBACK_TOKEN_ISSUER: 'test-issuer', + BASE_DOMAIN: 'sammy.party', + KV:
{ get: vi.fn().mockResolvedValue(null) }, + } as unknown as Env; + + function postAgentKey(body: unknown, env?: Env): Promise<Response> { + return app.request( + '/api/workspaces/ws-123/agent-key', + { + method: 'POST', + body: JSON.stringify(body), + headers: { + 'Content-Type': 'application/json', + Authorization: 'Bearer test-callback-token', + }, + }, + env ?? mockEnv, + ); + } + + beforeEach(() => { + vi.clearAllMocks(); + + app = new Hono<{ Bindings: Env }>(); + app.onError((err, c) => { + const appError = err as { + statusCode?: number; + error?: string; + message?: string; + }; + if ( + typeof appError.statusCode === 'number' && + typeof appError.error === 'string' + ) { + return c.json( + { error: appError.error, message: appError.message }, + appError.statusCode as 400 | 401 | 403 | 404 | 500, + ); + } + return c.json({ error: 'INTERNAL_ERROR', message: err.message }, 500); + }); + app.route('/api/workspaces', workspacesRoutes); + + mockDB = { + select: vi.fn().mockReturnThis(), + from: vi.fn().mockReturnThis(), + where: vi.fn().mockReturnThis(), + limit: vi.fn(), + update: vi.fn().mockReturnThis(), + set: vi.fn().mockReturnThis(), + }; + vi.mocked(drizzle).mockReturnValue(mockDB as ReturnType<typeof drizzle>); + }); + + it('returns openai-proxy inferenceConfig when no openai-codex credential exists', async () => { + let queryCount = 0; + mockDB.limit.mockImplementation(() => { + queryCount++; + if (queryCount === 1) { + // workspace lookup + return [{ userId: 'user-1', projectId: null }]; + } + // All credential lookups return empty + return []; + }); + + const resp = await postAgentKey({ agentType: 'openai-codex' }); + expect(resp.status).toBe(200); + + const body = await resp.json(); + expect(body.apiKey).toBe('__platform_proxy__'); + expect(body.credentialSource).toBe('platform'); + expect(body.credentialKind).toBe('api-key'); + expect(body.inferenceConfig).toBeDefined(); + expect(body.inferenceConfig.provider).toBe('openai-proxy'); +
expect(body.inferenceConfig.baseURL).toBe('https://api.sammy.party/ai/v1'); + expect(body.inferenceConfig.apiKeySource).toBe('callback-token'); + expect(body.inferenceConfig.model).toBe('gpt-4.1'); + }); + + it('returns user credential when openai-codex credential exists (no proxy fallback)', async () => { + let queryCount = 0; + mockDB.limit.mockImplementation(() => { + queryCount++; + if (queryCount === 1) { + // workspace lookup + return [{ userId: 'user-1', projectId: null }]; + } + if (queryCount === 2) { + // agent-api-key for 'openai-codex' (user-scoped) → found + return [{ + encryptedToken: 'encrypted-key', + iv: 'iv-key', + credentialKind: 'api-key', + isActive: true, + }]; + } + return []; + }); + + mockDecrypt.mockResolvedValueOnce('sk-openai-user-key-123'); + + const resp = await postAgentKey({ agentType: 'openai-codex' }); + expect(resp.status).toBe(200); + + const body = await resp.json(); + expect(body.apiKey).toBe('sk-openai-user-key-123'); + expect(body.credentialKind).toBe('api-key'); + // Should NOT have inferenceConfig — user credential takes precedence + expect(body.inferenceConfig).toBeUndefined(); + }); + + it('returns 404 when no credential and AI proxy is disabled', async () => { + let queryCount = 0; + mockDB.limit.mockImplementation(() => { + queryCount++; + if (queryCount === 1) { + return [{ userId: 'user-1', projectId: null }]; + } + return []; + }); + + const disabledEnv = { ...mockEnv, AI_PROXY_ENABLED: 'false' } as unknown as Env; + const resp = await postAgentKey({ agentType: 'openai-codex' }, disabledEnv); + expect(resp.status).toBe(404); + }); + + it('uses custom model from AI_PROXY_DEFAULT_OPENAI_MODEL env var when set', async () => { + let queryCount = 0; + mockDB.limit.mockImplementation(() => { + queryCount++; + if (queryCount === 1) { + return [{ userId: 'user-1', projectId: null }]; + } + return []; + }); + + const customEnv = { + ...mockEnv, + AI_PROXY_DEFAULT_OPENAI_MODEL: 'gpt-4.1-mini', + } as unknown as Env; + + const 
resp = await postAgentKey({ agentType: 'openai-codex' }, customEnv); + expect(resp.status).toBe(200); + + const body = await resp.json(); + expect(body.inferenceConfig.model).toBe('gpt-4.1-mini'); + }); + + it('tracks credential source on associated task', async () => { + let queryCount = 0; + mockDB.limit.mockImplementation(() => { + queryCount++; + if (queryCount === 1) { + // workspace lookup + return [{ userId: 'user-1', projectId: null }]; + } + if (queryCount <= 3) { + // Credential lookups (user-scoped + platform) → empty + return []; + } + // Task lookup (inside AI proxy fallback block) + if (queryCount === 4) return [{ id: 'task-1' }]; + return []; + }); + + const resp = await postAgentKey({ agentType: 'openai-codex' }); + expect(resp.status).toBe(200); + + // Verify update was called (task credential source tracking) + expect(mockDB.update).toHaveBeenCalled(); + }); + + it('does NOT affect existing opencode proxy fallback', async () => { + let queryCount = 0; + mockDB.limit.mockImplementation(() => { + queryCount++; + if (queryCount === 1) { + return [{ userId: 'user-1', projectId: null }]; + } + return []; + }); + + const resp = await postAgentKey({ agentType: 'opencode' }); + expect(resp.status).toBe(200); + + const body = await resp.json(); + // OpenCode should still get openai-compatible, not openai-proxy + expect(body.inferenceConfig.provider).toBe('openai-compatible'); + }); + + it('does NOT affect existing claude-code proxy fallback', async () => { + let queryCount = 0; + mockDB.limit.mockImplementation(() => { + queryCount++; + if (queryCount === 1) { + return [{ userId: 'user-1', projectId: null }]; + } + return []; + }); + + const resp = await postAgentKey({ agentType: 'claude-code' }); + expect(resp.status).toBe(200); + + const body = await resp.json(); + // Claude Code should still get anthropic-proxy + expect(body.inferenceConfig.provider).toBe('anthropic-proxy'); + }); +}); diff --git a/packages/shared/src/constants/ai-services.ts 
b/packages/shared/src/constants/ai-services.ts index a55743f6d..242840adf 100644 --- a/packages/shared/src/constants/ai-services.ts +++ b/packages/shared/src/constants/ai-services.ts @@ -124,6 +124,10 @@ export const DEFAULT_AI_PROXY_MODEL = '@cf/meta/llama-4-scout-17b-16e-instruct'; * Override via AI_PROXY_DEFAULT_ANTHROPIC_MODEL env var. */ export const DEFAULT_AI_PROXY_ANTHROPIC_MODEL = 'claude-sonnet-4-6'; +/** Default model for OpenAI proxy fallback (Codex agent). + * Override via AI_PROXY_DEFAULT_OPENAI_MODEL env var. */ +export const DEFAULT_AI_PROXY_OPENAI_MODEL = 'gpt-4.1'; + /** Budget tier for platform AI models. */ export type PlatformAIModelTier = 'free' | 'standard' | 'premium'; diff --git a/packages/shared/src/constants/index.ts b/packages/shared/src/constants/index.ts index a4e15dad1..0213e9401 100644 --- a/packages/shared/src/constants/index.ts +++ b/packages/shared/src/constants/index.ts @@ -165,6 +165,7 @@ export { DEFAULT_AI_PROXY_DAILY_OUTPUT_TOKEN_LIMIT, DEFAULT_AI_PROXY_MAX_INPUT_TOKENS_PER_REQUEST, DEFAULT_AI_PROXY_MODEL, + DEFAULT_AI_PROXY_OPENAI_MODEL, DEFAULT_AI_PROXY_RATE_LIMIT_RPM, DEFAULT_AI_PROXY_RATE_LIMIT_WINDOW_SECONDS, DEFAULT_AI_PROXY_STREAM_TIMEOUT_MS, diff --git a/packages/vm-agent/internal/acp/session_host.go b/packages/vm-agent/internal/acp/session_host.go index 7533dc444..754eeb401 100644 --- a/packages/vm-agent/internal/acp/session_host.go +++ b/packages/vm-agent/internal/acp/session_host.go @@ -1005,6 +1005,19 @@ func (h *SessionHost) startAgent(ctx context.Context, agentType string, cred *ag "model", cred.inferenceConfig.Model, "callbackTokenLen", len(h.config.CallbackToken), "workspaceId", h.config.WorkspaceID) + } else if agentType == "openai-codex" && cred.inferenceConfig.Provider == "openai-proxy" { + // Codex: inject OPENAI_BASE_URL and OPENAI_API_KEY for SAM's OpenAI-format proxy. + // Codex appends /chat/completions to OPENAI_BASE_URL, so we set the base to /ai/v1. 
+ envVars = append(envVars, "OPENAI_BASE_URL="+cred.inferenceConfig.BaseURL) + envVars = append(envVars, "OPENAI_API_KEY="+h.config.CallbackToken) + if cred.inferenceConfig.Model != "" { + envVars = append(envVars, "OPENAI_MODEL="+cred.inferenceConfig.Model) + } + slog.Info("Codex AI proxy credential injected", + "baseURL", cred.inferenceConfig.BaseURL, + "model", cred.inferenceConfig.Model, + "callbackTokenLen", len(h.config.CallbackToken), + "workspaceId", h.config.WorkspaceID) } else { // OpenCode: inject openai-compatible proxy env vars. envVars = append(envVars, "OPENCODE_PLATFORM_BASE_URL="+cred.inferenceConfig.BaseURL) diff --git a/tasks/backlog/2026-05-01-codex-credential-injection-fallback.md b/tasks/archive/2026-05-01-codex-credential-injection-fallback.md similarity index 100% rename from tasks/backlog/2026-05-01-codex-credential-injection-fallback.md rename to tasks/archive/2026-05-01-codex-credential-injection-fallback.md diff --git a/tasks/backlog/2026-05-01-ai-proxy-credential-hardening.md b/tasks/backlog/2026-05-01-ai-proxy-credential-hardening.md new file mode 100644 index 000000000..15dddbc57 --- /dev/null +++ b/tasks/backlog/2026-05-01-ai-proxy-credential-hardening.md @@ -0,0 +1,35 @@ +# AI Proxy Credential Hardening + +**Created**: 2026-05-01 +**Source**: Security audit of WP3 (Codex Credential Injection Fallback) + +## Problem Statement + +The AI proxy credential fallback paths (claude-code, openai-codex, opencode) inject the full workspace callback token as the API key into agent containers. This token has a 24-hour lifetime and grants access to all workspace runtime endpoints, not just the AI proxy. Additionally, the `__platform_proxy__` sentinel string propagates through the credential field even when inferenceConfig is present. + +These are pre-existing architectural patterns (not regressions from any single PR), but they represent defense-in-depth gaps that should be addressed. 
+ +## Research Findings + +- Callback token TTL is 24h (jwt.ts), scoped to workspace — grants access to all runtime endpoints +- Claude Code path injects callback token as `ANTHROPIC_AUTH_TOKEN` +- Codex path injects callback token as `OPENAI_API_KEY` +- `__platform_proxy__` sentinel propagates to `agentCredential.credential` in Go agent +- Go agent does not validate `inferenceConfig.BaseURL` origin before injection +- No Go-side unit tests exist for any proxy injection branch in session_host.go + +## Implementation Checklist + +- [ ] Introduce short-lived, AI-proxy-scoped token variant (audience `workspace-ai-proxy`, TTL 1-2h) +- [ ] Inject proxy-scoped token instead of full callback token for all proxy paths +- [ ] In Go agent `fetchAgentKey`, clear `credential` field when `inferenceConfig != nil` +- [ ] Add origin validation for `inferenceConfig.BaseURL` in Go agent +- [ ] Add Go unit tests for all proxy injection branches (claude-code, openai-codex, opencode) +- [ ] Add credential-sync endpoint guard to reject `__platform_proxy__` payloads + +## Acceptance Criteria + +- [ ] Proxy-injected API keys cannot access non-proxy workspace endpoints +- [ ] `__platform_proxy__` sentinel never appears in auth files or credential-sync payloads +- [ ] `inferenceConfig.BaseURL` validated against control plane origin +- [ ] All proxy injection branches have Go-level test coverage