diff --git a/apps/api/.env.example b/apps/api/.env.example index ecb1bd49e..9f176f555 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -461,7 +461,8 @@ BASE_DOMAIN=workspaces.example.com # AI Inference Proxy (Workers AI gateway for trial/zero-config users) # AI_PROXY_ENABLED=true # Kill switch: set "false" to disable (default: enabled) -# AI_PROXY_DEFAULT_MODEL=@cf/meta/llama-4-scout-17b-16e-instruct # Default model (override via admin UI or env var) +# AI_PROXY_DEFAULT_MODEL=@cf/meta/llama-4-scout-17b-16e-instruct # Default model for OpenCode (override via admin UI or env var) +# AI_PROXY_DEFAULT_ANTHROPIC_MODEL=claude-sonnet-4-6 # Default model for Claude Code proxy fallback # AI_PROXY_ALLOWED_MODELS=@cf/meta/llama-4-scout-17b-16e-instruct,claude-haiku-4-5-20251001,@cf/qwen/qwen3-30b-a3b-fp8,@cf/google/gemma-3-12b-it # AI_PROXY_DAILY_INPUT_TOKEN_LIMIT=500000 # Per-user daily input token cap # AI_PROXY_DAILY_OUTPUT_TOKEN_LIMIT=200000 # Per-user daily output token cap diff --git a/apps/api/src/env.ts b/apps/api/src/env.ts index c8624ecf3..0949260ed 100644 --- a/apps/api/src/env.ts +++ b/apps/api/src/env.ts @@ -502,7 +502,8 @@ export interface Env { TRIGGER_STALE_RECOVERY_BATCH_SIZE?: string; // Max stale executions to recover per sweep (default: 100) // AI Inference Proxy (Cloudflare AI Gateway — Workers AI + Anthropic) AI_PROXY_ENABLED?: string; // Kill switch: "false" to disable (default: enabled) - AI_PROXY_DEFAULT_MODEL?: string; // Default model (default: claude-haiku-4-5-20251001) + AI_PROXY_DEFAULT_MODEL?: string; // Default model for OpenCode (default: @cf/meta/llama-4-scout-17b-16e-instruct) + AI_PROXY_DEFAULT_ANTHROPIC_MODEL?: string; // Default model for Claude Code proxy (default: claude-sonnet-4-6) AI_PROXY_ALLOWED_MODELS?: string; // Comma-separated allowed models AI_PROXY_DAILY_INPUT_TOKEN_LIMIT?: string; // Per-user daily input token cap (default: 500000) AI_PROXY_DAILY_OUTPUT_TOKEN_LIMIT?: string; // Per-user daily output token cap (default: 
200000) diff --git a/apps/api/src/routes/workspaces/runtime.ts b/apps/api/src/routes/workspaces/runtime.ts index a6de46862..1d7dd7034 100644 --- a/apps/api/src/routes/workspaces/runtime.ts +++ b/apps/api/src/routes/workspaces/runtime.ts @@ -1,4 +1,4 @@ -import { AI_PROXY_DEFAULT_MODEL_KV_KEY, type AIProxyConfig, type BootstrapTokenData, DEFAULT_AI_PROXY_MODEL, getAgentDefinition, isValidAgentType } from '@simple-agent-manager/shared'; +import { AI_PROXY_DEFAULT_MODEL_KV_KEY, type AIProxyConfig, type BootstrapTokenData, DEFAULT_AI_PROXY_ANTHROPIC_MODEL, DEFAULT_AI_PROXY_MODEL, getAgentDefinition, isValidAgentType } from '@simple-agent-manager/shared'; import { and, eq, isNull } from 'drizzle-orm'; import { drizzle } from 'drizzle-orm/d1'; import { Hono } from 'hono'; @@ -74,23 +74,34 @@ runtimeRoutes.post('/:id/agent-key', jsonValidator(AgentTypeBodySchema), async ( // AI proxy fallback: if no user credential and the AI proxy is enabled, // return platform inference config so the VM agent can use the proxy. - // Only applies to OpenCode — the proxy uses Workers AI for inference. + // Applies to OpenCode (openai-compatible format) and Claude Code (native Anthropic format). const aiProxyEnabled = (c.env.AI_PROXY_ENABLED ?? 'true') !== 'false'; - if (!credentialData && body.agentType === 'opencode' && aiProxyEnabled) { + if (!credentialData && (body.agentType === 'opencode' || body.agentType === 'claude-code') && aiProxyEnabled) { const baseDomain = c.env.BASE_DOMAIN; - const proxyBaseUrl = `https://api.${baseDomain}/ai/v1`; + + // Agent-specific proxy config: OpenCode uses openai-compatible, Claude Code uses native Anthropic + const isClaudeCode = body.agentType === 'claude-code'; + const proxyBaseUrl = isClaudeCode + ? `https://api.${baseDomain}/ai/anthropic` + : `https://api.${baseDomain}/ai/v1`; + const proxyProvider = isClaudeCode ? 
'anthropic-proxy' : 'openai-compatible'; - // Resolve default model: KV (admin-set) > env var > shared constant + // Resolve default model. OpenCode: KV (admin-set) > env var > shared constant; Claude Code: env var > shared constant (KV override not consulted) - let defaultModel = c.env.AI_PROXY_DEFAULT_MODEL ?? DEFAULT_AI_PROXY_MODEL; - try { - const kvConfig = await c.env.KV.get(AI_PROXY_DEFAULT_MODEL_KV_KEY); - if (kvConfig) { - const parsed: AIProxyConfig = JSON.parse(kvConfig); - if (parsed.defaultModel) defaultModel = parsed.defaultModel; - } - } catch { /* KV unavailable or corrupt data — use env/default */ } + let defaultModel: string; + if (isClaudeCode) { + defaultModel = c.env.AI_PROXY_DEFAULT_ANTHROPIC_MODEL ?? DEFAULT_AI_PROXY_ANTHROPIC_MODEL; + } else { + defaultModel = c.env.AI_PROXY_DEFAULT_MODEL ?? DEFAULT_AI_PROXY_MODEL; + try { + const kvConfig = await c.env.KV.get(AI_PROXY_DEFAULT_MODEL_KV_KEY); + if (kvConfig) { + const parsed: AIProxyConfig = JSON.parse(kvConfig); + if (parsed.defaultModel) defaultModel = parsed.defaultModel; + } + } catch { /* KV unavailable or corrupt data — use env/default */ } + } - log.info('agent_key.ai_proxy_fallback', { workspaceId, userId: workspace.userId, proxyBaseUrl }); + log.info('agent_key.ai_proxy_fallback', { workspaceId, userId: workspace.userId, proxyBaseUrl, agentType: body.agentType }); // Track credential source on associated task const taskRows = await db @@ -111,7 +122,7 @@ runtimeRoutes.post('/:id/agent-key', jsonValidator(AgentTypeBodySchema), async ( credentialKind: 'api-key' as const, credentialSource: 'platform' as const, inferenceConfig: { - provider: 'openai-compatible', + provider: proxyProvider, baseURL: proxyBaseUrl, model: defaultModel, apiKeySource: 'callback-token', diff --git a/apps/api/tests/unit/routes/claude-code-proxy-fallback.test.ts b/apps/api/tests/unit/routes/claude-code-proxy-fallback.test.ts new file mode 100644 index 000000000..1a1eb3f5a --- /dev/null +++ b/apps/api/tests/unit/routes/claude-code-proxy-fallback.test.ts @@ -0,0 +1,237 @@ +/** + * Tests for Claude Code agent key fallback to AI proxy. 
+ * + * When agentType === 'claude-code' and no dedicated agent credential exists, + * the agent-key endpoint falls back to the platform AI proxy with + * inferenceConfig { provider: 'anthropic-proxy' }. + */ +import { drizzle } from 'drizzle-orm/d1'; +import { Hono } from 'hono'; +import { beforeEach, describe, expect, it, vi } from 'vitest'; + +import type { Env } from '../../../src/env'; +import { workspacesRoutes } from '../../../src/routes/workspaces'; + +vi.mock('drizzle-orm/d1'); +vi.mock('../../../src/middleware/auth', () => ({ + requireAuth: () => vi.fn((_c: unknown, next: () => Promise) => next()), + requireApproved: () => vi.fn((_c: unknown, next: () => Promise) => next()), + getUserId: () => 'test-user-id', + getAuth: () => ({ userId: 'test-user-id' }), +})); +vi.mock('../../../src/services/jwt', () => ({ + verifyCallbackToken: vi.fn().mockResolvedValue({ workspace: 'ws-123', type: 'callback', scope: 'workspace' }), + signCallbackToken: vi.fn(), +})); +vi.mock('../../../src/services/encryption', () => ({ + encrypt: vi.fn(), + decrypt: vi.fn(), +})); + +const { decrypt } = await import('../../../src/services/encryption'); +const mockDecrypt = vi.mocked(decrypt); + +describe('POST /workspaces/:id/agent-key — Claude Code AI proxy fallback', () => { + let app: Hono<{ Bindings: Env }>; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + let mockDB: any; + + const mockEnv = { + DATABASE: {} as D1Database, + ENCRYPTION_KEY: 'test-key', + JWT_PUBLIC_KEY: 'test-public-key', + CALLBACK_TOKEN_AUDIENCE: 'test-audience', + CALLBACK_TOKEN_ISSUER: 'test-issuer', + BASE_DOMAIN: 'sammy.party', + KV: { get: vi.fn().mockResolvedValue(null) }, + } as unknown as Env; + + function postAgentKey(body: unknown, env?: Env): Promise { + return app.request( + '/api/workspaces/ws-123/agent-key', + { + method: 'POST', + body: JSON.stringify(body), + headers: { + 'Content-Type': 'application/json', + Authorization: 'Bearer test-callback-token', + }, + }, + env ?? 
mockEnv, + ); + } + + beforeEach(() => { + vi.clearAllMocks(); + + app = new Hono<{ Bindings: Env }>(); + app.onError((err, c) => { + const appError = err as { + statusCode?: number; + error?: string; + message?: string; + }; + if ( + typeof appError.statusCode === 'number' && + typeof appError.error === 'string' + ) { + return c.json( + { error: appError.error, message: appError.message }, + appError.statusCode as 400 | 401 | 403 | 404 | 500, + ); + } + return c.json({ error: 'INTERNAL_ERROR', message: err.message }, 500); + }); + app.route('/api/workspaces', workspacesRoutes); + + mockDB = { + select: vi.fn().mockReturnThis(), + from: vi.fn().mockReturnThis(), + where: vi.fn().mockReturnThis(), + limit: vi.fn(), + update: vi.fn().mockReturnThis(), + set: vi.fn().mockReturnThis(), + }; + vi.mocked(drizzle).mockReturnValue(mockDB as ReturnType); + }); + + it('returns anthropic-proxy inferenceConfig when no claude-code credential exists', async () => { + let queryCount = 0; + mockDB.limit.mockImplementation(() => { + queryCount++; + if (queryCount === 1) { + // workspace lookup + return [{ userId: 'user-1', projectId: null }]; + } + // All credential lookups return empty + return []; + }); + + const resp = await postAgentKey({ agentType: 'claude-code' }); + expect(resp.status).toBe(200); + + const body = await resp.json(); + expect(body.apiKey).toBe('__platform_proxy__'); + expect(body.credentialSource).toBe('platform'); + expect(body.credentialKind).toBe('api-key'); + expect(body.inferenceConfig).toBeDefined(); + expect(body.inferenceConfig.provider).toBe('anthropic-proxy'); + expect(body.inferenceConfig.baseURL).toBe('https://api.sammy.party/ai/anthropic'); + expect(body.inferenceConfig.apiKeySource).toBe('callback-token'); + expect(body.inferenceConfig.model).toBe('claude-sonnet-4-6'); + }); + + it('returns user credential when claude-code credential exists (no proxy fallback)', async () => { + let queryCount = 0; + mockDB.limit.mockImplementation(() => { + 
queryCount++; + if (queryCount === 1) { + // workspace lookup + return [{ userId: 'user-1', projectId: null }]; + } + if (queryCount === 2) { + // agent-api-key for 'claude-code' (user-scoped) → found + return [{ + encryptedToken: 'encrypted-key', + iv: 'iv-key', + credentialKind: 'api-key', + isActive: true, + }]; + } + return []; + }); + + mockDecrypt.mockResolvedValueOnce('sk-ant-user-key-123'); + + const resp = await postAgentKey({ agentType: 'claude-code' }); + expect(resp.status).toBe(200); + + const body = await resp.json(); + expect(body.apiKey).toBe('sk-ant-user-key-123'); + expect(body.credentialKind).toBe('api-key'); + // Should NOT have inferenceConfig — user credential takes precedence + expect(body.inferenceConfig).toBeUndefined(); + }); + + it('returns 404 when no credential and AI proxy is disabled', async () => { + let queryCount = 0; + mockDB.limit.mockImplementation(() => { + queryCount++; + if (queryCount === 1) { + return [{ userId: 'user-1', projectId: null }]; + } + return []; + }); + + const disabledEnv = { ...mockEnv, AI_PROXY_ENABLED: 'false' } as unknown as Env; + const resp = await postAgentKey({ agentType: 'claude-code' }, disabledEnv); + expect(resp.status).toBe(404); + }); + + it('uses custom model from env var when set', async () => { + let queryCount = 0; + mockDB.limit.mockImplementation(() => { + queryCount++; + if (queryCount === 1) { + return [{ userId: 'user-1', projectId: null }]; + } + return []; + }); + + const customEnv = { + ...mockEnv, + AI_PROXY_DEFAULT_ANTHROPIC_MODEL: 'claude-opus-4-6', + } as unknown as Env; + + const resp = await postAgentKey({ agentType: 'claude-code' }, customEnv); + expect(resp.status).toBe(200); + + const body = await resp.json(); + expect(body.inferenceConfig.model).toBe('claude-opus-4-6'); + }); + + it('tracks credential source on associated task', async () => { + let queryCount = 0; + mockDB.limit.mockImplementation(() => { + queryCount++; + if (queryCount === 1) { + // workspace lookup + 
return [{ userId: 'user-1', projectId: null }]; + } + if (queryCount <= 3) { + // Credential lookups (user-scoped + platform) → empty + return []; + } + // Task lookup (inside AI proxy fallback block) + if (queryCount === 4) return [{ id: 'task-1' }]; + return []; + }); + // After the proxy fallback response, the update call chain: + // db.update().set().where() — mockDB already chains these via mockReturnThis() + + const resp = await postAgentKey({ agentType: 'claude-code' }); + expect(resp.status).toBe(200); + + // Verify update was called (task credential source tracking) + expect(mockDB.update).toHaveBeenCalled(); + }); + + it('does NOT use Scaleway fallback for claude-code', async () => { + // Claude Code has no fallbackCloudProvider, so it should skip directly to AI proxy + let queryCount = 0; + mockDB.limit.mockImplementation(() => { + queryCount++; + if (queryCount === 1) { + return [{ userId: 'user-1', projectId: null }]; + } + return []; + }); + + const resp = await postAgentKey({ agentType: 'claude-code' }); + expect(resp.status).toBe(200); + + const body = await resp.json(); + // Should get proxy fallback, not Scaleway + expect(body.inferenceConfig.provider).toBe('anthropic-proxy'); + }); +}); diff --git a/apps/api/tests/unit/routes/opencode-credential-fallback.test.ts b/apps/api/tests/unit/routes/opencode-credential-fallback.test.ts index aefa291a4..e92b3d3e0 100644 --- a/apps/api/tests/unit/routes/opencode-credential-fallback.test.ts +++ b/apps/api/tests/unit/routes/opencode-credential-fallback.test.ts @@ -201,7 +201,7 @@ describe('POST /workspaces/:id/agent-key — OpenCode Scaleway fallback', () => return []; }); - const resp = await postAgentKey({ agentType: 'claude-code' }); + const resp = await postAgentKey({ agentType: 'google-gemini' }); expect(resp.status).toBe(404); }); diff --git a/packages/shared/src/constants/ai-services.ts b/packages/shared/src/constants/ai-services.ts index 08ead733c..a55743f6d 100644 --- 
a/packages/shared/src/constants/ai-services.ts +++ b/packages/shared/src/constants/ai-services.ts @@ -120,6 +120,10 @@ export const DEFAULT_TTS_RETRY_BASE_DELAY_MS = 500; * the AI_PROXY_DEFAULT_MODEL env var. */ export const DEFAULT_AI_PROXY_MODEL = '@cf/meta/llama-4-scout-17b-16e-instruct'; +/** Default model for Anthropic proxy fallback (Claude Code agent). + * Override via AI_PROXY_DEFAULT_ANTHROPIC_MODEL env var. */ +export const DEFAULT_AI_PROXY_ANTHROPIC_MODEL = 'claude-sonnet-4-6'; + /** Budget tier for platform AI models. */ export type PlatformAIModelTier = 'free' | 'standard' | 'premium'; diff --git a/packages/shared/src/constants/index.ts b/packages/shared/src/constants/index.ts index 36a265c59..a4e15dad1 100644 --- a/packages/shared/src/constants/index.ts +++ b/packages/shared/src/constants/index.ts @@ -160,6 +160,7 @@ export { AI_PROXY_DEFAULT_MODEL_KV_KEY, type AIProxyConfig, DEFAULT_AI_PROXY_ALLOWED_MODELS, + DEFAULT_AI_PROXY_ANTHROPIC_MODEL, DEFAULT_AI_PROXY_DAILY_INPUT_TOKEN_LIMIT, DEFAULT_AI_PROXY_DAILY_OUTPUT_TOKEN_LIMIT, DEFAULT_AI_PROXY_MAX_INPUT_TOKENS_PER_REQUEST, diff --git a/packages/vm-agent/internal/acp/process.go b/packages/vm-agent/internal/acp/process.go index d5cf9171d..5fc450119 100644 --- a/packages/vm-agent/internal/acp/process.go +++ b/packages/vm-agent/internal/acp/process.go @@ -91,6 +91,7 @@ func parseEnvExportLines(content string) []string { // not appear in docker exec command-line arguments (visible in /proc/*/cmdline). 
var secretEnvNames = map[string]bool{ "ANTHROPIC_API_KEY": true, + "ANTHROPIC_AUTH_TOKEN": true, "CLAUDE_CODE_OAUTH_TOKEN": true, "OPENAI_API_KEY": true, "GH_TOKEN": true, diff --git a/packages/vm-agent/internal/acp/session_host.go b/packages/vm-agent/internal/acp/session_host.go index 84582e0bd..7533dc444 100644 --- a/packages/vm-agent/internal/acp/session_host.go +++ b/packages/vm-agent/internal/acp/session_host.go @@ -986,29 +986,46 @@ func (h *SessionHost) startAgent(ctx context.Context, agentType string, cred *ag } } } else if cred.inferenceConfig != nil && cred.inferenceConfig.APIKeySource == "callback-token" { - // Platform AI proxy: use the workspace callback token as the API key - // and inject the proxy base URL for OpenCode's openai-compatible provider. + // Platform AI proxy: use the workspace callback token as the API key. if h.config.CallbackToken == "" { return fmt.Errorf("platform AI proxy configured but CallbackToken is empty for workspace %s", h.config.WorkspaceID) } - envVars = append(envVars, "OPENCODE_PLATFORM_BASE_URL="+cred.inferenceConfig.BaseURL) - envVars = append(envVars, "OPENCODE_PLATFORM_API_KEY="+h.config.CallbackToken) - // Force provider to "platform" so buildOpencodeConfig generates the right config - if settings == nil { - settings = &agentSettingsPayload{} - } - settings.OpencodeProvider = "platform" - if settings.Model == "" && cred.inferenceConfig.Model != "" { - // Strip @cf/ prefix — see stripCFPrefix() doc comment. - settings.Model = stripCFPrefix(cred.inferenceConfig.Model) + + if agentType == "claude-code" && cred.inferenceConfig.Provider == "anthropic-proxy" { + // Claude Code: inject ANTHROPIC_BASE_URL and ANTHROPIC_AUTH_TOKEN for custom proxy. + // Claude Code appends /v1/messages to ANTHROPIC_BASE_URL automatically. + // ANTHROPIC_AUTH_TOKEN is used instead of ANTHROPIC_API_KEY for proxy auth. 
+ envVars = append(envVars, "ANTHROPIC_BASE_URL="+cred.inferenceConfig.BaseURL) + envVars = append(envVars, "ANTHROPIC_AUTH_TOKEN="+h.config.CallbackToken) + if cred.inferenceConfig.Model != "" { + envVars = append(envVars, "ANTHROPIC_MODEL="+cred.inferenceConfig.Model) + } + slog.Info("Claude Code AI proxy credential injected", + "baseURL", cred.inferenceConfig.BaseURL, + "model", cred.inferenceConfig.Model, + "callbackTokenLen", len(h.config.CallbackToken), + "workspaceId", h.config.WorkspaceID) + } else { + // OpenCode: inject openai-compatible proxy env vars. + envVars = append(envVars, "OPENCODE_PLATFORM_BASE_URL="+cred.inferenceConfig.BaseURL) + envVars = append(envVars, "OPENCODE_PLATFORM_API_KEY="+h.config.CallbackToken) + // Force provider to "platform" so buildOpencodeConfig generates the right config + if settings == nil { + settings = &agentSettingsPayload{} + } + settings.OpencodeProvider = "platform" + if settings.Model == "" && cred.inferenceConfig.Model != "" { + // Strip @cf/ prefix — see stripCFPrefix() doc comment. 
+ settings.Model = stripCFPrefix(cred.inferenceConfig.Model) + } + slog.Info("OpenCode AI proxy credential injected", + "baseURL", cred.inferenceConfig.BaseURL, + "model", cred.inferenceConfig.Model, + "settingsModel", settings.Model, + "settingsProvider", settings.OpencodeProvider, + "callbackTokenLen", len(h.config.CallbackToken), + "workspaceId", h.config.WorkspaceID) } - slog.Info("Platform AI proxy credential injected", - "baseURL", cred.inferenceConfig.BaseURL, - "model", cred.inferenceConfig.Model, - "settingsModel", settings.Model, - "settingsProvider", settings.OpencodeProvider, - "callbackTokenLen", len(h.config.CallbackToken), - "workspaceId", h.config.WorkspaceID) } else { envVars = append(envVars, fmt.Sprintf("%s=%s", info.envVarName, cred.credential)) } diff --git a/tasks/backlog/2026-04-30-claude-code-proxy-fallback.md b/tasks/archive/2026-04-30-claude-code-proxy-fallback.md similarity index 100% rename from tasks/backlog/2026-04-30-claude-code-proxy-fallback.md rename to tasks/archive/2026-04-30-claude-code-proxy-fallback.md